whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit 6ab9e09238fdfd742fe23b81e2d385a1cab49d9b
parent 528985117126f11beea339cf39120ee99da04cd2
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Mon, 22 Oct 2018 17:46:08 +0100

Merge tag 'for-4.20/block-20181021' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "This is the main pull request for block changes for 4.20. This
  contains:

   - Series enabling runtime PM for blk-mq (Bart).

   - Two pull requests from Christoph for NVMe, with items such as;
      - Better AEN tracking
      - Multipath improvements
      - RDMA fixes
      - Rework of FC for target removal
      - Fixes for issues identified by static checkers
      - Fabric cleanups, as prep for TCP transport
      - Various cleanups and bug fixes

   - Block merging cleanups (Christoph)

   - Conversion of drivers to generic DMA mapping API (Christoph)

   - Series fixing ref count issues with blkcg (Dennis)

   - Series improving BFQ heuristics (Paolo, et al)

   - Series improving heuristics for the Kyber IO scheduler (Omar)

   - Removal of dangerous bio_rewind_iter() API (Ming)

   - Apply single queue IPI redirection logic to blk-mq (Ming)

   - Set of fixes and improvements for bcache (Coly et al)

   - Series closing a hotplug race with sysfs group attributes (Hannes)

   - Set of patches for lightnvm:
      - pblk trace support (Hans)
      - SPDX license header update (Javier)
      - Tons of refactoring patches to cleanly abstract the 1.2 and 2.0
        specs behind a common core interface. (Javier, Matias)
      - Enable pblk to use a common interface to retrieve chunk metadata
        (Matias)
      - Bug fixes (Various)

   - Set of fixes and updates to the blk IO latency target (Josef)

   - blk-mq queue number updates fixes (Jianchao)

   - Convert a bunch of drivers from the old legacy IO interface to
     blk-mq. This will conclude with the removal of the legacy IO
     interface itself in 4.21, with the rest of the drivers (me, Omar)

   - Removal of the DAC960 driver. The SCSI tree will introduce two
     replacement drivers for this (Hannes)"

* tag 'for-4.20/block-20181021' of git://git.kernel.dk/linux-block: (204 commits)
  block: setup bounce bio_sets properly
  blkcg: reassociate bios when make_request() is called recursively
  blkcg: fix edge case for blk_get_rl() under memory pressure
  nvme-fabrics: move controller options matching to fabrics
  nvme-rdma: always have a valid trsvcid
  mtip32xx: fully switch to the generic DMA API
  rsxx: switch to the generic DMA API
  umem: switch to the generic DMA API
  sx8: switch to the generic DMA API
  sx8: remove dead IF_64BIT_DMA_IS_POSSIBLE code
  skd: switch to the generic DMA API
  ubd: remove use of blk_rq_map_sg
  nvme-pci: remove duplicate check
  drivers/block: Remove DAC960 driver
  nvme-pci: fix hot removal during error handling
  nvmet-fcloop: suppress a compiler warning
  nvme-core: make implicit seed truncation explicit
  nvmet-fc: fix kernel-doc headers
  nvme-fc: rework the request initialization code
  nvme-fc: introduce struct nvme_fcp_op_w_sgl
  ...

Diffstat:
MDocumentation/admin-guide/cgroup-v2.rst | 8+++++---
DDocumentation/blockdev/README.DAC960 | 756-------------------------------------------------------------------------------
MDocumentation/blockdev/zram.txt | 2+-
MDocumentation/device-mapper/log-writes.txt | 2+-
March/arm/include/asm/io.h | 15---------------
March/arm64/include/asm/io.h | 9---------
March/m68k/emu/nfblock.c | 2+-
Darch/m68k/include/asm/atafd.h | 13-------------
Darch/m68k/include/asm/atafdreg.h | 80-------------------------------------------------------------------------------
March/um/drivers/ubd_kern.c | 236+++++++++++++++++++++++++++++++++----------------------------------------------
March/x86/include/asm/io.h | 12------------
March/x86/include/asm/xen/events.h | 2++
March/x86/xen/enlighten.c | 1+
March/x86/xen/enlighten_pvh.c | 1+
March/x86/xen/platform-pci-unplug.c | 1+
March/x86/xen/pmu.c | 1+
Mblock/Kconfig | 10+++-------
Mblock/Kconfig.iosched | 3---
Mblock/Makefile | 1+
Mblock/bfq-cgroup.c | 4++--
Mblock/bfq-iosched.c | 291+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
Mblock/bfq-iosched.h | 53+++++++++++++++++++++++++++++++++++++----------------
Mblock/bfq-wf2q.c | 49++++++++++++++++++++++++++++++-------------------
Mblock/bio-integrity.c | 12++++--------
Mblock/bio.c | 218+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Mblock/blk-cgroup.c | 123++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Mblock/blk-core.c | 276++++++++++---------------------------------------------------------------------
Mblock/blk-flush.c | 6+++---
Mblock/blk-integrity.c | 12++----------
Mblock/blk-iolatency.c | 230+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mblock/blk-merge.c | 88++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Mblock/blk-mq-debugfs.c | 13++++++++++---
Mblock/blk-mq-sched.h | 4++--
Mblock/blk-mq-tag.c | 69++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Mblock/blk-mq.c | 211++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Ablock/blk-pm.c | 216+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ablock/blk-pm.h | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mblock/blk-softirq.c | 5++---
Mblock/blk-stat.c | 1+
Mblock/blk-throttle.c | 54++++++++++++++++--------------------------------------
Mblock/blk.h | 73++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mblock/bounce.c | 41+++++++++++++++++++++++++++++++----------
Mblock/cfq-iosched.c | 16+++++++++++-----
Mblock/elevator.c | 22+---------------------
Mblock/genhd.c | 19++++++++++++++-----
Mblock/kyber-iosched.c | 547++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Ddrivers/block/DAC960.c | 7229-------------------------------------------------------------------------------
Ddrivers/block/DAC960.h | 4414-------------------------------------------------------------------------------
Mdrivers/block/Kconfig | 13-------------
Mdrivers/block/Makefile | 1-
Mdrivers/block/amiflop.c | 318++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
Mdrivers/block/aoe/aoe.h | 5++++-
Mdrivers/block/aoe/aoeblk.c | 70+++++++++++++++++++++++++++++++++++++++++++---------------------------
Mdrivers/block/aoe/aoecmd.c | 19+++++++++++++------
Mdrivers/block/aoe/aoedev.c | 15++++++++-------
Mdrivers/block/ataflop.c | 273+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Mdrivers/block/drbd/Kconfig | 1-
Mdrivers/block/drbd/drbd_int.h | 15++++++++-------
Mdrivers/block/drbd/drbd_main.c | 16++++++++--------
Mdrivers/block/drbd/drbd_nl.c | 39++++++++++++---------------------------
Mdrivers/block/drbd/drbd_protocol.h | 4++--
Mdrivers/block/drbd/drbd_receiver.c | 35++++++++++++++++++-----------------
Mdrivers/block/drbd/drbd_req.c | 2+-
Mdrivers/block/drbd/drbd_worker.c | 65+++++++++++++++++++++++++++++++++--------------------------------
Mdrivers/block/floppy.c | 68+++++++++++++++++++++++++++++++++++++-------------------------------
Mdrivers/block/loop.c | 5+++--
Mdrivers/block/mtip32xx/mtip32xx.c | 49+++++++++++++++++++------------------------------
Mdrivers/block/null_blk_main.c | 111+++++--------------------------------------------------------------------------
Mdrivers/block/paride/pcd.c | 88++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Mdrivers/block/paride/pd.c | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mdrivers/block/paride/pf.c | 56++++++++++++++++++++++++++++++++++++++++++--------------
Mdrivers/block/pktcdvd.c | 2+-
Mdrivers/block/ps3disk.c | 88++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mdrivers/block/ps3vram.c | 2+-
Mdrivers/block/rsxx/core.c | 2+-
Mdrivers/block/rsxx/cregs.c | 2+-
Mdrivers/block/rsxx/dev.c | 2+-
Mdrivers/block/rsxx/dma.c | 52++++++++++++++++++++++++++--------------------------
Mdrivers/block/skd_main.c | 69++++++++++++++++++++++++++++-----------------------------------------
Mdrivers/block/sunvdc.c | 2+-
Mdrivers/block/swim.c | 106+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mdrivers/block/swim3.c | 211+++++++++++++++++++++++++++++++++++++------------------------------------------
Mdrivers/block/sx8.c | 166++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mdrivers/block/umem.c | 42++++++++++++++++++++----------------------
Mdrivers/block/virtio_blk.c | 68+++++++++++++++++++++++++++++++++++++++-----------------------------
Mdrivers/block/xen-blkfront.c | 2+-
Mdrivers/block/xsysace.c | 80++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Mdrivers/block/z2ram.c | 87++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mdrivers/block/zram/Kconfig | 2--
Mdrivers/block/zram/zram_drv.c | 28+++++++---------------------
Mdrivers/cdrom/cdrom.c | 29++++++++++++++---------------
Mdrivers/cdrom/gdrom.c | 174++++++++++++++++++++++++++++++++++++-------------------------------------------
Mdrivers/ide/ide-cd.c | 2+-
Mdrivers/ide/ide-gd.c | 2+-
Mdrivers/lightnvm/Kconfig | 3+--
Mdrivers/lightnvm/core.c | 334++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mdrivers/lightnvm/pblk-cache.c | 1+
Mdrivers/lightnvm/pblk-core.c | 587++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mdrivers/lightnvm/pblk-gc.c | 11++++++++++-
Mdrivers/lightnvm/pblk-init.c | 321++++++++++++++++++++++++++++++++-----------------------------------------------
Mdrivers/lightnvm/pblk-map.c | 13++++++++-----
Mdrivers/lightnvm/pblk-rb.c | 110+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mdrivers/lightnvm/pblk-read.c | 86++++++++++++++++++++-----------------------------------------------------------
Mdrivers/lightnvm/pblk-recovery.c | 471+++++++++++++++++++++++++++----------------------------------------------------
Mdrivers/lightnvm/pblk-rl.c | 5+++--
Mdrivers/lightnvm/pblk-sysfs.c | 12+++++++++---
Adrivers/lightnvm/pblk-trace.h | 145+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdrivers/lightnvm/pblk-write.c | 90+++++++++++++++++++++++++++++++------------------------------------------------
Mdrivers/lightnvm/pblk.h | 221+++++++++++++++++++++++++++----------------------------------------------------
Mdrivers/md/bcache/alloc.c | 2+-
Mdrivers/md/bcache/bcache.h | 2+-
Mdrivers/md/bcache/btree.c | 2+-
Mdrivers/md/bcache/closure.h | 3++-
Mdrivers/md/bcache/debug.c | 2+-
Mdrivers/md/bcache/extents.c | 2+-
Mdrivers/md/bcache/request.c | 9++++++---
Mdrivers/md/bcache/request.h | 2+-
Mdrivers/md/bcache/super.c | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Mdrivers/md/bcache/sysfs.c | 2++
Mdrivers/md/raid0.c | 2+-
Mdrivers/memstick/core/ms_block.c | 2+-
Mdrivers/memstick/core/mspro_block.c | 2+-
Mdrivers/mmc/core/block.c | 2+-
Mdrivers/mtd/mtd_blkdevs.c | 102+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Mdrivers/nvdimm/blk.c | 2+-
Mdrivers/nvdimm/btt.c | 2+-
Mdrivers/nvdimm/pmem.c | 2+-
Mdrivers/nvme/host/core.c | 47++++++++++++++++++++++++++++-------------------
Mdrivers/nvme/host/fabrics.c | 37+++++++++++++++++++++++++++++++++++--
Mdrivers/nvme/host/fabrics.h | 2++
Mdrivers/nvme/host/fc.c | 153++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
Mdrivers/nvme/host/lightnvm.c | 137++++++++++++++++++++++++++++++++++++++-----------------------------------------
Mdrivers/nvme/host/multipath.c | 79++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Mdrivers/nvme/host/nvme.h | 35+++++++++++------------------------
Mdrivers/nvme/host/pci.c | 9++++-----
Mdrivers/nvme/host/rdma.c | 78+++++++++++++++++++++---------------------------------------------------------
Mdrivers/nvme/host/trace.h | 28++++++++++++++++++++++++++++
Mdrivers/nvme/target/admin-cmd.c | 4++--
Mdrivers/nvme/target/core.c | 3+--
Mdrivers/nvme/target/discovery.c | 6++----
Mdrivers/nvme/target/fc.c | 136++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Mdrivers/nvme/target/fcloop.c | 1+
Mdrivers/nvme/target/io-cmd-bdev.c | 9+++++++--
Mdrivers/nvme/target/io-cmd-file.c | 3++-
Mdrivers/nvme/target/nvmet.h | 1+
Mdrivers/nvme/target/rdma.c | 19+++++++++++++++----
Mdrivers/s390/block/dasd_genhd.c | 2+-
Mdrivers/s390/block/dcssblk.c | 2+-
Mdrivers/s390/block/scm_blk.c | 2+-
Mdrivers/scsi/scsi_lib.c | 11+++++++----
Mdrivers/scsi/scsi_pm.c | 1+
Mdrivers/scsi/sd.c | 3++-
Mdrivers/scsi/sr.c | 3++-
Mdrivers/target/target_core_spc.c | 6+++---
Mdrivers/xen/biomerge.c | 3+--
Mdrivers/xen/xen-acpi-pad.c | 1+
Mfs/buffer.c | 10+++++-----
Mfs/ext4/page-io.c | 2+-
Dinclude/linux/amifd.h | 63---------------------------------------------------------------
Dinclude/linux/amifdreg.h | 82-------------------------------------------------------------------------------
Minclude/linux/bio.h | 78+++++++++++++++++++++---------------------------------------------------------
Minclude/linux/blk-cgroup.h | 145++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Minclude/linux/blk-mq.h | 4++++
Ainclude/linux/blk-pm.h | 24++++++++++++++++++++++++
Minclude/linux/blk_types.h | 1-
Minclude/linux/blkdev.h | 164+++++++------------------------------------------------------------------------
Minclude/linux/bvec.h | 3---
Minclude/linux/cgroup.h | 2++
Minclude/linux/elevator.h | 2+-
Minclude/linux/genhd.h | 5+++--
Minclude/linux/lightnvm.h | 166++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Minclude/linux/mtd/blktrans.h | 5++---
Minclude/linux/nvme.h | 1+
Minclude/linux/percpu-refcount.h | 1+
Minclude/linux/writeback.h | 5+++--
Ainclude/trace/events/kyber.h | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/xen/xen.h | 4++++
Mkernel/cgroup/cgroup.c | 48+++++++++++++++++++++++++++++++++++++++---------
Mkernel/trace/blktrace.c | 4++--
Mlib/percpu-refcount.c | 28++++++++++++++++++++++++++--
Mmm/page_io.c | 2+-
181 files changed, 5670 insertions(+), 16863 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst @@ -1857,8 +1857,10 @@ following two functions. wbc_init_bio(@wbc, @bio) Should be called for each bio carrying writeback data and - associates the bio with the inode's owner cgroup. Can be - called anytime between bio allocation and submission. + associates the bio with the inode's owner cgroup and the + corresponding request queue. This must be called after + a queue (device) has been associated with the bio and + before submission. wbc_account_io(@wbc, @page, @bytes) Should be called for each data segment being written out. @@ -1877,7 +1879,7 @@ the configuration, the bio may be executed at a lower priority and if the writeback session is holding shared resources, e.g. a journal entry, may lead to priority inversion. There is no one easy solution for the problem. Filesystems can try to work around specific problem -cases by skipping wbc_init_bio() or using bio_associate_blkcg() +cases by skipping wbc_init_bio() or using bio_associate_create_blkg() directly. diff --git a/Documentation/blockdev/README.DAC960 b/Documentation/blockdev/README.DAC960 @@ -1,756 +0,0 @@ - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Version 2.2.11 for Linux 2.2.19 - Version 2.4.11 for Linux 2.4.12 - - PRODUCTION RELEASE - - 11 October 2001 - - Leonard N. Zubkoff - Dandelion Digital - lnz@dandelion.com - - Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com> - - - INTRODUCTION - -Mylex, Inc. designs and manufactures a variety of high performance PCI RAID -controllers. Mylex Corporation is located at 34551 Ardenwood Blvd., Fremont, -California 94555, USA and can be reached at 510.796.6100 or on the World Wide -Web at http://www.mylex.com. Mylex Technical Support can be reached by -electronic mail at mylexsup@us.ibm.com, by voice at 510.608.2400, or by FAX at -510.745.7715. Contact information for offices in Europe and Japan is available -on their Web site. - -The latest information on Linux support for DAC960 PCI RAID Controllers, as -well as the most recent release of this driver, will always be available from -my Linux Home Page at URL "http://www.dandelion.com/Linux/". The Linux DAC960 -driver supports all current Mylex PCI RAID controllers including the new -eXtremeRAID 2000/3000 and AcceleRAID 352/170/160 models which have an entirely -new firmware interface from the older eXtremeRAID 1100, AcceleRAID 150/200/250, -and DAC960PJ/PG/PU/PD/PL. See below for a complete controller list as well as -minimum firmware version requirements. For simplicity, in most places this -documentation refers to DAC960 generically rather than explicitly listing all -the supported models. - -Driver bug reports should be sent via electronic mail to "lnz@dandelion.com". -Please include with the bug report the complete configuration messages reported -by the driver at startup, along with any subsequent system messages relevant to -the controller's operation, and a detailed description of your system's -hardware configuration. Driver bugs are actually quite rare; if you encounter -problems with disks being marked offline, for example, please contact Mylex -Technical Support as the problem is related to the hardware configuration -rather than the Linux driver. - -Please consult the RAID controller documentation for detailed information -regarding installation and configuration of the controllers. This document -primarily provides information specific to the Linux support. - - - DRIVER FEATURES - -The DAC960 RAID controllers are supported solely as high performance RAID -controllers, not as interfaces to arbitrary SCSI devices. The Linux DAC960 -driver operates at the block device level, the same level as the SCSI and IDE -drivers. Unlike other RAID controllers currently supported on Linux, the -DAC960 driver is not dependent on the SCSI subsystem, and hence avoids all the -complexity and unnecessary code that would be associated with an implementation -as a SCSI driver. The DAC960 driver is designed for as high a performance as -possible with no compromises or extra code for compatibility with lower -performance devices. The DAC960 driver includes extensive error logging and -online configuration management capabilities. Except for initial configuration -of the controller and adding new disk drives, most everything can be handled -from Linux while the system is operational. - -The DAC960 driver is architected to support up to 8 controllers per system. -Each DAC960 parallel SCSI controller can support up to 15 disk drives per -channel, for a maximum of 60 drives on a four channel controller; the fibre -channel eXtremeRAID 3000 controller supports up to 125 disk drives per loop for -a total of 250 drives. The drives installed on a controller are divided into -one or more "Drive Groups", and then each Drive Group is subdivided further -into 1 to 32 "Logical Drives". Each Logical Drive has a specific RAID Level -and caching policy associated with it, and it appears to Linux as a single -block device. Logical Drives are further subdivided into up to 7 partitions -through the normal Linux and PC disk partitioning schemes. Logical Drives are -also known as "System Drives", and Drive Groups are also called "Packs". Both -terms are in use in the Mylex documentation; I have chosen to standardize on -the more generic "Logical Drive" and "Drive Group". - -DAC960 RAID disk devices are named in the style of the obsolete Device File -System (DEVFS). The device corresponding to Logical Drive D on Controller C -is referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1 -through /dev/rd/cCdDp7. For example, partition 3 of Logical Drive 5 on -Controller 2 is referred to as /dev/rd/c2d5p3. Note that unlike with SCSI -disks the device names will not change in the event of a disk drive failure. -The DAC960 driver is assigned major numbers 48 - 55 with one major number per -controller. The 8 bits of minor number are divided into 5 bits for the Logical -Drive and 3 bits for the partition. - - - SUPPORTED DAC960/AcceleRAID/eXtremeRAID PCI RAID CONTROLLERS - -The following list comprises the supported DAC960, AcceleRAID, and eXtremeRAID -PCI RAID Controllers as of the date of this document. It is recommended that -anyone purchasing a Mylex PCI RAID Controller not in the following table -contact the author beforehand to verify that it is or will be supported. - -eXtremeRAID 3000 - 1 Wide Ultra-2/LVD SCSI channel - 2 External Fibre FC-AL channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -eXtremeRAID 2000 - 4 Wide Ultra-160 LVD SCSI channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -AcceleRAID 352 - 2 Wide Ultra-160 LVD SCSI channels - 100MHz Intel i960RN RISC Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -AcceleRAID 170 - 1 Wide Ultra-160 LVD SCSI channel - 100MHz Intel i960RM RISC Processor - 16MB/32MB/64MB ECC SDRAM Memory - -AcceleRAID 160 (AcceleRAID 170LP) - 1 Wide Ultra-160 LVD SCSI channel - 100MHz Intel i960RS RISC Processor - Built in 16M ECC SDRAM Memory - PCI Low Profile Form Factor - fit for 2U height - -eXtremeRAID 1100 (DAC1164P) - 3 Wide Ultra-2/LVD SCSI channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 16MB/32MB/64MB Parity SDRAM Memory with Battery Backup - -AcceleRAID 250 (DAC960PTL1) - Uses onboard Symbios SCSI chips on certain motherboards - Also includes one onboard Wide Ultra-2/LVD SCSI Channel - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -AcceleRAID 200 (DAC960PTL0) - Uses onboard Symbios SCSI chips on certain motherboards - Includes no onboard SCSI Channels - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -AcceleRAID 150 (DAC960PRL) - Uses onboard Symbios SCSI chips on certain motherboards - Also includes one onboard Wide Ultra-2/LVD SCSI Channel - 33MHz Intel i960RP RISC Processor - 4MB Parity EDO Memory - -DAC960PJ 1/2/3 Wide Ultra SCSI-3 Channels - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -DAC960PG 1/2/3 Wide Ultra SCSI-3 Channels - 33MHz Intel i960RP RISC Processor - 4MB/8MB ECC EDO Memory - -DAC960PU 1/2/3 Wide Ultra SCSI-3 Channels - Intel i960CF RISC Processor - 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960PD 1/2/3 Wide Fast SCSI-2 Channels - Intel i960CF RISC Processor - 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960PL 1/2/3 Wide Fast SCSI-2 Channels - Intel i960 RISC Processor - 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960P 1/2/3 Wide Fast SCSI-2 Channels - Intel i960 RISC Processor - 2MB/4MB/8MB/16MB/32MB DRAM Memory - -For the eXtremeRAID 2000/3000 and AcceleRAID 352/170/160, firmware version -6.00-01 or above is required. - -For the eXtremeRAID 1100, firmware version 5.06-0-52 or above is required. - -For the AcceleRAID 250, 200, and 150, firmware version 4.06-0-57 or above is -required. - -For the DAC960PJ and DAC960PG, firmware version 4.06-0-00 or above is required. - -For the DAC960PU, DAC960PD, DAC960PL, and DAC960P, either firmware version -3.51-0-04 or above is required (for dual Flash ROM controllers), or firmware -version 2.73-0-00 or above is required (for single Flash ROM controllers) - -Please note that not all SCSI disk drives are suitable for use with DAC960 -controllers, and only particular firmware versions of any given model may -actually function correctly. Similarly, not all motherboards have a BIOS that -properly initializes the AcceleRAID 250, AcceleRAID 200, AcceleRAID 150, -DAC960PJ, and DAC960PG because the Intel i960RD/RP is a multi-function device. -If in doubt, contact Mylex RAID Technical Support (mylexsup@us.ibm.com) to -verify compatibility. Mylex makes available a hard disk compatibility list at -http://www.mylex.com/support/hdcomp/hd-lists.html. - - - DRIVER INSTALLATION - -This distribution was prepared for Linux kernel version 2.2.19 or 2.4.12. - -To install the DAC960 RAID driver, you may use the following commands, -replacing "/usr/src" with wherever you keep your Linux kernel source tree: - - cd /usr/src - tar -xvzf DAC960-2.2.11.tar.gz (or DAC960-2.4.11.tar.gz) - mv README.DAC960 linux/Documentation - mv DAC960.[ch] linux/drivers/block - patch -p0 < DAC960.patch (if DAC960.patch is included) - cd linux - make config - make bzImage (or zImage) - -Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your -standard kernel, run lilo if appropriate, and reboot. - -To create the necessary devices in /dev, the "make_rd" script included in -"DAC960-Utilities.tar.gz" from http://www.dandelion.com/Linux/ may be used. -LILO 21 and FDISK v2.9 include DAC960 support; also included in this archive -are patches to LILO 20 and FDISK v2.8 that add DAC960 support, along with -statically linked executables of LILO and FDISK. This modified version of LILO -will allow booting from a DAC960 controller and/or mounting the root file -system from a DAC960. - -Red Hat Linux 6.0 and SuSE Linux 6.1 include support for Mylex PCI RAID -controllers. Installing directly onto a DAC960 may be problematic from other -Linux distributions until their installation utilities are updated. - - - INSTALLATION NOTES - -Before installing Linux or adding DAC960 logical drives to an existing Linux -system, the controller must first be configured to provide one or more logical -drives using the BIOS Configuration Utility or DACCF. Please note that since -there are only at most 6 usable partitions on each logical drive, systems -requiring more partitions should subdivide a drive group into multiple logical -drives, each of which can have up to 6 usable partitions. Also, note that with -large disk arrays it is advisable to enable the 8GB BIOS Geometry (255/63) -rather than accepting the default 2GB BIOS Geometry (128/32); failing to so do -will cause the logical drive geometry to have more than 65535 cylinders which -will make it impossible for FDISK to be used properly. The 8GB BIOS Geometry -can be enabled by configuring the DAC960 BIOS, which is accessible via Alt-M -during the BIOS initialization sequence. - -For maximum performance and the most efficient E2FSCK performance, it is -recommended that EXT2 file systems be built with a 4KB block size and 16 block -stride to match the DAC960 controller's 64KB default stripe size. The command -"mke2fs -b 4096 -R stride=16 <device>" is appropriate. Unless there will be a -large number of small files on the file systems, it is also beneficial to add -the "-i 16384" option to increase the bytes per inode parameter thereby -reducing the file system metadata. Finally, on systems that will only be run -with Linux 2.2 or later kernels it is beneficial to enable sparse superblocks -with the "-s 1" option. - - - DAC960 ANNOUNCEMENTS MAILING LIST - -The DAC960 Announcements Mailing List provides a forum for informing Linux -users of new driver releases and other announcements regarding Linux support -for DAC960 PCI RAID Controllers. To join the mailing list, send a message to -"dac960-announce-request@dandelion.com" with the line "subscribe" in the -message body. - - - CONTROLLER CONFIGURATION AND STATUS MONITORING - -The DAC960 RAID controllers running firmware 4.06 or above include a Background -Initialization facility so that system downtime is minimized both for initial -installation and subsequent configuration of additional storage. The BIOS -Configuration Utility (accessible via Alt-R during the BIOS initialization -sequence) is used to quickly configure the controller, and then the logical -drives that have been created are available for immediate use even while they -are still being initialized by the controller. The primary need for online -configuration and status monitoring is then to avoid system downtime when disk -drives fail and must be replaced. Mylex's online monitoring and configuration -utilities are being ported to Linux and will become available at some point in -the future. Note that with a SAF-TE (SCSI Accessed Fault-Tolerant Enclosure) -enclosure, the controller is able to rebuild failed drives automatically as -soon as a drive replacement is made available. - -The primary interfaces for controller configuration and status monitoring are -special files created in the /proc/rd/... hierarchy along with the normal -system console logging mechanism. Whenever the system is operating, the DAC960 -driver queries each controller for status information every 10 seconds, and -checks for additional conditions every 60 seconds. The initial status of each -controller is always available for controller N in /proc/rd/cN/initial_status, -and the current status as of the last status monitoring query is available in -/proc/rd/cN/current_status. In addition, status changes are also logged by the -driver to the system console and will appear in the log files maintained by -syslog. The progress of asynchronous rebuild or consistency check operations -is also available in /proc/rd/cN/current_status, and progress messages are -logged to the system console at most every 60 seconds. - -Starting with the 2.2.3/2.0.3 versions of the driver, the status information -available in /proc/rd/cN/initial_status and /proc/rd/cN/current_status has been -augmented to include the vendor, model, revision, and serial number (if -available) for each physical device found connected to the controller: - -***** DAC960 RAID Driver Version 2.2.3 of 19 August 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PRL PCI RAID Controller - Firmware Version: 4.07-0-07, Channels: 1, Memory Size: 16MB - PCI Bus: 1, Device: 4, Function: 1, I/O Address: Unassigned - PCI Address: 0xFE300000 mapped at 0xA0800000, IRQ Channel: 21 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - SAF-TE Enclosure Management Enabled - Physical Devices: - 0:0 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68016775HA - Disk Status: Online, 17928192 blocks - 0:1 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68004E53HA - Disk Status: Online, 17928192 blocks - 0:2 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 13013935HA - Disk Status: Online, 17928192 blocks - 0:3 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 13016897HA - Disk Status: Online, 17928192 blocks - 0:4 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68019905HA - Disk Status: Online, 17928192 blocks - 0:5 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68012753HA - Disk Status: Online, 17928192 blocks - 0:6 Vendor: ESG-SHV Model: SCA HSBP M6 Revision: 0.61 - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 89640960 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -To simplify the monitoring process for custom software, the special file -/proc/rd/status returns "OK" when all DAC960 controllers in the system are -operating normally and no failures have occurred, or "ALERT" if any logical -drives are offline or critical or any non-standby physical drives are dead. - -Configuration commands for controller N are available via the special file -/proc/rd/cN/user_command. A human readable command can be written to this -special file to initiate a configuration operation, and the results of the -operation can then be read back from the special file in addition to being -logged to the system console. The shell command sequence - - echo "<configuration-command>" > /proc/rd/c0/user_command - cat /proc/rd/c0/user_command - -is typically used to execute configuration commands. The configuration -commands are: - - flush-cache - - The "flush-cache" command flushes the controller's cache. The system - automatically flushes the cache at shutdown or if the driver module is - unloaded, so this command is only needed to be certain a write back cache - is flushed to disk before the system is powered off by a command to a UPS. - Note that the flush-cache command also stops an asynchronous rebuild or - consistency check, so it should not be used except when the system is being - halted. - - kill <channel>:<target-id> - - The "kill" command marks the physical drive <channel>:<target-id> as DEAD. - This command is provided primarily for testing, and should not be used - during normal system operation. - - make-online <channel>:<target-id> - - The "make-online" command changes the physical drive <channel>:<target-id> - from status DEAD to status ONLINE. In cases where multiple physical drives - have been killed simultaneously, this command may be used to bring all but - one of them back online, after which a rebuild to the final drive is - necessary. - - Warning: make-online should only be used on a dead physical drive that is - an active part of a drive group, never on a standby drive. The command - should never be used on a dead drive that is part of a critical logical - drive; rebuild should be used if only a single drive is dead. - - make-standby <channel>:<target-id> - - The "make-standby" command changes physical drive <channel>:<target-id> - from status DEAD to status STANDBY. It should only be used in cases where - a dead drive was replaced after an automatic rebuild was performed onto a - standby drive. It cannot be used to add a standby drive to the controller - configuration if one was not created initially; the BIOS Configuration - Utility must be used for that currently. - - rebuild <channel>:<target-id> - - The "rebuild" command initiates an asynchronous rebuild onto physical drive - <channel>:<target-id>. It should only be used when a dead drive has been - replaced. - - check-consistency <logical-drive-number> - - The "check-consistency" command initiates an asynchronous consistency check - of <logical-drive-number> with automatic restoration. It can be used - whenever it is desired to verify the consistency of the redundancy - information. - - cancel-rebuild - cancel-consistency-check - - The "cancel-rebuild" and "cancel-consistency-check" commands cancel any - rebuild or consistency check operations previously initiated. - - - EXAMPLE I - DRIVE FAILURE WITHOUT A STANDBY DRIVE - -The following annotated logs demonstrate the controller configuration and and -online status monitoring capabilities of the Linux DAC960 Driver. The test -configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a -DAC960PJ controller. The physical drives are configured into a single drive -group without a standby drive, and the drive group has been configured into two -logical drives, one RAID-5 and one RAID-6. Note that these logs are from an -earlier version of the driver and the messages have changed somewhat with newer -releases, but the functionality remains similar. First, here is the current -status of the RAID configuration: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -The above messages indicate that everything is healthy, and /proc/rd/status -returns "OK" indicating that there are no problems with any DAC960 controller -in the system. For demonstration purposes, while I/O is active Physical Drive -1:1 is now disconnected, simulating a drive failure. The failure is noted by -the driver within 10 seconds of the controller's having detected it, and the -driver logs the following console status messages indicating that Logical -Drives 0 and 1 are now CRITICAL as a result of Physical Drive 1:1 being DEAD: - -DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:1 killed because of timeout on SCSI command -DAC960#0: Physical Drive 1:1 is now DEAD -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL - -The Sense Keys logged here are just Check Condition / Unit Attention conditions -arising from a SCSI bus reset that is forced by the controller during its error -recovery procedures. Concurrently with the above, the driver status available -from /proc/rd also reflects the drive failure. The status message in -/proc/rd/status has changed from "OK" to "ALERT": - -gwynedd:/u/lnz# cat /proc/rd/status -ALERT - -and /proc/rd/c0/current_status has been updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Dead, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -Since there are no standby drives configured, the system can continue to access -the logical drives in a performance degraded mode until the failed drive is -replaced and a rebuild operation completed to restore the redundancy of the -logical drives. Once Physical Drive 1:1 is replaced with a properly -functioning drive, or if the physical drive was killed without having failed -(e.g., due to electrical problems on the SCSI bus), the user can instruct the -controller to initiate a rebuild operation onto the newly replaced drive: - -gwynedd:/u/lnz# echo "rebuild 1:1" > /proc/rd/c0/user_command -gwynedd:/u/lnz# cat /proc/rd/c0/user_command -Rebuild of Physical Drive 1:1 Initiated - -The echo command instructs the controller to initiate an asynchronous rebuild -operation onto Physical Drive 1:1, and the status message that results from the -operation is then available for reading from /proc/rd/c0/user_command, as well -as being logged to the console by the driver. - -Within 10 seconds of this command the driver logs the initiation of the -asynchronous rebuild operation: - -DAC960#0: Rebuild of Physical Drive 1:1 Initiated -DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01 -DAC960#0: Physical Drive 1:1 is now WRITE-ONLY -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 1% completed - -and /proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Write-Only, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 6% completed - -As the rebuild progresses, the current status in /proc/rd/c0/current_status is -updated every 10 seconds: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Write-Only, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 15% completed - -and every minute a progress message is logged to the console by the driver: - -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 32% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 63% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 94% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 94% completed - -Finally, the rebuild completes successfully. The driver logs the status of the -logical and physical drives and the rebuild completion: - -DAC960#0: Rebuild Completed Successfully -DAC960#0: Physical Drive 1:1 is now ONLINE -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE - -/proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru - Rebuild Completed Successfully - -and /proc/rd/status indicates that everything is healthy once again: - -gwynedd:/u/lnz# cat /proc/rd/status -OK - - - EXAMPLE II - DRIVE FAILURE WITH A STANDBY DRIVE - -The following annotated logs demonstrate the controller configuration and and -online status monitoring capabilities of the Linux DAC960 Driver. The test -configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a -DAC960PJ controller. The physical drives are configured into a single drive -group with a standby drive, and the drive group has been configured into two -logical drives, one RAID-5 and one RAID-6. Note that these logs are from an -earlier version of the driver and the messages have changed somewhat with newer -releases, but the functionality remains similar. First, here is the current -status of the RAID configuration: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Standby, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -The above messages indicate that everything is healthy, and /proc/rd/status -returns "OK" indicating that there are no problems with any DAC960 controller -in the system. For demonstration purposes, while I/O is active Physical Drive -1:2 is now disconnected, simulating a drive failure. The failure is noted by -the driver within 10 seconds of the controller's having detected it, and the -driver logs the following console status messages: - -DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:2 killed because of timeout on SCSI command -DAC960#0: Physical Drive 1:2 is now DEAD -DAC960#0: Physical Drive 1:2 killed because it was removed -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL - -Since a standby drive is configured, the controller automatically begins -rebuilding onto the standby drive: - -DAC960#0: Physical Drive 1:3 is now WRITE-ONLY -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed - -Concurrently with the above, the driver status available from /proc/rd also -reflects the drive failure and automatic rebuild. The status message in -/proc/rd/status has changed from "OK" to "ALERT": - -gwynedd:/u/lnz# cat /proc/rd/status -ALERT - -and /proc/rd/c0/current_status has been updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Write-Only, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed - -As the rebuild progresses, the current status in /proc/rd/c0/current_status is -updated every 10 seconds: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Write-Only, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed - -and every minute a progress message is logged on the console by the driver: - -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 76% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 66% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 84% completed - -Finally, the rebuild completes successfully. The driver logs the status of the -logical and physical drives and the rebuild completion: - -DAC960#0: Rebuild Completed Successfully -DAC960#0: Physical Drive 1:3 is now ONLINE -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE - -/proc/rd/c0/current_status is updated: - -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - Rebuild Completed Successfully - -and /proc/rd/status indicates that everything is healthy once again: - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -Note that the absence of a viable standby drive does not create an "ALERT" -status. Once dead Physical Drive 1:2 has been replaced, the controller must be -told that this has occurred and that the newly replaced drive should become the -new standby drive: - -gwynedd:/u/lnz# echo "make-standby 1:2" > /proc/rd/c0/user_command -gwynedd:/u/lnz# cat /proc/rd/c0/user_command -Make Standby of Physical Drive 1:2 Succeeded - -The echo command instructs the controller to make Physical Drive 1:2 into a -standby drive, and the status message that results from the operation is then -available for reading from /proc/rd/c0/user_command, as well as being logged to -the console by the driver. Within 60 seconds of this command the driver logs: - -DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01 -DAC960#0: Physical Drive 1:2 is now STANDBY -DAC960#0: Make Standby of Physical Drive 1:2 Succeeded - -and /proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Standby, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - Rebuild Completed Successfully diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt @@ -190,7 +190,7 @@ whitespace: notify_free Depending on device usage scenario it may account a) the number of pages freed because of swap slot free notifications or b) the number of pages freed because of - REQ_DISCARD requests sent by bio. The former ones are + REQ_OP_DISCARD requests sent by bio. The former ones are sent to a swap block device when a swap slot is freed, which implies that this disk is being used as a swap disk. The latter ones are sent by filesystem mounted with diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt @@ -38,7 +38,7 @@ inconsistent file system. Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as they complete as those requests will obviously bypass the device cache. -Any REQ_DISCARD requests are treated like WRITE requests. Otherwise we would +Any REQ_OP_DISCARD requests are treated like WRITE requests. Otherwise we would have all the DISCARD requests, and then the WRITE requests and then the FLUSH request. Consider the following example: diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h @@ -28,7 +28,6 @@ #include <asm/byteorder.h> #include <asm/memory.h> #include <asm-generic/pci_iomap.h> -#include <xen/xen.h> /* * ISA I/O bus memory addresses are 1:1 with the physical address. @@ -459,20 +458,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #include <asm-generic/io.h> -/* - * can the hardware map this into one segment or not, given no other - * constraints. - */ -#define BIOVEC_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) - -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) - #ifdef CONFIG_MMU #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h @@ -31,8 +31,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <xen/xen.h> - /* * Generic IO read/write. These perform native-endian accesses. */ @@ -205,12 +203,5 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) - #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c @@ -73,7 +73,7 @@ static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio) len = bvec.bv_len; len >>= 9; nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift, - bvec_to_phys(&bvec)); + page_to_phys(bvec.bv_page) + bvec.bv_offset); sec += len; } bio_endio(bio); diff --git a/arch/m68k/include/asm/atafd.h b/arch/m68k/include/asm/atafd.h @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_M68K_FD_H -#define _ASM_M68K_FD_H - -/* Definitions for the Atari Floppy driver */ - -struct atari_format_descr { - int track; /* to be formatted */ - int head; /* "" "" */ - int sect_offset; /* offset of first sector */ -}; - -#endif diff --git a/arch/m68k/include/asm/atafdreg.h b/arch/m68k/include/asm/atafdreg.h @@ -1,80 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_FDREG_H -#define _LINUX_FDREG_H - -/* -** WD1772 stuff - */ - -/* register codes */ - -#define FDCSELREG_STP (0x80) /* command/status register */ -#define FDCSELREG_TRA (0x82) /* track register */ -#define FDCSELREG_SEC (0x84) /* sector register */ -#define FDCSELREG_DTA (0x86) /* data register */ - -/* register names for FDC_READ/WRITE macros */ - -#define FDCREG_CMD 0 -#define FDCREG_STATUS 0 -#define FDCREG_TRACK 2 -#define FDCREG_SECTOR 4 -#define FDCREG_DATA 6 - -/* command opcodes */ - -#define FDCCMD_RESTORE (0x00) /* - */ -#define FDCCMD_SEEK (0x10) /* | */ -#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */ -#define FDCCMD_STIN (0x40) /* | */ -#define FDCCMD_STOT (0x60) /* - */ -#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */ -#define FDCCMD_WRSEC (0xa0) /* - " */ -#define FDCCMD_RDADR (0xc0) /* - */ -#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */ -#define FDCCMD_WRTRA (0xf0) /* - */ -#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */ - -/* command modifier bits */ - -#define FDCCMDADD_SR6 (0x00) /* step rate settings */ -#define FDCCMDADD_SR12 (0x01) -#define FDCCMDADD_SR2 (0x02) -#define FDCCMDADD_SR3 (0x03) -#define FDCCMDADD_V (0x04) /* verify */ -#define FDCCMDADD_H (0x08) /* wait for spin-up */ -#define FDCCMDADD_U (0x10) /* update track register */ -#define FDCCMDADD_M (0x10) /* multiple sector access */ -#define FDCCMDADD_E (0x04) /* head settling flag */ -#define FDCCMDADD_P (0x02) /* precompensation off */ -#define FDCCMDADD_A0 (0x01) /* DAM flag */ - -/* status register bits */ - -#define FDCSTAT_MOTORON (0x80) /* motor on */ -#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */ -#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */ -#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ -#define FDCSTAT_RECNF (0x10) /* record not found */ -#define FDCSTAT_CRC (0x08) /* CRC error */ -#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */ -#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */ -#define FDCSTAT_IDX (0x02) /* Index status (Type I) */ -#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */ -#define FDCSTAT_BUSY (0x01) /* FDC is busy */ - - -/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */ -#define DSKSIDE (0x01) - -#define DSKDRVNONE (0x06) -#define DSKDRV0 (0x02) -#define DSKDRV1 (0x04) - -/* step rates */ -#define FDCSTEP_6 0x00 -#define FDCSTEP_12 0x01 -#define FDCSTEP_2 0x02 -#define FDCSTEP_3 0x03 - -#endif diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/ata.h> #include <linux/hdreg.h> #include <linux/cdrom.h> @@ -142,7 +143,6 @@ struct cow { #define MAX_SG 64 struct ubd { - struct list_head restart; /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; @@ -156,11 +156,8 @@ struct ubd { struct cow cow; struct platform_device pdev; struct request_queue *queue; + struct blk_mq_tag_set tag_set; spinlock_t lock; - struct scatterlist sg[MAX_SG]; - struct request *request; - int start_sg, end_sg; - sector_t rq_pos; }; #define DEFAULT_COW { \ @@ -182,10 +179,6 @@ struct ubd { .shared = 0, \ .cow = DEFAULT_COW, \ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ - .request = NULL, \ - .start_sg = 0, \ - .end_sg = 0, \ - .rq_pos = 0, \ } /* Protected by ubd_lock */ @@ -196,6 +189,9 @@ static int fake_ide = 0; static struct proc_dir_entry *proc_ide_root = NULL; static struct proc_dir_entry *proc_ide = NULL; +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); + static void make_proc_ide(void) { proc_ide_root = proc_mkdir("ide", NULL); @@ -436,11 +432,8 @@ __uml_help(udb_setup, " in the boot output.\n\n" ); -static void do_ubd_request(struct request_queue * q); - /* Only changed by ubd_init, which is an initcall. */ static int thread_fd = -1; -static LIST_HEAD(restart); /* Function to read several request pointers at a time * handling fractional reads if (and as) needed @@ -498,9 +491,6 @@ static int bulk_req_safe_read( /* Called without dev->lock held, and only in interrupt context. */ static void ubd_handler(void) { - struct ubd *ubd; - struct list_head *list, *next_ele; - unsigned long flags; int n; int count; @@ -520,23 +510,17 @@ static void ubd_handler(void) return; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - blk_end_request( - (*irq_req_buffer)[count]->req, - BLK_STS_OK, - (*irq_req_buffer)[count]->length - ); - kfree((*irq_req_buffer)[count]); + struct io_thread_req *io_req = (*irq_req_buffer)[count]; + int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK; + + if (!blk_update_request(io_req->req, err, io_req->length)) + __blk_mq_end_request(io_req->req, err); + + kfree(io_req); } } - reactivate_fd(thread_fd, UBD_IRQ); - list_for_each_safe(list, next_ele, &restart){ - ubd = container_of(list, struct ubd, restart); - list_del_init(&ubd->restart); - spin_lock_irqsave(&ubd->lock, flags); - do_ubd_request(ubd->queue); - spin_unlock_irqrestore(&ubd->lock, flags); - } + reactivate_fd(thread_fd, UBD_IRQ); } static irqreturn_t ubd_intr(int irq, void *dev) @@ -857,6 +841,7 @@ static void ubd_device_release(struct device *dev) struct ubd *ubd_dev = dev_get_drvdata(dev); blk_cleanup_queue(ubd_dev->queue); + blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } @@ -891,7 +876,7 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk); + device_add_disk(parent, disk, NULL); *disk_out = disk; return 0; @@ -899,6 +884,10 @@ static int ubd_disk_register(int major, u64 size, int unit, #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) +static const struct blk_mq_ops ubd_mq_ops = { + .queue_rq = ubd_queue_rq, +}; + static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; @@ -915,15 +904,23 @@ static int ubd_add(int n, char **error_out) ubd_dev->size = ROUND_BLOCK(ubd_dev->size); - INIT_LIST_HEAD(&ubd_dev->restart); - sg_init_table(ubd_dev->sg, MAX_SG); + ubd_dev->tag_set.ops = &ubd_mq_ops; + ubd_dev->tag_set.queue_depth = 64; + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ubd_dev->tag_set.driver_data = ubd_dev; + ubd_dev->tag_set.nr_hw_queues = 1; - err = -ENOMEM; - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); - if (ubd_dev->queue == NULL) { - *error_out = "Failed to initialize device queue"; + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); + if (err) goto out; + + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); + if (IS_ERR(ubd_dev->queue)) { + err = PTR_ERR(ubd_dev->queue); + goto out_cleanup; } + ubd_dev->queue->queuedata = ubd_dev; blk_queue_write_cache(ubd_dev->queue, true, false); @@ -931,7 +928,7 @@ static int ubd_add(int n, char **error_out) err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; - goto out_cleanup; + goto out_cleanup_tags; } if (fake_major != UBD_MAJOR) @@ -949,6 +946,8 @@ static int ubd_add(int n, char **error_out) out: return err; +out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); out_cleanup: blk_cleanup_queue(ubd_dev->queue); goto out; @@ -1290,123 +1289,82 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, req->bitmap_words, bitmap_len); } -/* Called with dev->lock held */ -static void prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, + u64 off, struct bio_vec *bvec) { - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; - - io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : - ubd_dev->fd; - io_req->fds[1] = ubd_dev->fd; - io_req->cow_offset = -1; - io_req->offset = offset; - io_req->length = len; - io_req->error = 0; - io_req->sector_mask = 0; - - io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; - io_req->offsets[0] = 0; - io_req->offsets[1] = ubd_dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; - io_req->sectorsize = 1 << 9; - - if(ubd_dev->cow.file != NULL) - cowify_req(io_req, ubd_dev->cow.bitmap, - ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); - -} + struct ubd *dev = hctx->queue->queuedata; + struct io_thread_req *io_req; + int ret; -/* Called with dev->lock held */ -static void prepare_flush_request(struct request *req, - struct io_thread_req *io_req) -{ - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + if (!io_req) + return -ENOMEM; io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : - ubd_dev->fd; - io_req->op = UBD_FLUSH; -} + if (dev->cow.file) + io_req->fds[0] = dev->cow.fd; + else + io_req->fds[0] = dev->fd; -static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) -{ - int n = os_write_file(thread_fd, &io_req, - sizeof(io_req)); - if (n != sizeof(io_req)) { - if (n != -EAGAIN) - printk("write to io thread failed, " - "errno = %d\n", -n); - else if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); + if (req_op(req) == REQ_OP_FLUSH) { + io_req->op = UBD_FLUSH; + } else { + io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; + io_req->offset = off; + io_req->length = bvec->bv_len; + io_req->error = 0; + io_req->sector_mask = 0; + + io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; + io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; + io_req->sectorsize = 1 << 9; + + if (dev->cow.file) { + cowify_req(io_req, dev->cow.bitmap, + dev->cow.bitmap_offset, dev->cow.bitmap_len); + } + } + ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); + if (ret != sizeof(io_req)) { + if (ret != -EAGAIN) + pr_err("write to io thread failed: %d\n", -ret); kfree(io_req); - return false; } - return true; + + return ret; } -/* Called with dev->lock held */ -static void do_ubd_request(struct request_queue *q) +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct io_thread_req *io_req; - struct request *req; - - while(1){ - struct ubd *dev = q->queuedata; - if(dev->request == NULL){ - struct request *req = blk_fetch_request(q); - if(req == NULL) - return; - - dev->request = req; - dev->rq_pos = blk_rq_pos(req); - dev->start_sg = 0; - dev->end_sg = blk_rq_map_sg(q, req, dev->sg); - } - - req = dev->request; + struct request *req = bd->rq; + int ret = 0; - if (req_op(req) == REQ_OP_FLUSH) { - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if (io_req == NULL) { - if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_flush_request(req, io_req); - if (submit_request(io_req, dev) == false) - return; - } + blk_mq_start_request(req); - while(dev->start_sg < dev->end_sg){ - struct scatterlist *sg = &dev->sg[dev->start_sg]; - - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if(io_req == NULL){ - if(list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_request(req, io_req, - (unsigned long long)dev->rq_pos << 9, - sg->offset, sg->length, sg_page(sg)); - - if (submit_request(io_req, dev) == false) - return; - - dev->rq_pos += sg->length >> 9; - dev->start_sg++; + if (req_op(req) == REQ_OP_FLUSH) { + ret = ubd_queue_one_vec(hctx, req, 0, NULL); + } else { + struct req_iterator iter; + struct bio_vec bvec; + u64 off = (u64)blk_rq_pos(req) << 9; + + rq_for_each_segment(bvec, req, iter) { + ret = ubd_queue_one_vec(hctx, req, off, &bvec); + if (ret < 0) + goto out; + off += bvec.bv_len; } - dev->end_sg = 0; - dev->request = NULL; } +out: + if (ret < 0) { + blk_mq_requeue_request(req, true); + } + return BLK_STS_OK; } static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h @@ -369,18 +369,6 @@ extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); -#ifdef CONFIG_XEN -#include <xen/xen.h> -struct bio_vec; - -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); - -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) -#endif /* CONFIG_XEN */ - #define IO_SPACE_LIMIT 0xffff #include <asm-generic/io.h> diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_XEN_EVENTS_H #define _ASM_X86_XEN_EVENTS_H +#include <xen/xen.h> + enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c @@ -5,6 +5,7 @@ #include <linux/kexec.h> #include <linux/slab.h> +#include <xen/xen.h> #include <xen/features.h> #include <xen/page.h> #include <xen/interface/memory.h> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c @@ -11,6 +11,7 @@ #include <asm/xen/interface.h> #include <asm/xen/hypercall.h> +#include <xen/xen.h> #include <xen/interface/memory.h> #include <xen/interface/hvm/start_info.h> diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c @@ -23,6 +23,7 @@ #include <linux/io.h> #include <linux/export.h> +#include <xen/xen.h> #include <xen/platform_pci.h> #include "xen-ops.h" diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c @@ -3,6 +3,7 @@ #include <linux/interrupt.h> #include <asm/xen/hypercall.h> +#include <xen/xen.h> #include <xen/page.h> #include <xen/interface/xen.h> #include <xen/interface/vcpu.h> diff --git a/block/Kconfig b/block/Kconfig @@ -74,7 +74,6 @@ config BLK_DEV_BSG config BLK_DEV_BSGLIB bool "Block layer SG support v4 helper lib" - default n select BLK_DEV_BSG select BLK_SCSI_REQUEST help @@ -107,7 +106,6 @@ config BLK_DEV_ZONED config BLK_DEV_THROTTLING bool "Block layer bio throttling support" depends on BLK_CGROUP=y - default n ---help--- Block layer bio throttling support. It can be used to limit the IO rate to a device. IO rate policies are per cgroup and @@ -119,7 +117,6 @@ config BLK_DEV_THROTTLING config BLK_DEV_THROTTLING_LOW bool "Block throttling .low limit interface support (EXPERIMENTAL)" depends on BLK_DEV_THROTTLING - default n ---help--- Add .low limit interface for block throttling. The low limit is a best effort limit to prioritize cgroups. Depending on the setting, the limit @@ -130,7 +127,6 @@ config BLK_DEV_THROTTLING_LOW config BLK_CMDLINE_PARSER bool "Block device command line partition parser" - default n ---help--- Enabling this option allows you to specify the partition layout from the kernel boot args. This is typically of use for embedded devices @@ -141,7 +137,6 @@ config BLK_CMDLINE_PARSER config BLK_WBT bool "Enable support for block device writeback throttling" - default n ---help--- Enabling this option enables the block layer to throttle buffered background writeback from the VM, making it more smooth and having @@ -152,7 +147,6 @@ config BLK_WBT config BLK_CGROUP_IOLATENCY bool "Enable support for latency based cgroup IO protection" depends on BLK_CGROUP=y - default n ---help--- Enabling this option enables the .latency interface for IO throttling. The IO controller will attempt to maintain average IO latencies below @@ -163,7 +157,6 @@ config BLK_CGROUP_IOLATENCY config BLK_WBT_SQ bool "Single queue writeback throttling" - default n depends on BLK_WBT ---help--- Enable writeback throttling by default on legacy single queue devices @@ -228,4 +221,7 @@ config BLK_MQ_RDMA depends on BLOCK && INFINIBAND default y +config BLK_PM + def_bool BLOCK && PM + source block/Kconfig.iosched diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched @@ -36,7 +36,6 @@ config IOSCHED_CFQ config CFQ_GROUP_IOSCHED bool "CFQ Group Scheduling support" depends on IOSCHED_CFQ && BLK_CGROUP - default n ---help--- Enable group IO scheduling in CFQ. @@ -82,7 +81,6 @@ config MQ_IOSCHED_KYBER config IOSCHED_BFQ tristate "BFQ I/O scheduler" - default n ---help--- BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of of the device among all processes according to their weights, @@ -94,7 +92,6 @@ config IOSCHED_BFQ config BFQ_GROUP_IOSCHED bool "BFQ hierarchical scheduling support" depends on IOSCHED_BFQ && BLK_CGROUP - default n ---help--- Enable hierarchical scheduling in BFQ, using the blkio diff --git a/block/Makefile b/block/Makefile @@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o +obj-$(CONFIG_BLK_PM) += blk-pm.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c @@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = bio_blkcg(bio)->css.serial_nr; + serial_nr = __bio_blkcg(bio)->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger @@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) goto out; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); + bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c @@ -624,12 +624,13 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) } /* - * Tell whether there are active queues or groups with differentiated weights. + * Tell whether there are active queues with different weights or + * active groups. */ -static bool bfq_differentiated_weights(struct bfq_data *bfqd) +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) { /* - * For weights to differ, at least one of the trees must contain + * For queue weights to differ, queue_weights_tree must contain * at least two nodes. */ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && @@ -637,9 +638,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) bfqd->queue_weights_tree.rb_node->rb_right) #ifdef CONFIG_BFQ_GROUP_IOSCHED ) || - (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && - (bfqd->group_weights_tree.rb_node->rb_left || - bfqd->group_weights_tree.rb_node->rb_right) + (bfqd->num_active_groups > 0 #endif ); } @@ -657,26 +656,25 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) * 3) all active groups at the same level in the groups tree have the same * number of children. * - * Unfortunately, keeping the necessary state for evaluating exactly the - * above symmetry conditions would be quite complex and time-consuming. - * Therefore this function evaluates, instead, the following stronger - * sub-conditions, for which it is much easier to maintain the needed - * state: + * Unfortunately, keeping the necessary state for evaluating exactly + * the last two symmetry sub-conditions above would be quite complex + * and time consuming. Therefore this function evaluates, instead, + * only the following stronger two sub-conditions, for which it is + * much easier to maintain the needed state: * 1) all active queues have the same weight, - * 2) all active groups have the same weight, - * 3) all active groups have at most one active child each. - * In particular, the last two conditions are always true if hierarchical - * support and the cgroups interface are not enabled, thus no state needs - * to be maintained in this case. + * 2) there are no active groups. + * In particular, the last condition is always true if hierarchical + * support or the cgroups interface are not enabled, thus no state + * needs to be maintained in this case. */ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) { - return !bfq_differentiated_weights(bfqd); + return !bfq_varied_queue_weights_or_active_groups(bfqd); } /* * If the weight-counter tree passed as input contains no counter for - * the weight of the input entity, then add that counter; otherwise just + * the weight of the input queue, then add that counter; otherwise just * increment the existing counter. * * Note that weight-counter trees contain few nodes in mostly symmetric @@ -687,25 +685,25 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * In most scenarios, the rate at which nodes are created/destroyed * should be low too. */ -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, +void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct rb_root *root) { + struct bfq_entity *entity = &bfqq->entity; struct rb_node **new = &(root->rb_node), *parent = NULL; /* - * Do not insert if the entity is already associated with a + * Do not insert if the queue is already associated with a * counter, which happens if: - * 1) the entity is associated with a queue, - * 2) a request arrival has caused the queue to become both + * 1) a request arrival has caused the queue to become both * non-weight-raised, and hence change its weight, and * backlogged; in this respect, each of the two events * causes an invocation of this function, - * 3) this is the invocation of this function caused by the + * 2) this is the invocation of this function caused by the * second event. This second invocation is actually useless, * and we handle this fact by exiting immediately. More * efficient or clearer solutions might possibly be adopted. */ - if (entity->weight_counter) + if (bfqq->weight_counter) return; while (*new) { @@ -715,7 +713,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, parent = *new; if (entity->weight == __counter->weight) { - entity->weight_counter = __counter; + bfqq->weight_counter = __counter; goto inc_counter; } if (entity->weight < __counter->weight) @@ -724,66 +722,67 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, new = &((*new)->rb_right); } - entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), - GFP_ATOMIC); + bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), + GFP_ATOMIC); /* * In the unlucky event of an allocation failure, we just - * exit. This will cause the weight of entity to not be - * considered in bfq_differentiated_weights, which, in its - * turn, causes the scenario to be deemed wrongly symmetric in - * case entity's weight would have been the only weight making - * the scenario asymmetric. On the bright side, no unbalance - * will however occur when entity becomes inactive again (the - * invocation of this function is triggered by an activation - * of entity). In fact, bfq_weights_tree_remove does nothing - * if !entity->weight_counter. + * exit. This will cause the weight of queue to not be + * considered in bfq_varied_queue_weights_or_active_groups, + * which, in its turn, causes the scenario to be deemed + * wrongly symmetric in case bfqq's weight would have been + * the only weight making the scenario asymmetric. On the + * bright side, no unbalance will however occur when bfqq + * becomes inactive again (the invocation of this function + * is triggered by an activation of queue). In fact, + * bfq_weights_tree_remove does nothing if + * !bfqq->weight_counter. */ - if (unlikely(!entity->weight_counter)) + if (unlikely(!bfqq->weight_counter)) return; - entity->weight_counter->weight = entity->weight; - rb_link_node(&entity->weight_counter->weights_node, parent, new); - rb_insert_color(&entity->weight_counter->weights_node, root); + bfqq->weight_counter->weight = entity->weight; + rb_link_node(&bfqq->weight_counter->weights_node, parent, new); + rb_insert_color(&bfqq->weight_counter->weights_node, root); inc_counter: - entity->weight_counter->num_active++; + bfqq->weight_counter->num_active++; } /* - * Decrement the weight counter associated with the entity, and, if the + * Decrement the weight counter associated with the queue, and, if the * counter reaches 0, remove the counter from the tree. * See the comments to the function bfq_weights_tree_add() for considerations * about overhead. */ void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_entity *entity, + struct bfq_queue *bfqq, struct rb_root *root) { - if (!entity->weight_counter) + if (!bfqq->weight_counter) return; - entity->weight_counter->num_active--; - if (entity->weight_counter->num_active > 0) + bfqq->weight_counter->num_active--; + if (bfqq->weight_counter->num_active > 0) goto reset_entity_pointer; - rb_erase(&entity->weight_counter->weights_node, root); - kfree(entity->weight_counter); + rb_erase(&bfqq->weight_counter->weights_node, root); + kfree(bfqq->weight_counter); reset_entity_pointer: - entity->weight_counter = NULL; + bfqq->weight_counter = NULL; } /* - * Invoke __bfq_weights_tree_remove on bfqq and all its inactive - * parent entities. + * Invoke __bfq_weights_tree_remove on bfqq and decrement the number + * of active groups for each queue's inactive parent entity. */ void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_entity *entity = bfqq->entity.parent; - __bfq_weights_tree_remove(bfqd, &bfqq->entity, + __bfq_weights_tree_remove(bfqd, bfqq, &bfqd->queue_weights_tree); for_each_entity(entity) { @@ -797,17 +796,13 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, * next_in_service for details on why * in_service_entity must be checked too). * - * As a consequence, the weight of entity is - * not to be removed. In addition, if entity - * is active, then its parent entities are - * active as well, and thus their weights are - * not to be removed either. In the end, this - * loop must stop here. + * As a consequence, its parent entities are + * active as well, and thus this loop must + * stop here. */ break; } - __bfq_weights_tree_remove(bfqd, entity, - &bfqd->group_weights_tree); + bfqd->num_active_groups--; } } @@ -3182,6 +3177,13 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } +static bool bfq_bfqq_injectable(struct bfq_queue *bfqq) +{ + return BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && + blk_queue_nonrot(bfqq->bfqd->queue) && + bfqq->bfqd->hw_tag; +} + /** * bfq_bfqq_expire - expire a queue. * @bfqd: device owning the queue. @@ -3291,6 +3293,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, if (ref == 1) /* bfqq is gone, no more actions on it */ return; + bfqq->injected_service = 0; + /* mark bfqq as waiting a request only if a bic still points to it */ if (!bfq_bfqq_busy(bfqq) && reason != BFQQE_BUDGET_TIMEOUT && @@ -3497,9 +3501,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * symmetric scenario where: * (i) each of these processes must get the same throughput as * the others; - * (ii) all these processes have the same I/O pattern - (either sequential or random). - * In fact, in such a scenario, the drive will tend to treat + * (ii) the I/O of each process has the same properties, in + * terms of locality (sequential or random), direction + * (reads or writes), request sizes, greediness + * (from I/O-bound to sporadic), and so on. + * In fact, in such a scenario, the drive tends to treat * the requests of each of these processes in about the same * way as the requests of the others, and thus to provide * each of these processes with about the same throughput @@ -3508,18 +3514,50 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * certainly needed to guarantee that bfqq receives its * assigned fraction of the device throughput (see [1] for * details). + * The problem is that idling may significantly reduce + * throughput with certain combinations of types of I/O and + * devices. An important example is sync random I/O, on flash + * storage with command queueing. So, unless bfqq falls in the + * above cases where idling also boosts throughput, it would + * be important to check conditions (i) and (ii) accurately, + * so as to avoid idling when not strictly needed for service + * guarantees. + * + * Unfortunately, it is extremely difficult to thoroughly + * check condition (ii). And, in case there are active groups, + * it becomes very difficult to check condition (i) too. In + * fact, if there are active groups, then, for condition (i) + * to become false, it is enough that an active group contains + * more active processes or sub-groups than some other active + * group. We address this issue with the following bi-modal + * behavior, implemented in the function + * bfq_symmetric_scenario(). * - * We address this issue by controlling, actually, only the - * symmetry sub-condition (i), i.e., provided that - * sub-condition (i) holds, idling is not performed, - * regardless of whether sub-condition (ii) holds. In other - * words, only if sub-condition (i) holds, then idling is + * If there are active groups, then the scenario is tagged as + * asymmetric, conservatively, without checking any of the + * conditions (i) and (ii). So the device is idled for bfqq. + * This behavior matches also the fact that groups are created + * exactly if controlling I/O (to preserve bandwidth and + * latency guarantees) is a primary concern. + * + * On the opposite end, if there are no active groups, then + * only condition (i) is actually controlled, i.e., provided + * that condition (i) holds, idling is not performed, + * regardless of whether condition (ii) holds. In other words, + * only if condition (i) does not hold, then idling is * allowed, and the device tends to be prevented from queueing - * many requests, possibly of several processes. The reason - * for not controlling also sub-condition (ii) is that we - * exploit preemption to preserve guarantees in case of - * symmetric scenarios, even if (ii) does not hold, as - * explained in the next two paragraphs. + * many requests, possibly of several processes. Since there + * are no active groups, then, to control condition (i) it is + * enough to check whether all active queues have the same + * weight. + * + * Not checking condition (ii) evidently exposes bfqq to the + * risk of getting less throughput than its fair share. + * However, for queues with the same weight, a further + * mechanism, preemption, mitigates or even eliminates this + * problem. And it does so without consequences on overall + * throughput. This mechanism and its benefits are explained + * in the next three paragraphs. * * Even if a queue, say Q, is expired when it remains idle, Q * can still preempt the new in-service queue if the next @@ -3533,11 +3571,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * idling allows the internal queues of the device to contain * many requests, and thus to reorder requests, we can rather * safely assume that the internal scheduler still preserves a - * minimum of mid-term fairness. The motivation for using - * preemption instead of idling is that, by not idling, - * service guarantees are preserved without minimally - * sacrificing throughput. In other words, both a high - * throughput and its desired distribution are obtained. + * minimum of mid-term fairness. * * More precisely, this preemption-based, idleless approach * provides fairness in terms of IOPS, and not sectors per @@ -3556,22 +3590,27 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * 1024/8 times as high as the service received by the other * queue. * - * On the other hand, device idling is performed, and thus - * pure sector-domain guarantees are provided, for the - * following queues, which are likely to need stronger - * throughput guarantees: weight-raised queues, and queues - * with a higher weight than other queues. When such queues - * are active, sub-condition (i) is false, which triggers - * device idling. + * The motivation for using preemption instead of idling (for + * queues with the same weight) is that, by not idling, + * service guarantees are preserved (completely or at least in + * part) without minimally sacrificing throughput. And, if + * there is no active group, then the primary expectation for + * this device is probably a high throughput. * - * According to the above considerations, the next variable is - * true (only) if sub-condition (i) holds. To compute the - * value of this variable, we not only use the return value of - * the function bfq_symmetric_scenario(), but also check - * whether bfqq is being weight-raised, because - * bfq_symmetric_scenario() does not take into account also - * weight-raised queues (see comments on - * bfq_weights_tree_add()). + * We are now left only with explaining the additional + * compound condition that is checked below for deciding + * whether the scenario is asymmetric. To explain this + * compound condition, we need to add that the function + * bfq_symmetric_scenario checks the weights of only + * non-weight-raised queues, for efficiency reasons (see + * comments on bfq_weights_tree_add()). Then the fact that + * bfqq is weight-raised is checked explicitly here. More + * precisely, the compound condition below takes into account + * also the fact that, even if bfqq is being weight-raised, + * the scenario is still symmetric if all active queues happen + * to be weight-raised. Actually, we should be even more + * precise here, and differentiate between interactive weight + * raising and soft real-time weight raising. * * As a side note, it is worth considering that the above * device-idling countermeasures may however fail in the @@ -3583,7 +3622,8 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * to let requests be served in the desired order until all * the requests already queued in the device have been served. */ - asymmetric_scenario = bfqq->wr_coeff > 1 || + asymmetric_scenario = (bfqq->wr_coeff > 1 && + bfqd->wr_busy_queues < bfqd->busy_queues) || !bfq_symmetric_scenario(bfqd); /* @@ -3629,6 +3669,30 @@ static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq); } +static struct bfq_queue *bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + + /* + * A linear search; but, with a high probability, very few + * steps are needed to find a candidate queue, i.e., a queue + * with enough budget left for its next request. In fact: + * - BFQ dynamically updates the budget of every queue so as + * to accommodate the expected backlog of the queue; + * - if a queue gets all its requests dispatched as injected + * service, then the queue is removed from the active list + * (and re-added only if it gets new requests, but with + * enough budget for its new backlog). + */ + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) + if (!RB_EMPTY_ROOT(&bfqq->sort_list) && + bfq_serv_to_charge(bfqq->next_rq, bfqq) <= + bfq_bfqq_budget_left(bfqq)) + return bfqq; + + return NULL; +} + /* * Select a queue for service. If we have a current queue in service, * check whether to continue servicing it, or retrieve and set a new one. @@ -3710,10 +3774,19 @@ check_queue: * No requests pending. However, if the in-service queue is idling * for a new request, or has requests waiting for a completion and * may idle after their completion, then keep it anyway. + * + * Yet, to boost throughput, inject service from other queues if + * possible. */ if (bfq_bfqq_wait_request(bfqq) || (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) { - bfqq = NULL; + if (bfq_bfqq_injectable(bfqq) && + bfqq->injected_service * bfqq->inject_coeff < + bfqq->entity.service * 10) + bfqq = bfq_choose_bfqq_for_injection(bfqd); + else + bfqq = NULL; + goto keep_queue; } @@ -3803,6 +3876,14 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, bfq_dispatch_remove(bfqd->queue, rq); + if (bfqq != bfqd->in_service_queue) { + if (likely(bfqd->in_service_queue)) + bfqd->in_service_queue->injected_service += + bfq_serv_to_charge(rq, bfqq); + + goto return_rq; + } + /* * If weight raising has to terminate for bfqq, then next * function causes an immediate update of bfqq's weight, @@ -3821,13 +3902,12 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, * belongs to CLASS_IDLE and other queues are waiting for * service. */ - if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq)) - goto expire; - - return rq; + if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq))) + goto return_rq; -expire: bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED); + +return_rq: return rq; } @@ -4232,6 +4312,13 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_mark_bfqq_has_short_ttime(bfqq); bfq_mark_bfqq_sync(bfqq); bfq_mark_bfqq_just_created(bfqq); + /* + * Aggressively inject a lot of service: up to 90%. + * This coefficient remains constant during bfqq life, + * but this behavior might be changed, after enough + * testing and tuning. + */ + bfqq->inject_coeff = 1; } else bfq_clear_bfqq_sync(bfqq); @@ -4297,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, rcu_read_lock(); - bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); + bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); if (!bfqg) { bfqq = &bfqd->oom_bfqq; goto out; @@ -5330,7 +5417,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->idle_slice_timer.function = bfq_idle_slice_timer; bfqd->queue_weights_tree = RB_ROOT; - bfqd->group_weights_tree = RB_ROOT; + bfqd->num_active_groups = 0; INIT_LIST_HEAD(&bfqd->active_list); INIT_LIST_HEAD(&bfqd->idle_list); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h @@ -108,15 +108,14 @@ struct bfq_sched_data { }; /** - * struct bfq_weight_counter - counter of the number of all active entities + * struct bfq_weight_counter - counter of the number of all active queues * with a given weight. */ struct bfq_weight_counter { - unsigned int weight; /* weight of the entities this counter refers to */ - unsigned int num_active; /* nr of active entities with this weight */ + unsigned int weight; /* weight of the queues this counter refers to */ + unsigned int num_active; /* nr of active queues with this weight */ /* - * Weights tree member (see bfq_data's @queue_weights_tree and - * @group_weights_tree) + * Weights tree member (see bfq_data's @queue_weights_tree) */ struct rb_node weights_node; }; @@ -151,8 +150,6 @@ struct bfq_weight_counter { struct bfq_entity { /* service_tree member */ struct rb_node rb_node; - /* pointer to the weight counter associated with this entity */ - struct bfq_weight_counter *weight_counter; /* * Flag, true if the entity is on a tree (either the active or @@ -266,6 +263,9 @@ struct bfq_queue { /* entity representing this queue in the scheduler */ struct bfq_entity entity; + /* pointer to the weight counter associated with this entity */ + struct bfq_weight_counter *weight_counter; + /* maximum budget allowed from the feedback mechanism */ int max_budget; /* budget expiration (in jiffies) */ @@ -351,6 +351,32 @@ struct bfq_queue { unsigned long split_time; /* time of last split */ unsigned long first_IO_time; /* time of first I/O for this queue */ + + /* max service rate measured so far */ + u32 max_service_rate; + /* + * Ratio between the service received by bfqq while it is in + * service, and the cumulative service (of requests of other + * queues) that may be injected while bfqq is empty but still + * in service. To increase precision, the coefficient is + * measured in tenths of unit. Here are some example of (1) + * ratios, (2) resulting percentages of service injected + * w.r.t. to the total service dispatched while bfqq is in + * service, and (3) corresponding values of the coefficient: + * 1 (50%) -> 10 + * 2 (33%) -> 20 + * 10 (9%) -> 100 + * 9.9 (9%) -> 99 + * 1.5 (40%) -> 15 + * 0.5 (66%) -> 5 + * 0.1 (90%) -> 1 + * + * So, if the coefficient is lower than 10, then + * injected service is more than bfqq service. + */ + unsigned int inject_coeff; + /* amount of service injected in current service slot */ + unsigned int injected_service; }; /** @@ -423,14 +449,9 @@ struct bfq_data { */ struct rb_root queue_weights_tree; /* - * rbtree of non-queue @bfq_entity weight counters, sorted by - * weight. Used to keep track of whether all @bfq_groups have - * the same weight. The tree contains one counter for each - * distinct weight associated to some active @bfq_group (see - * the comments to the functions bfq_weights_tree_[add|remove] - * for further details). + * number of groups with requests still waiting for completion */ - struct rb_root group_weights_tree; + unsigned int num_active_groups; /* * Number of bfq_queues containing requests (including the @@ -825,10 +846,10 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync); void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, +void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct rb_root *root); void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_entity *entity, + struct bfq_queue *bfqq, struct rb_root *root); void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c @@ -788,25 +788,29 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, new_weight = entity->orig_weight * (bfqq ? bfqq->wr_coeff : 1); /* - * If the weight of the entity changes, remove the entity - * from its old weight counter (if there is a counter - * associated with the entity), and add it to the counter - * associated with its new weight. + * If the weight of the entity changes, and the entity is a + * queue, remove the entity from its old weight counter (if + * there is a counter associated with the entity). */ if (prev_weight != new_weight) { - root = bfqq ? &bfqd->queue_weights_tree : - &bfqd->group_weights_tree; - __bfq_weights_tree_remove(bfqd, entity, root); + if (bfqq) { + root = &bfqd->queue_weights_tree; + __bfq_weights_tree_remove(bfqd, bfqq, root); + } else + bfqd->num_active_groups--; } entity->weight = new_weight; /* - * Add the entity to its weights tree only if it is - * not associated with a weight-raised queue. + * Add the entity, if it is not a weight-raised queue, + * to the counter associated with its new weight. */ - if (prev_weight != new_weight && - (bfqq ? bfqq->wr_coeff == 1 : 1)) - /* If we get here, root has been initialized. */ - bfq_weights_tree_add(bfqd, entity, root); + if (prev_weight != new_weight) { + if (bfqq && bfqq->wr_coeff == 1) { + /* If we get here, root has been initialized. */ + bfq_weights_tree_add(bfqd, bfqq, root); + } else + bfqd->num_active_groups++; + } new_st->wsum += entity->weight; @@ -1012,9 +1016,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity, if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */ struct bfq_group *bfqg = container_of(entity, struct bfq_group, entity); + struct bfq_data *bfqd = bfqg->bfqd; - bfq_weights_tree_add(bfqg->bfqd, entity, - &bfqd->group_weights_tree); + bfqd->num_active_groups++; } #endif @@ -1181,10 +1185,17 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) st = bfq_entity_service_tree(entity); is_in_service = entity == sd->in_service_entity; - if (is_in_service) { - bfq_calc_finish(entity, entity->service); + bfq_calc_finish(entity, entity->service); + + if (is_in_service) sd->in_service_entity = NULL; - } + else + /* + * Non in-service entity: nobody will take care of + * resetting its service counter on expiration. Do it + * now. + */ + entity->service = 0; if (entity->tree == &st->active) bfq_active_extract(st, entity); @@ -1685,7 +1696,7 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) if (!bfqq->dispatched) if (bfqq->wr_coeff == 1) - bfq_weights_tree_add(bfqd, &bfqq->entity, + bfq_weights_tree_add(bfqd, bfqq, &bfqd->queue_weights_tree); if (bfqq->wr_coeff > 1) diff --git a/block/bio-integrity.c b/block/bio-integrity.c @@ -306,6 +306,8 @@ bool bio_integrity_prep(struct bio *bio) if (bio_data_dir(bio) == WRITE) { bio_integrity_process(bio, &bio->bi_iter, bi->profile->generate_fn); + } else { + bip->bio_iter = bio->bi_iter; } return true; @@ -331,20 +333,14 @@ static void bio_integrity_verify_fn(struct work_struct *work) container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); - struct bvec_iter iter = bio->bi_iter; /* * At the moment verify is called bio's iterator was advanced * during split and completion, we need to rewind iterator to * it's original position. */ - if (bio_rewind_iter(bio, &iter, iter.bi_done)) { - bio->bi_status = bio_integrity_process(bio, &iter, - bi->profile->verify_fn); - } else { - bio->bi_status = BLK_STS_IOERR; - } - + bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, + bi->profile->verify_fn); bio_integrity_free(bio); bio_endio(bio); } diff --git a/block/bio.c b/block/bio.c @@ -609,7 +609,9 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); + + blkcg_bio_issue_init(bio); } EXPORT_SYMBOL(__bio_clone_fast); @@ -729,7 +731,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec)) bio_clear_flag(bio, BIO_SEG_VALID); done: @@ -827,6 +829,8 @@ int bio_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_add_page); +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) + /** * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio * @bio: bio to add pages to @@ -839,38 +843,35 @@ EXPORT_SYMBOL(bio_add_page); */ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx; + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; + ssize_t size, left; + unsigned len, i; size_t offset; - ssize_t size; + + /* + * Move page array up in the allocated memory for the bio vecs as far as + * possible so that we can start filling biovecs from the beginning + * without overwriting the temporary page array. + */ + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); if (unlikely(size <= 0)) return size ? size : -EFAULT; - idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; - /* - * Deep magic below: We need to walk the pinned pages backwards - * because we are abusing the space allocated for the bio_vecs - * for the page array. Because the bio_vecs are larger than the - * page pointers by definition this will always work. But it also - * means we can't use bio_add_page, so any changes to it's semantics - * need to be reflected here as well. - */ - bio->bi_iter.bi_size += size; - bio->bi_vcnt += nr_pages; + for (left = size, i = 0; left > 0; left -= len, i++) { + struct page *page = pages[i]; - while (idx--) { - bv[idx].bv_page = pages[idx]; - bv[idx].bv_len = PAGE_SIZE; - bv[idx].bv_offset = 0; + len = min_t(size_t, PAGE_SIZE - offset, left); + if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len)) + return -EINVAL; + offset = 0; } - bv[0].bv_offset += offset; - bv[0].bv_len -= offset; - bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; - iov_iter_advance(iter, size); return 0; } @@ -1807,7 +1808,6 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); - bio->bi_iter.bi_done = 0; if (bio_flagged(bio, BIO_TRACE_COMPLETION)) bio_set_flag(split, BIO_TRACE_COMPLETION); @@ -1956,69 +1956,151 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP +/** + * bio_associate_blkg - associate a bio with the a blkg + * @bio: target bio + * @blkg: the blkg to associate + * + * This tries to associate @bio with the specified blkg. Association failure + * is handled by walking up the blkg tree. Therefore, the blkg associated can + * be anything between @blkg and the root_blkg. This situation only happens + * when a cgroup is dying and then the remaining bios will spill to the closest + * alive blkg. + * + * A reference will be taken on the @blkg and will be released when @bio is + * freed. + */ +int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) +{ + if (unlikely(bio->bi_blkg)) + return -EBUSY; + bio->bi_blkg = blkg_tryget_closest(blkg); + return 0; +} + +/** + * __bio_associate_blkg_from_css - internal blkg association function + * + * This in the core association function that all association paths rely on. + * A blkg reference is taken which is released upon freeing of the bio. + */ +static int __bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + struct request_queue *q = bio->bi_disk->queue; + struct blkcg_gq *blkg; + int ret; + + rcu_read_lock(); + + if (!css || !css->parent) + blkg = q->root_blkg; + else + blkg = blkg_lookup_create(css_to_blkcg(css), q); + + ret = bio_associate_blkg(bio, blkg); + + rcu_read_unlock(); + return ret; +} + +/** + * bio_associate_blkg_from_css - associate a bio with a specified css + * @bio: target bio + * @css: target css + * + * Associate @bio with the blkg found by combining the css's blkg and the + * request_queue of the @bio. This falls back to the queue's root_blkg if + * the association fails with the css. + */ +int bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + if (unlikely(bio->bi_blkg)) + return -EBUSY; + return __bio_associate_blkg_from_css(bio, css); +} +EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); + #ifdef CONFIG_MEMCG /** - * bio_associate_blkcg_from_page - associate a bio with the page's blkcg + * bio_associate_blkg_from_page - associate a bio with the page's blkg * @bio: target bio * @page: the page to lookup the blkcg from * - * Associate @bio with the blkcg from @page's owning memcg. This works like - * every other associate function wrt references. + * Associate @bio with the blkg from @page's owning memcg and the respective + * request_queue. If cgroup_e_css returns NULL, fall back to the queue's + * root_blkg. + * + * Note: this must be called after bio has an associated device. */ -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) +int bio_associate_blkg_from_page(struct bio *bio, struct page *page) { - struct cgroup_subsys_state *blkcg_css; + struct cgroup_subsys_state *css; + int ret; - if (unlikely(bio->bi_css)) + if (unlikely(bio->bi_blkg)) return -EBUSY; if (!page->mem_cgroup) return 0; - blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, - &io_cgrp_subsys); - bio->bi_css = blkcg_css; - return 0; + + rcu_read_lock(); + + css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + + ret = __bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); + return ret; } #endif /* CONFIG_MEMCG */ /** - * bio_associate_blkcg - associate a bio with the specified blkcg + * bio_associate_create_blkg - associate a bio with a blkg from q + * @q: request_queue where bio is going * @bio: target bio - * @blkcg_css: css of the blkcg to associate - * - * Associate @bio with the blkcg specified by @blkcg_css. Block layer will - * treat @bio as if it were issued by a task which belongs to the blkcg. * - * This function takes an extra reference of @blkcg_css which will be put - * when @bio is released. The caller must own @bio and is responsible for - * synchronizing calls to this function. + * Associate @bio with the blkg found from the bio's css and the request_queue. + * If one is not found, bio_lookup_blkg creates the blkg. This falls back to + * the queue's root_blkg if association fails. */ -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) +int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) { - if (unlikely(bio->bi_css)) - return -EBUSY; - css_get(blkcg_css); - bio->bi_css = blkcg_css; - return 0; + struct cgroup_subsys_state *css; + int ret = 0; + + /* someone has already associated this bio with a blkg */ + if (bio->bi_blkg) + return ret; + + rcu_read_lock(); + + css = blkcg_css(); + + ret = __bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); + return ret; } -EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_associate_blkg - associate a bio with the specified blkg + * bio_reassociate_blkg - reassociate a bio with a blkg from q + * @q: request_queue where bio is going * @bio: target bio - * @blkg: the blkg to associate * - * Associate @bio with the blkg specified by @blkg. This is the queue specific - * blkcg information associated with the @bio, a reference will be taken on the - * @blkg and will be freed when the bio is freed. + * When submitting a bio, multiple recursive calls to make_request() may occur. + * This causes the initial associate done in blkcg_bio_issue_check() to be + * incorrect and reference the prior request_queue. This performs reassociation + * when this situation happens. */ -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) +int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) { - if (unlikely(bio->bi_blkg)) - return -EBUSY; - if (!blkg_try_get(blkg)) - return -ENODEV; - bio->bi_blkg = blkg; - return 0; + if (bio->bi_blkg) { + blkg_put(bio->bi_blkg); + bio->bi_blkg = NULL; + } + + return bio_associate_create_blkg(q, bio); } /** @@ -2031,10 +2113,6 @@ void bio_disassociate_task(struct bio *bio) put_io_context(bio->bi_ioc); bio->bi_ioc = NULL; } - if (bio->bi_css) { - css_put(bio->bi_css); - bio->bi_css = NULL; - } if (bio->bi_blkg) { blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; @@ -2042,16 +2120,16 @@ void bio_disassociate_task(struct bio *bio) } /** - * bio_clone_blkcg_association - clone blkcg association from src to dst bio + * bio_clone_blkg_association - clone blkg association from src to dst bio * @dst: destination bio * @src: source bio */ -void bio_clone_blkcg_association(struct bio *dst, struct bio *src) +void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_css) - WARN_ON(bio_associate_blkcg(dst, src->bi_css)); + if (src->bi_blkg) + bio_associate_blkg(dst, src->bi_blkg); } -EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); +EXPORT_SYMBOL_GPL(bio_clone_blkg_association); #endif /* CONFIG_BLK_CGROUP */ static void __init biovec_init_slabs(void) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c @@ -84,6 +84,37 @@ static void blkg_free(struct blkcg_gq *blkg) kfree(blkg); } +static void __blkg_release(struct rcu_head *rcu) +{ + struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + + percpu_ref_exit(&blkg->refcnt); + + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); + + wb_congested_put(blkg->wb_congested); + + blkg_free(blkg); +} + +/* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. + */ +static void blkg_release(struct percpu_ref *ref) +{ + struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); + + call_rcu(&blkg->rcu_head, __blkg_release); +} + /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with @@ -110,7 +141,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - atomic_set(&blkg->refcnt, 1); /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { @@ -217,6 +247,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_get(blkg->parent); } + ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0, + GFP_NOWAIT | __GFP_NOWARN); + if (ret) + goto err_cancel_ref; + /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -249,6 +284,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_put(blkg); return ERR_PTR(ret); +err_cancel_ref: + percpu_ref_exit(&blkg->refcnt); err_put_congested: wb_congested_put(wb_congested); err_put_css: @@ -259,7 +296,7 @@ err_free_blkg: } /** - * blkg_lookup_create - lookup blkg, try to create one if not there + * __blkg_lookup_create - lookup blkg, try to create one if not there * @blkcg: blkcg of interest * @q: request_queue of interest * @@ -268,12 +305,11 @@ err_free_blkg: * that all non-root blkg's have access to the parent blkg. This function * should be called under RCU read lock and @q->queue_lock. * - * Returns pointer to the looked up or created blkg on success, ERR_PTR() - * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not - * dead and bypassing, returns ERR_PTR(-EBUSY). + * Returns the blkg or the closest blkg if blkg_create fails as it walks + * down from root. */ -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) { struct blkcg_gq *blkg; @@ -285,7 +321,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY); + return q->root_blkg; blkg = __blkg_lookup(blkcg, q, true); if (blkg) @@ -293,23 +329,58 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, /* * Create blkgs walking down from blkcg_root to @blkcg, so that all - * non-root blkgs have access to their parents. + * non-root blkgs have access to their parents. Returns the closest + * blkg to the intended blkg should blkg_create() fail. */ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - - while (parent && !__blkg_lookup(parent, q, false)) { + struct blkcg_gq *ret_blkg = q->root_blkg; + + while (parent) { + blkg = __blkg_lookup(parent, q, false); + if (blkg) { + /* remember closest blkg */ + ret_blkg = blkg; + break; + } pos = parent; parent = blkcg_parent(parent); } blkg = blkg_create(pos, q, NULL); - if (pos == blkcg || IS_ERR(blkg)) + if (IS_ERR(blkg)) + return ret_blkg; + if (pos == blkcg) return blkg; } } +/** + * blkg_lookup_create - find or create a blkg + * @blkcg: target block cgroup + * @q: target request_queue + * + * This looks up or creates the blkg representing the unique pair + * of the blkcg and the request_queue. + */ +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) +{ + struct blkcg_gq *blkg = blkg_lookup(blkcg, q); + unsigned long flags; + + if (unlikely(!blkg)) { + spin_lock_irqsave(q->queue_lock, flags); + + blkg = __blkg_lookup_create(blkcg, q); + + spin_unlock_irqrestore(q->queue_lock, flags); + } + + return blkg; +} + static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; @@ -353,7 +424,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - blkg_put(blkg); + percpu_ref_kill(&blkg->refcnt); } /** @@ -381,29 +452,6 @@ static void blkg_destroy_all(struct request_queue *q) } /* - * A group is RCU protected, but having an rcu lock does not mean that one - * can access all the fields of blkg and assume these are valid. For - * example, don't try to follow throtl_data and request queue links. - * - * Having a reference to blkg under an rcu allows accesses to only values - * local to groups like group stats and group rate limits. - */ -void __blkg_release_rcu(struct rcu_head *rcu_head) -{ - struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); - - /* release the blkcg and parent blkg refs this blkg has been holding */ - css_put(&blkg->blkcg->css); - if (blkg->parent) - blkg_put(blkg->parent); - - wb_congested_put(blkg->wb_congested); - - blkg_free(blkg); -} -EXPORT_SYMBOL_GPL(__blkg_release_rcu); - -/* * The next function used by blk_queue_for_each_rl(). It's a bit tricky * because the root blkg uses @q->root_rl instead of its own rl. */ @@ -1748,8 +1796,7 @@ void blkcg_maybe_throttle_current(void) blkg = blkg_lookup(blkcg, q); if (!blkg) goto out; - blkg = blkg_try_get(blkg); - if (!blkg) + if (!blkg_tryget(blkg)) goto out; rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c @@ -42,6 +42,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-rq-qos.h" #ifdef CONFIG_DEBUG_FS @@ -421,24 +422,25 @@ void blk_sync_queue(struct request_queue *q) EXPORT_SYMBOL(blk_sync_queue); /** - * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY + * blk_set_pm_only - increment pm_only counter * @q: request queue pointer - * - * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not - * set and 1 if the flag was already set. */ -int blk_set_preempt_only(struct request_queue *q) +void blk_set_pm_only(struct request_queue *q) { - return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); + atomic_inc(&q->pm_only); } -EXPORT_SYMBOL_GPL(blk_set_preempt_only); +EXPORT_SYMBOL_GPL(blk_set_pm_only); -void blk_clear_preempt_only(struct request_queue *q) +void blk_clear_pm_only(struct request_queue *q) { - blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); - wake_up_all(&q->mq_freeze_wq); + int pm_only; + + pm_only = atomic_dec_return(&q->pm_only); + WARN_ON_ONCE(pm_only < 0); + if (pm_only == 0) + wake_up_all(&q->mq_freeze_wq); } -EXPORT_SYMBOL_GPL(blk_clear_preempt_only); +EXPORT_SYMBOL_GPL(blk_clear_pm_only); /** * __blk_run_queue_uncond - run a queue whether or not it has been stopped @@ -917,7 +919,7 @@ EXPORT_SYMBOL(blk_alloc_queue); */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool preempt = flags & BLK_MQ_REQ_PREEMPT; + const bool pm = flags & BLK_MQ_REQ_PREEMPT; while (true) { bool success = false; @@ -925,11 +927,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { /* - * The code that sets the PREEMPT_ONLY flag is - * responsible for ensuring that that flag is globally - * visible before the queue is unfrozen. + * The code that increments the pm_only counter is + * responsible for ensuring that that counter is + * globally visible before the queue is unfrozen. */ - if (preempt || !blk_queue_preempt_only(q)) { + if (pm || !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -954,7 +956,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (atomic_read(&q->mq_freeze_depth) == 0 && - (preempt || !blk_queue_preempt_only(q))) || + (pm || (blk_pm_request_resume(q), + !blk_queue_pm_only(q)))) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; @@ -1051,8 +1054,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); - if (!q->mq_ops) - q->queue_lock = lock ? : &q->__queue_lock; + q->queue_lock = lock ? : &q->__queue_lock; /* * A queue starts its life with bypass turned on to avoid @@ -1160,7 +1162,7 @@ int blk_init_allocated_queue(struct request_queue *q) { WARN_ON_ONCE(q->mq_ops); - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); + q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL); if (!q->fq) return -ENOMEM; @@ -1726,16 +1728,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part) } EXPORT_SYMBOL_GPL(part_round_stats); -#ifdef CONFIG_PM -static void blk_pm_put_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) - pm_runtime_mark_last_busy(rq->q->dev); -} -#else -static inline void blk_pm_put_request(struct request *rq) {} -#endif - void __blk_put_request(struct request_queue *q, struct request *req) { req_flags_t rq_flags = req->rq_flags; @@ -1752,6 +1744,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) blk_req_zone_write_unlock(req); blk_pm_put_request(req); + blk_pm_mark_last_busy(req); elv_completed_request(q, req); @@ -2440,6 +2433,7 @@ blk_qc_t generic_make_request(struct bio *bio) if (q) blk_queue_exit(q); q = bio->bi_disk->queue; + bio_reassociate_blkg(q, bio); flags = 0; if (bio->bi_opf & REQ_NOWAIT) flags = BLK_MQ_REQ_NOWAIT; @@ -2750,30 +2744,6 @@ void blk_account_io_done(struct request *req, u64 now) } } -#ifdef CONFIG_PM -/* - * Don't process normal requests when queue is suspended - * or in the process of suspending/resuming - */ -static bool blk_pm_allow_request(struct request *rq) -{ - switch (rq->q->rpm_status) { - case RPM_RESUMING: - case RPM_SUSPENDING: - return rq->rq_flags & RQF_PM; - case RPM_SUSPENDED: - return false; - default: - return true; - } -} -#else -static bool blk_pm_allow_request(struct request *rq) -{ - return true; -} -#endif - void blk_account_io_start(struct request *rq, bool new_io) { struct hd_struct *part; @@ -2819,11 +2789,14 @@ static struct request *elv_next_request(struct request_queue *q) while (1) { list_for_each_entry(rq, &q->queue_head, queuelist) { - if (blk_pm_allow_request(rq)) - return rq; - - if (rq->rq_flags & RQF_SOFTBARRIER) - break; +#ifdef CONFIG_PM + /* + * If a request gets queued in state RPM_SUSPENDED + * then that's a kernel bug. + */ + WARN_ON_ONCE(q->rpm_status == RPM_SUSPENDED); +#endif + return rq; } /* @@ -3755,191 +3728,6 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); -#ifdef CONFIG_PM -/** - * blk_pm_runtime_init - Block layer runtime PM initialization routine - * @q: the queue of the device - * @dev: the device the queue belongs to - * - * Description: - * Initialize runtime-PM-related fields for @q and start auto suspend for - * @dev. Drivers that want to take advantage of request-based runtime PM - * should call this function after @dev has been initialized, and its - * request queue @q has been allocated, and runtime PM for it can not happen - * yet(either due to disabled/forbidden or its usage_count > 0). In most - * cases, driver should call this function before any I/O has taken place. - * - * This function takes care of setting up using auto suspend for the device, - * the autosuspend delay is set to -1 to make runtime suspend impossible - * until an updated value is either set by user or by driver. Drivers do - * not need to touch other autosuspend settings. - * - * The block layer runtime PM is request based, so only works for drivers - * that use request as their IO unit instead of those directly use bio's. - */ -void blk_pm_runtime_init(struct request_queue *q, struct device *dev) -{ - /* Don't enable runtime PM for blk-mq until it is ready */ - if (q->mq_ops) { - pm_runtime_disable(dev); - return; - } - - q->dev = dev; - q->rpm_status = RPM_ACTIVE; - pm_runtime_set_autosuspend_delay(q->dev, -1); - pm_runtime_use_autosuspend(q->dev); -} -EXPORT_SYMBOL(blk_pm_runtime_init); - -/** - * blk_pre_runtime_suspend - Pre runtime suspend check - * @q: the queue of the device - * - * Description: - * This function will check if runtime suspend is allowed for the device - * by examining if there are any requests pending in the queue. If there - * are requests pending, the device can not be runtime suspended; otherwise, - * the queue's status will be updated to SUSPENDING and the driver can - * proceed to suspend the device. - * - * For the not allowed case, we mark last busy for the device so that - * runtime PM core will try to autosuspend it some time later. - * - * This function should be called near the start of the device's - * runtime_suspend callback. - * - * Return: - * 0 - OK to runtime suspend the device - * -EBUSY - Device should not be runtime suspended - */ -int blk_pre_runtime_suspend(struct request_queue *q) -{ - int ret = 0; - - if (!q->dev) - return ret; - - spin_lock_irq(q->queue_lock); - if (q->nr_pending) { - ret = -EBUSY; - pm_runtime_mark_last_busy(q->dev); - } else { - q->rpm_status = RPM_SUSPENDING; - } - spin_unlock_irq(q->queue_lock); - return ret; -} -EXPORT_SYMBOL(blk_pre_runtime_suspend); - -/** - * blk_post_runtime_suspend - Post runtime suspend processing - * @q: the queue of the device - * @err: return value of the device's runtime_suspend function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime suspend function and mark last busy for the device so - * that PM core will try to auto suspend the device at a later time. - * - * This function should be called near the end of the device's - * runtime_suspend callback. - */ -void blk_post_runtime_suspend(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_SUSPENDED; - } else { - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_suspend); - -/** - * blk_pre_runtime_resume - Pre runtime resume processing - * @q: the queue of the device - * - * Description: - * Update the queue's runtime status to RESUMING in preparation for the - * runtime resume of the device. - * - * This function should be called near the start of the device's - * runtime_resume callback. - */ -void blk_pre_runtime_resume(struct request_queue *q) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_RESUMING; - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_pre_runtime_resume); - -/** - * blk_post_runtime_resume - Post runtime resume processing - * @q: the queue of the device - * @err: return value of the device's runtime_resume function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime_resume function. If it is successfully resumed, process - * the requests that are queued into the device's queue when it is resuming - * and then mark last busy and initiate autosuspend for it. - * - * This function should be called near the end of the device's - * runtime_resume callback. - */ -void blk_post_runtime_resume(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_ACTIVE; - __blk_run_queue(q); - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - } else { - q->rpm_status = RPM_SUSPENDED; - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_resume); - -/** - * blk_set_runtime_active - Force runtime status of the queue to be active - * @q: the queue of the device - * - * If the device is left runtime suspended during system suspend the resume - * hook typically resumes the device and corrects runtime status - * accordingly. However, that does not affect the queue runtime PM status - * which is still "suspended". This prevents processing requests from the - * queue. - * - * This function can be used in driver's resume hook to correct queue - * runtime PM status and re-enable peeking requests from the queue. It - * should be called before first request is added to the queue. - */ -void blk_set_runtime_active(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_set_runtime_active); -#endif - int __init blk_dev_init(void) { BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); diff --git a/block/blk-flush.c b/block/blk-flush.c @@ -566,12 +566,12 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, EXPORT_SYMBOL(blkdev_issue_flush); struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size) + int node, int cmd_size, gfp_t flags) { struct blk_flush_queue *fq; int rq_sz = sizeof(struct request); - fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node); + fq = kzalloc_node(sizeof(*fq), flags, node); if (!fq) goto fail; @@ -579,7 +579,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, spin_lock_init(&fq->mq_flush_lock); rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); - fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node); + fq->flush_rq = kzalloc_node(rq_sz, flags, node); if (!fq->flush_rq) goto fail_rq; diff --git a/block/blk-integrity.c b/block/blk-integrity.c @@ -49,12 +49,8 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) + if (!biovec_phys_mergeable(q, &ivprv, &iv)) goto new_segment; - - if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) - goto new_segment; - if (seg_size + iv.bv_len > queue_max_segment_size(q)) goto new_segment; @@ -95,12 +91,8 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) + if (!biovec_phys_mergeable(q, &ivprv, &iv)) goto new_segment; - - if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) - goto new_segment; - if (sg->length + iv.bv_len > queue_max_segment_size(q)) goto new_segment; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c @@ -115,9 +115,22 @@ struct child_latency_info { atomic_t scale_cookie; }; +struct percentile_stats { + u64 total; + u64 missed; +}; + +struct latency_stat { + union { + struct percentile_stats ps; + struct blk_rq_stat rqs; + }; +}; + struct iolatency_grp { struct blkg_policy_data pd; - struct blk_rq_stat __percpu *stats; + struct latency_stat __percpu *stats; + struct latency_stat cur_stat; struct blk_iolatency *blkiolat; struct rq_depth rq_depth; struct rq_wait rq_wait; @@ -132,6 +145,7 @@ struct iolatency_grp { /* Our current number of IO's for the last summation. */ u64 nr_samples; + bool ssd; struct child_latency_info child_lat; }; @@ -172,6 +186,80 @@ static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat) return pd_to_blkg(&iolat->pd); } +static inline void latency_stat_init(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) { + stat->ps.total = 0; + stat->ps.missed = 0; + } else + blk_rq_stat_init(&stat->rqs); +} + +static inline void latency_stat_sum(struct iolatency_grp *iolat, + struct latency_stat *sum, + struct latency_stat *stat) +{ + if (iolat->ssd) { + sum->ps.total += stat->ps.total; + sum->ps.missed += stat->ps.missed; + } else + blk_rq_stat_sum(&sum->rqs, &stat->rqs); +} + +static inline void latency_stat_record_time(struct iolatency_grp *iolat, + u64 req_time) +{ + struct latency_stat *stat = get_cpu_ptr(iolat->stats); + if (iolat->ssd) { + if (req_time >= iolat->min_lat_nsec) + stat->ps.missed++; + stat->ps.total++; + } else + blk_rq_stat_add(&stat->rqs, req_time); + put_cpu_ptr(stat); +} + +static inline bool latency_sum_ok(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) { + u64 thresh = div64_u64(stat->ps.total, 10); + thresh = max(thresh, 1ULL); + return stat->ps.missed < thresh; + } + return stat->rqs.mean <= iolat->min_lat_nsec; +} + +static inline u64 latency_stat_samples(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) + return stat->ps.total; + return stat->rqs.nr_samples; +} + +static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + int exp_idx; + + if (iolat->ssd) + return; + + /* + * CALC_LOAD takes in a number stored in fixed point representation. + * Because we are using this for IO time in ns, the values stored + * are significantly larger than the FIXED_1 denominator (2048). + * Therefore, rounding errors in the calculation are negligible and + * can be ignored. + */ + exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, + div64_u64(iolat->cur_win_nsec, + BLKIOLATENCY_EXP_BUCKET_SIZE)); + CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat->rqs.mean); +} + static inline bool iolatency_may_queue(struct iolatency_grp *iolat, wait_queue_entry_t *wait, bool first_block) @@ -255,7 +343,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, struct child_latency_info *lat_info, bool up) { - unsigned long qd = blk_queue_depth(blkiolat->rqos.q); + unsigned long qd = blkiolat->rqos.q->nr_requests; unsigned long scale = scale_amount(qd, up); unsigned long old = atomic_read(&lat_info->scale_cookie); unsigned long max_scale = qd << 1; @@ -295,10 +383,9 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, */ static void scale_change(struct iolatency_grp *iolat, bool up) { - unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q); + unsigned long qd = iolat->blkiolat->rqos.q->nr_requests; unsigned long scale = scale_amount(qd, up); unsigned long old = iolat->rq_depth.max_depth; - bool changed = false; if (old > qd) old = qd; @@ -308,15 +395,13 @@ static void scale_change(struct iolatency_grp *iolat, bool up) return; if (old < qd) { - changed = true; old += scale; old = min(old, qd); iolat->rq_depth.max_depth = old; wake_up_all(&iolat->rq_wait.wait); } - } else if (old > 1) { + } else { old >>= 1; - changed = true; iolat->rq_depth.max_depth = max(old, 1UL); } } @@ -369,7 +454,7 @@ static void check_scale_change(struct iolatency_grp *iolat) * scale down event. */ samples_thresh = lat_info->nr_samples * 5; - samples_thresh = div64_u64(samples_thresh, 100); + samples_thresh = max(1ULL, div64_u64(samples_thresh, 100)); if (iolat->nr_samples <= samples_thresh) return; } @@ -395,34 +480,12 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) { struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); - struct blkcg *blkcg; - struct blkcg_gq *blkg; - struct request_queue *q = rqos->q; + struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); if (!blk_iolatency_enabled(blkiolat)) return; - rcu_read_lock(); - blkcg = bio_blkcg(bio); - bio_associate_blkcg(bio, &blkcg->css); - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - if (!lock) - spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - if (!lock) - spin_unlock_irq(q->queue_lock); - } - if (!blkg) - goto out; - - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); - bio_associate_blkg(bio, blkg); -out: - rcu_read_unlock(); while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { @@ -443,7 +506,6 @@ static void iolatency_record_time(struct iolatency_grp *iolat, struct bio_issue *issue, u64 now, bool issue_as_root) { - struct blk_rq_stat *rq_stat; u64 start = bio_issue_time(issue); u64 req_time; @@ -469,9 +531,7 @@ static void iolatency_record_time(struct iolatency_grp *iolat, return; } - rq_stat = get_cpu_ptr(iolat->stats); - blk_rq_stat_add(rq_stat, req_time); - put_cpu_ptr(rq_stat); + latency_stat_record_time(iolat, req_time); } #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC) @@ -482,17 +542,17 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) struct blkcg_gq *blkg = lat_to_blkg(iolat); struct iolatency_grp *parent; struct child_latency_info *lat_info; - struct blk_rq_stat stat; + struct latency_stat stat; unsigned long flags; - int cpu, exp_idx; + int cpu; - blk_rq_stat_init(&stat); + latency_stat_init(iolat, &stat); preempt_disable(); for_each_online_cpu(cpu) { - struct blk_rq_stat *s; + struct latency_stat *s; s = per_cpu_ptr(iolat->stats, cpu); - blk_rq_stat_sum(&stat, s); - blk_rq_stat_init(s); + latency_stat_sum(iolat, &stat, s); + latency_stat_init(iolat, s); } preempt_enable(); @@ -502,41 +562,36 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) lat_info = &parent->child_lat; - /* - * CALC_LOAD takes in a number stored in fixed point representation. - * Because we are using this for IO time in ns, the values stored - * are significantly larger than the FIXED_1 denominator (2048). - * Therefore, rounding errors in the calculation are negligible and - * can be ignored. - */ - exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, - div64_u64(iolat->cur_win_nsec, - BLKIOLATENCY_EXP_BUCKET_SIZE)); - CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat.mean); + iolat_update_total_lat_avg(iolat, &stat); /* Everything is ok and we don't need to adjust the scale. */ - if (stat.mean <= iolat->min_lat_nsec && + if (latency_sum_ok(iolat, &stat) && atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE) return; /* Somebody beat us to the punch, just bail. */ spin_lock_irqsave(&lat_info->lock, flags); + + latency_stat_sum(iolat, &iolat->cur_stat, &stat); lat_info->nr_samples -= iolat->nr_samples; - lat_info->nr_samples += stat.nr_samples; - iolat->nr_samples = stat.nr_samples; + lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat); + iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat); if ((lat_info->last_scale_event >= now || - now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) && - lat_info->scale_lat <= iolat->min_lat_nsec) + now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME)) goto out; - if (stat.mean <= iolat->min_lat_nsec && - stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) { + if (latency_sum_ok(iolat, &iolat->cur_stat) && + latency_sum_ok(iolat, &stat)) { + if (latency_stat_samples(iolat, &iolat->cur_stat) < + BLKIOLATENCY_MIN_GOOD_SAMPLES) + goto out; if (lat_info->scale_grp == iolat) { lat_info->last_scale_event = now; scale_cookie_change(iolat->blkiolat, lat_info, true); } - } else if (stat.mean > iolat->min_lat_nsec) { + } else if (lat_info->scale_lat == 0 || + lat_info->scale_lat >= iolat->min_lat_nsec) { lat_info->last_scale_event = now; if (!lat_info->scale_grp || lat_info->scale_lat > iolat->min_lat_nsec) { @@ -545,6 +600,7 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) } scale_cookie_change(iolat->blkiolat, lat_info, false); } + latency_stat_init(iolat, &iolat->cur_stat); out: spin_unlock_irqrestore(&lat_info->lock, flags); } @@ -650,7 +706,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) * We could be exiting, don't access the pd unless we have a * ref on the blkg. */ - if (!blkg_try_get(blkg)) + if (!blkg_tryget(blkg)) continue; iolat = blkg_to_lat(blkg); @@ -761,7 +817,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, { struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkcg_gq *blkg; - struct blk_iolatency *blkiolat; struct blkg_conf_ctx ctx; struct iolatency_grp *iolat; char *p, *tok; @@ -774,7 +829,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, return ret; iolat = blkg_to_lat(ctx.blkg); - blkiolat = iolat->blkiolat; p = ctx.body; ret = -EINVAL; @@ -835,13 +889,43 @@ static int iolatency_print_limit(struct seq_file *sf, void *v) return 0; } +static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf, + size_t size) +{ + struct latency_stat stat; + int cpu; + + latency_stat_init(iolat, &stat); + preempt_disable(); + for_each_online_cpu(cpu) { + struct latency_stat *s; + s = per_cpu_ptr(iolat->stats, cpu); + latency_stat_sum(iolat, &stat, s); + } + preempt_enable(); + + if (iolat->rq_depth.max_depth == UINT_MAX) + return scnprintf(buf, size, " missed=%llu total=%llu depth=max", + (unsigned long long)stat.ps.missed, + (unsigned long long)stat.ps.total); + return scnprintf(buf, size, " missed=%llu total=%llu depth=%u", + (unsigned long long)stat.ps.missed, + (unsigned long long)stat.ps.total, + iolat->rq_depth.max_depth); +} + static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size) { struct iolatency_grp *iolat = pd_to_lat(pd); - unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); - unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); + unsigned long long avg_lat; + unsigned long long cur_win; + + if (iolat->ssd) + return iolatency_ssd_stat(iolat, buf, size); + avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); + cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); if (iolat->rq_depth.max_depth == UINT_MAX) return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu", avg_lat, cur_win); @@ -858,8 +942,8 @@ static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node) iolat = kzalloc_node(sizeof(*iolat), gfp, node); if (!iolat) return NULL; - iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat), - __alignof__(struct blk_rq_stat), gfp); + iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat), + __alignof__(struct latency_stat), gfp); if (!iolat->stats) { kfree(iolat); return NULL; @@ -876,15 +960,21 @@ static void iolatency_pd_init(struct blkg_policy_data *pd) u64 now = ktime_to_ns(ktime_get()); int cpu; + if (blk_queue_nonrot(blkg->q)) + iolat->ssd = true; + else + iolat->ssd = false; + for_each_possible_cpu(cpu) { - struct blk_rq_stat *stat; + struct latency_stat *stat; stat = per_cpu_ptr(iolat->stats, cpu); - blk_rq_stat_init(stat); + latency_stat_init(iolat, stat); } + latency_stat_init(iolat, &iolat->cur_stat); rq_wait_init(&iolat->rq_wait); spin_lock_init(&iolat->child_lat.lock); - iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q); + iolat->rq_depth.queue_depth = blkg->q->nr_requests; iolat->rq_depth.max_depth = UINT_MAX; iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth; iolat->blkiolat = blkiolat; diff --git a/block/blk-merge.c b/block/blk-merge.c @@ -12,6 +12,69 @@ #include "blk.h" +/* + * Check if the two bvecs from two bios can be merged to one segment. If yes, + * no need to check gap between the two bios since the 1st bio and the 1st bvec + * in the 2nd bio can be handled in one segment. + */ +static inline bool bios_segs_mergeable(struct request_queue *q, + struct bio *prev, struct bio_vec *prev_last_bv, + struct bio_vec *next_first_bv) +{ + if (!biovec_phys_mergeable(q, prev_last_bv, next_first_bv)) + return false; + if (prev->bi_seg_back_size + next_first_bv->bv_len > + queue_max_segment_size(q)) + return false; + return true; +} + +static inline bool bio_will_gap(struct request_queue *q, + struct request *prev_rq, struct bio *prev, struct bio *next) +{ + struct bio_vec pb, nb; + + if (!bio_has_data(prev) || !queue_virt_boundary(q)) + return false; + + /* + * Don't merge if the 1st bio starts with non-zero offset, otherwise it + * is quite difficult to respect the sg gap limit. We work hard to + * merge a huge number of small single bios in case of mkfs. + */ + if (prev_rq) + bio_get_first_bvec(prev_rq->bio, &pb); + else + bio_get_first_bvec(prev, &pb); + if (pb.bv_offset) + return true; + + /* + * We don't need to worry about the situation that the merged segment + * ends in unaligned virt boundary: + * + * - if 'pb' ends aligned, the merged segment ends aligned + * - if 'pb' ends unaligned, the next bio must include + * one single bvec of 'nb', otherwise the 'nb' can't + * merge with 'pb' + */ + bio_get_last_bvec(prev, &pb); + bio_get_first_bvec(next, &nb); + if (bios_segs_mergeable(q, prev, &pb, &nb)) + return false; + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); +} + +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, req, req->biotail, bio); +} + +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, NULL, bio, req->bio); +} + static struct bio *blk_bio_discard_split(struct request_queue *q, struct bio *bio, struct bio_set *bs, @@ -134,9 +197,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (bvprvp && blk_queue_cluster(q)) { if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) + if (!biovec_phys_mergeable(q, bvprvp, &bv)) goto new_segment; seg_size += bv.bv_len; @@ -267,9 +328,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) + if (!biovec_phys_mergeable(q, &bvprv, &bv)) goto new_segment; seg_size += bv.bv_len; @@ -349,17 +408,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, bio_get_last_bvec(bio, &end_bv); bio_get_first_bvec(nxt, &nxt_bv); - if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) - return 0; - - /* - * bio and nxt are contiguous in memory; check if the queue allows - * these two to be merged into one - */ - if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) - return 1; - - return 0; + return biovec_phys_mergeable(q, &end_bv, &nxt_bv); } static inline void @@ -373,10 +422,7 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, if (*sg && *cluster) { if ((*sg)->length + nbytes > queue_max_segment_size(q)) goto new_segment; - - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) + if (!biovec_phys_mergeable(q, bvprv, bvec)) goto new_segment; (*sg)->length += nbytes; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c @@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, return 0; } +static int queue_pm_only_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + + seq_printf(m, "%d\n", atomic_read(&q->pm_only)); + return 0; +} + #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(QUEUED), @@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), QUEUE_FLAG_NAME(QUIESCED), - QUEUE_FLAG_NAME(PREEMPT_ONLY), }; #undef QUEUE_FLAG_NAME @@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, + { "pm_only", 0600, queue_pm_only_show, NULL }, { "state", 0600, queue_state_show, queue_state_write }, { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, @@ -423,8 +431,7 @@ static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) { const struct show_busy_params *params = data; - if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx && - blk_mq_rq_state(rq) != MQ_RQ_IDLE) + if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx) __blk_mq_debugfs_rq_show(params->m, list_entry_rq(&rq->queuelist)); } diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h @@ -49,12 +49,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, return true; } -static inline void blk_mq_sched_completed_request(struct request *rq) +static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) { struct elevator_queue *e = rq->q->elevator; if (e && e->type->ops.mq.completed_request) - e->type->ops.mq.completed_request(rq); + e->type->ops.mq.completed_request(rq, now); } static inline void blk_mq_sched_started_request(struct request *rq) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c @@ -232,13 +232,26 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) /* * We can hit rq == NULL here, because the tagging functions - * test and set the bit before assining ->rqs[]. + * test and set the bit before assigning ->rqs[]. */ if (rq && rq->q == hctx->queue) iter_data->fn(hctx, rq, iter_data->data, reserved); return true; } +/** + * bt_for_each - iterate over the requests associated with a hardware queue + * @hctx: Hardware queue to examine. + * @bt: sbitmap to examine. This is either the breserved_tags member + * or the bitmap_tags member of struct blk_mq_tags. + * @fn: Pointer to the function that will be called for each request + * associated with @hctx that has been assigned a driver tag. + * @fn will be called as follows: @fn(@hctx, rq, @data, @reserved) + * where rq is a pointer to a request. + * @data: Will be passed as third argument to @fn. + * @reserved: Indicates whether @bt is the breserved_tags member or the + * bitmap_tags member of struct blk_mq_tags. + */ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt, busy_iter_fn *fn, void *data, bool reserved) { @@ -280,6 +293,18 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) return true; } +/** + * bt_tags_for_each - iterate over the requests in a tag map + * @tags: Tag map to iterate over. + * @bt: sbitmap to examine. This is either the breserved_tags member + * or the bitmap_tags member of struct blk_mq_tags. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @data, + * @reserved) where rq is a pointer to a request. + * @data: Will be passed as second argument to @fn. + * @reserved: Indicates whether @bt is the breserved_tags member or the + * bitmap_tags member of struct blk_mq_tags. + */ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, busy_tag_iter_fn *fn, void *data, bool reserved) { @@ -294,6 +319,15 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data); } +/** + * blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map + * @tags: Tag map to iterate over. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @priv, + * reserved) where rq is a pointer to a request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as second argument to @fn. + */ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { @@ -302,6 +336,15 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false); } +/** + * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set + * @tagset: Tag set to iterate over. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @priv, + * reserved) where rq is a pointer to a request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as second argument to @fn. + */ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv) { @@ -314,6 +357,20 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); +/** + * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag + * @q: Request queue to examine. + * @fn: Pointer to the function that will be called for each request + * on @q. @fn will be called as follows: @fn(hctx, rq, @priv, + * reserved) where rq is a pointer to a request and hctx points + * to the hardware queue associated with the request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as third argument to @fn. + * + * Note: if @q->tag_set is shared with other request queues then @fn will be + * called for all requests on all queues that share that tag set and not only + * for requests associated with @q. + */ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { @@ -321,9 +378,11 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, int i; /* - * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and - * queue_hw_ctx after freeze the queue, so we use q_usage_counter - * to avoid race with it. + * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx + * while the queue is frozen. So we can use q_usage_counter to avoid + * racing with it. __blk_mq_update_nr_hw_queues() uses + * synchronize_rcu() to ensure this function left the critical section + * below. */ if (!percpu_ref_tryget(&q->q_usage_counter)) return; @@ -332,7 +391,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, struct blk_mq_tags *tags = hctx->tags; /* - * If not software queues are currently mapped to this + * If no software queues are currently mapped to this * hardware queue, there's nothing to check */ if (!blk_mq_hw_queue_mapped(hctx)) diff --git a/block/blk-mq.c b/block/blk-mq.c @@ -33,6 +33,7 @@ #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-tag.h" +#include "blk-pm.h" #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" @@ -198,7 +199,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) freeze_depth = atomic_dec_return(&q->mq_freeze_depth); WARN_ON_ONCE(freeze_depth < 0); if (!freeze_depth) { - percpu_ref_reinit(&q->q_usage_counter); + percpu_ref_resurrect(&q->q_usage_counter); wake_up_all(&q->mq_freeze_wq); } } @@ -475,6 +476,7 @@ static void __blk_mq_free_request(struct request *rq) struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); const int sched_tag = rq->internal_tag; + blk_pm_mark_last_busy(rq); if (rq->tag != -1) blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) @@ -526,6 +528,9 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) blk_stat_add(rq, now); } + if (rq->internal_tag != -1) + blk_mq_sched_completed_request(rq, now); + blk_account_io_done(rq, now); if (rq->end_io) { @@ -562,8 +567,20 @@ static void __blk_mq_complete_request(struct request *rq) if (!blk_mq_mark_complete(rq)) return; - if (rq->internal_tag != -1) - blk_mq_sched_completed_request(rq); + + /* + * Most of single queue controllers, there is only one irq vector + * for handling IO completion, and the only irq's affinity is set + * as all possible CPUs. On most of ARCHs, this affinity means the + * irq is handled on one specific CPU. + * + * So complete IO reqeust in softirq context in case of single queue + * for not degrading IO performance by irqsoff latency. + */ + if (rq->q->nr_hw_queues == 1) { + __blk_complete_request(rq); + return; + } if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { rq->q->softirq_done_fn(rq); @@ -2137,8 +2154,6 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - blk_mq_debugfs_unregister_hctx(hctx); - if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_idle(hctx); @@ -2165,6 +2180,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + blk_mq_debugfs_unregister_hctx(hctx); blk_mq_exit_hctx(q, set, hctx, i); } } @@ -2194,12 +2210,12 @@ static int blk_mq_init_hctx(struct request_queue *q, * runtime */ hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), - GFP_KERNEL, node); + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!hctx->ctxs) goto unregister_cpu_notifier; - if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), GFP_KERNEL, - node)) + if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node)) goto free_ctxs; hctx->nr_ctx = 0; @@ -2212,7 +2228,8 @@ static int blk_mq_init_hctx(struct request_queue *q, set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; - hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (!hctx->fq) goto exit_hctx; @@ -2222,8 +2239,6 @@ static int blk_mq_init_hctx(struct request_queue *q, if (hctx->flags & BLK_MQ_F_BLOCKING) init_srcu_struct(hctx->srcu); - blk_mq_debugfs_register_hctx(q, hctx); - return 0; free_fq: @@ -2492,6 +2507,39 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) } EXPORT_SYMBOL(blk_mq_init_queue); +/* + * Helper for setting up a queue with mq ops, given queue depth, and + * the passed in mq ops flags. + */ +struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, + unsigned int queue_depth, + unsigned int set_flags) +{ + struct request_queue *q; + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->nr_hw_queues = 1; + set->queue_depth = queue_depth; + set->numa_node = NUMA_NO_NODE; + set->flags = set_flags; + + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ERR_PTR(ret); + + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { + blk_mq_free_tag_set(set); + return q; + } + + return q; +} +EXPORT_SYMBOL(blk_mq_init_sq_queue); + static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) { int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); @@ -2506,48 +2554,90 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) return hw_ctx_size; } +static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( + struct blk_mq_tag_set *set, struct request_queue *q, + int hctx_idx, int node) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = kzalloc_node(blk_mq_hw_ctx_size(set), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node); + if (!hctx) + return NULL; + + if (!zalloc_cpumask_var_node(&hctx->cpumask, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node)) { + kfree(hctx); + return NULL; + } + + atomic_set(&hctx->nr_active, 0); + hctx->numa_node = node; + hctx->queue_num = hctx_idx; + + if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) { + free_cpumask_var(hctx->cpumask); + kfree(hctx); + return NULL; + } + blk_mq_hctx_kobj_init(hctx); + + return hctx; +} + static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct request_queue *q) { - int i, j; + int i, j, end; struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; - blk_mq_sysfs_unregister(q); - /* protect against switching io scheduler */ mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { int node; - - if (hctxs[i]) - continue; + struct blk_mq_hw_ctx *hctx; node = blk_mq_hw_queue_to_node(q->mq_map, i); - hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), - GFP_KERNEL, node); - if (!hctxs[i]) - break; - - if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL, - node)) { - kfree(hctxs[i]); - hctxs[i] = NULL; - break; - } - - atomic_set(&hctxs[i]->nr_active, 0); - hctxs[i]->numa_node = node; - hctxs[i]->queue_num = i; + /* + * If the hw queue has been mapped to another numa node, + * we need to realloc the hctx. If allocation fails, fallback + * to use the previous one. + */ + if (hctxs[i] && (hctxs[i]->numa_node == node)) + continue; - if (blk_mq_init_hctx(q, set, hctxs[i], i)) { - free_cpumask_var(hctxs[i]->cpumask); - kfree(hctxs[i]); - hctxs[i] = NULL; - break; + hctx = blk_mq_alloc_and_init_hctx(set, q, i, node); + if (hctx) { + if (hctxs[i]) { + blk_mq_exit_hctx(q, set, hctxs[i], i); + kobject_put(&hctxs[i]->kobj); + } + hctxs[i] = hctx; + } else { + if (hctxs[i]) + pr_warn("Allocate new hctx on node %d fails,\ + fallback to previous one on node %d\n", + node, hctxs[i]->numa_node); + else + break; } - blk_mq_hctx_kobj_init(hctxs[i]); } - for (j = i; j < q->nr_hw_queues; j++) { + /* + * Increasing nr_hw_queues fails. Free the newly allocated + * hctxs and keep the previous q->nr_hw_queues. + */ + if (i != set->nr_hw_queues) { + j = q->nr_hw_queues; + end = i; + } else { + j = i; + end = q->nr_hw_queues; + q->nr_hw_queues = set->nr_hw_queues; + } + + for (; j < end; j++) { struct blk_mq_hw_ctx *hctx = hctxs[j]; if (hctx) { @@ -2559,9 +2649,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, } } - q->nr_hw_queues = i; mutex_unlock(&q->sysfs_lock); - blk_mq_sysfs_register(q); } struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, @@ -2659,25 +2747,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); } -/* Basically redo blk_mq_init_queue with queue frozen */ -static void blk_mq_queue_reinit(struct request_queue *q) -{ - WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); - - blk_mq_debugfs_unregister_hctxs(q); - blk_mq_sysfs_unregister(q); - - /* - * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe - * we should change hctx numa_node according to the new topology (this - * involves freeing and re-allocating memory, worth doing?) - */ - blk_mq_map_swqueue(q); - - blk_mq_sysfs_register(q); - blk_mq_debugfs_register_hctxs(q); -} - static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) { int i; @@ -2964,6 +3033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, { struct request_queue *q; LIST_HEAD(head); + int prev_nr_hw_queues; lockdep_assert_held(&set->tag_list_lock); @@ -2987,11 +3057,30 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (!blk_mq_elv_switch_none(&head, q)) goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) { + blk_mq_debugfs_unregister_hctxs(q); + blk_mq_sysfs_unregister(q); + } + + prev_nr_hw_queues = set->nr_hw_queues; set->nr_hw_queues = nr_hw_queues; blk_mq_update_queue_map(set); +fallback: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); - blk_mq_queue_reinit(q); + if (q->nr_hw_queues != set->nr_hw_queues) { + pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n", + nr_hw_queues, prev_nr_hw_queues); + set->nr_hw_queues = prev_nr_hw_queues; + blk_mq_map_queues(set); + goto fallback; + } + blk_mq_map_swqueue(q); + } + + list_for_each_entry(q, &set->tag_list, tag_set_list) { + blk_mq_sysfs_register(q); + blk_mq_debugfs_register_hctxs(q); } switch_back: diff --git a/block/blk-pm.c b/block/blk-pm.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/blk-mq.h> +#include <linux/blk-pm.h> +#include <linux/blkdev.h> +#include <linux/pm_runtime.h> +#include "blk-mq.h" +#include "blk-mq-tag.h" + +/** + * blk_pm_runtime_init - Block layer runtime PM initialization routine + * @q: the queue of the device + * @dev: the device the queue belongs to + * + * Description: + * Initialize runtime-PM-related fields for @q and start auto suspend for + * @dev. Drivers that want to take advantage of request-based runtime PM + * should call this function after @dev has been initialized, and its + * request queue @q has been allocated, and runtime PM for it can not happen + * yet(either due to disabled/forbidden or its usage_count > 0). In most + * cases, driver should call this function before any I/O has taken place. + * + * This function takes care of setting up using auto suspend for the device, + * the autosuspend delay is set to -1 to make runtime suspend impossible + * until an updated value is either set by user or by driver. Drivers do + * not need to touch other autosuspend settings. + * + * The block layer runtime PM is request based, so only works for drivers + * that use request as their IO unit instead of those directly use bio's. + */ +void blk_pm_runtime_init(struct request_queue *q, struct device *dev) +{ + q->dev = dev; + q->rpm_status = RPM_ACTIVE; + pm_runtime_set_autosuspend_delay(q->dev, -1); + pm_runtime_use_autosuspend(q->dev); +} +EXPORT_SYMBOL(blk_pm_runtime_init); + +/** + * blk_pre_runtime_suspend - Pre runtime suspend check + * @q: the queue of the device + * + * Description: + * This function will check if runtime suspend is allowed for the device + * by examining if there are any requests pending in the queue. If there + * are requests pending, the device can not be runtime suspended; otherwise, + * the queue's status will be updated to SUSPENDING and the driver can + * proceed to suspend the device. + * + * For the not allowed case, we mark last busy for the device so that + * runtime PM core will try to autosuspend it some time later. + * + * This function should be called near the start of the device's + * runtime_suspend callback. + * + * Return: + * 0 - OK to runtime suspend the device + * -EBUSY - Device should not be runtime suspended + */ +int blk_pre_runtime_suspend(struct request_queue *q) +{ + int ret = 0; + + if (!q->dev) + return ret; + + WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE); + + /* + * Increase the pm_only counter before checking whether any + * non-PM blk_queue_enter() calls are in progress to avoid that any + * new non-PM blk_queue_enter() calls succeed before the pm_only + * counter is decreased again. + */ + blk_set_pm_only(q); + ret = -EBUSY; + /* Switch q_usage_counter from per-cpu to atomic mode. */ + blk_freeze_queue_start(q); + /* + * Wait until atomic mode has been reached. Since that + * involves calling call_rcu(), it is guaranteed that later + * blk_queue_enter() calls see the pm-only state. See also + * http://lwn.net/Articles/573497/. + */ + percpu_ref_switch_to_atomic_sync(&q->q_usage_counter); + if (percpu_ref_is_zero(&q->q_usage_counter)) + ret = 0; + /* Switch q_usage_counter back to per-cpu mode. */ + blk_mq_unfreeze_queue(q); + + spin_lock_irq(q->queue_lock); + if (ret < 0) + pm_runtime_mark_last_busy(q->dev); + else + q->rpm_status = RPM_SUSPENDING; + spin_unlock_irq(q->queue_lock); + + if (ret) + blk_clear_pm_only(q); + + return ret; +} +EXPORT_SYMBOL(blk_pre_runtime_suspend); + +/** + * blk_post_runtime_suspend - Post runtime suspend processing + * @q: the queue of the device + * @err: return value of the device's runtime_suspend function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime suspend function and mark last busy for the device so + * that PM core will try to auto suspend the device at a later time. + * + * This function should be called near the end of the device's + * runtime_suspend callback. + */ +void blk_post_runtime_suspend(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_SUSPENDED; + } else { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + } + spin_unlock_irq(q->queue_lock); + + if (err) + blk_clear_pm_only(q); +} +EXPORT_SYMBOL(blk_post_runtime_suspend); + +/** + * blk_pre_runtime_resume - Pre runtime resume processing + * @q: the queue of the device + * + * Description: + * Update the queue's runtime status to RESUMING in preparation for the + * runtime resume of the device. + * + * This function should be called near the start of the device's + * runtime_resume callback. + */ +void blk_pre_runtime_resume(struct request_queue *q) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_RESUMING; + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_pre_runtime_resume); + +/** + * blk_post_runtime_resume - Post runtime resume processing + * @q: the queue of the device + * @err: return value of the device's runtime_resume function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime_resume function. If it is successfully resumed, process + * the requests that are queued into the device's queue when it is resuming + * and then mark last busy and initiate autosuspend for it. + * + * This function should be called near the end of the device's + * runtime_resume callback. + */ +void blk_post_runtime_resume(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + } else { + q->rpm_status = RPM_SUSPENDED; + } + spin_unlock_irq(q->queue_lock); + + if (!err) + blk_clear_pm_only(q); +} +EXPORT_SYMBOL(blk_post_runtime_resume); + +/** + * blk_set_runtime_active - Force runtime status of the queue to be active + * @q: the queue of the device + * + * If the device is left runtime suspended during system suspend the resume + * hook typically resumes the device and corrects runtime status + * accordingly. However, that does not affect the queue runtime PM status + * which is still "suspended". This prevents processing requests from the + * queue. + * + * This function can be used in driver's resume hook to correct queue + * runtime PM status and re-enable peeking requests from the queue. It + * should be called before first request is added to the queue. + */ +void blk_set_runtime_active(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_set_runtime_active); diff --git a/block/blk-pm.h b/block/blk-pm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BLOCK_BLK_PM_H_ +#define _BLOCK_BLK_PM_H_ + +#include <linux/pm_runtime.h> + +#ifdef CONFIG_PM +static inline void blk_pm_request_resume(struct request_queue *q) +{ + if (q->dev && (q->rpm_status == RPM_SUSPENDED || + q->rpm_status == RPM_SUSPENDING)) + pm_request_resume(q->dev); +} + +static inline void blk_pm_mark_last_busy(struct request *rq) +{ + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + pm_runtime_mark_last_busy(rq->q->dev); +} + +static inline void blk_pm_requeue_request(struct request *rq) +{ + lockdep_assert_held(rq->q->queue_lock); + + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + rq->q->nr_pending--; +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ + lockdep_assert_held(q->queue_lock); + + if (q->dev && !(rq->rq_flags & RQF_PM)) + q->nr_pending++; +} + +static inline void blk_pm_put_request(struct request *rq) +{ + lockdep_assert_held(rq->q->queue_lock); + + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + --rq->q->nr_pending; +} +#else +static inline void blk_pm_request_resume(struct request_queue *q) +{ +} + +static inline void blk_pm_mark_last_busy(struct request *rq) +{ +} + +static inline void blk_pm_requeue_request(struct request *rq) +{ +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ +} + +static inline void blk_pm_put_request(struct request *rq) +{ +} +#endif + +#endif /* _BLOCK_BLK_PM_H_ */ diff --git a/block/blk-softirq.c b/block/blk-softirq.c @@ -97,8 +97,8 @@ static int blk_softirq_cpu_dead(unsigned int cpu) void __blk_complete_request(struct request *req) { - int ccpu, cpu; struct request_queue *q = req->q; + int cpu, ccpu = q->mq_ops ? req->mq_ctx->cpu : req->cpu; unsigned long flags; bool shared = false; @@ -110,8 +110,7 @@ void __blk_complete_request(struct request *req) /* * Select completion CPU */ - if (req->cpu != -1) { - ccpu = req->cpu; + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) { if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) shared = cpus_share_cache(cpu, ccpu); } else diff --git a/block/blk-stat.c b/block/blk-stat.c @@ -190,6 +190,7 @@ void blk_stat_enable_accounting(struct request_queue *q) blk_queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } +EXPORT_SYMBOL_GPL(blk_stat_enable_accounting); struct blk_queue_stats *blk_alloc_queue_stats(void) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c @@ -84,8 +84,7 @@ struct throtl_service_queue { * RB tree of active children throtl_grp's, which are sorted by * their ->disptime. */ - struct rb_root pending_tree; /* RB tree of active tgs */ - struct rb_node *first_pending; /* first node in the tree */ + struct rb_root_cached pending_tree; /* RB tree of active tgs */ unsigned int nr_pending; /* # queued in the tree */ unsigned long first_pending_disptime; /* disptime of the first tg */ struct timer_list pending_timer; /* fires on first_pending_disptime */ @@ -475,7 +474,7 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq) { INIT_LIST_HEAD(&sq->queued[0]); INIT_LIST_HEAD(&sq->queued[1]); - sq->pending_tree = RB_ROOT; + sq->pending_tree = RB_ROOT_CACHED; timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); } @@ -616,31 +615,23 @@ static void throtl_pd_free(struct blkg_policy_data *pd) static struct throtl_grp * throtl_rb_first(struct throtl_service_queue *parent_sq) { + struct rb_node *n; /* Service tree is empty */ if (!parent_sq->nr_pending) return NULL; - if (!parent_sq->first_pending) - parent_sq->first_pending = rb_first(&parent_sq->pending_tree); - - if (parent_sq->first_pending) - return rb_entry_tg(parent_sq->first_pending); - - return NULL; -} - -static void rb_erase_init(struct rb_node *n, struct rb_root *root) -{ - rb_erase(n, root); - RB_CLEAR_NODE(n); + n = rb_first_cached(&parent_sq->pending_tree); + WARN_ON_ONCE(!n); + if (!n) + return NULL; + return rb_entry_tg(n); } static void throtl_rb_erase(struct rb_node *n, struct throtl_service_queue *parent_sq) { - if (parent_sq->first_pending == n) - parent_sq->first_pending = NULL; - rb_erase_init(n, &parent_sq->pending_tree); + rb_erase_cached(n, &parent_sq->pending_tree); + RB_CLEAR_NODE(n); --parent_sq->nr_pending; } @@ -658,11 +649,11 @@ static void update_min_dispatch_time(struct throtl_service_queue *parent_sq) static void tg_service_queue_add(struct throtl_grp *tg) { struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; - struct rb_node **node = &parent_sq->pending_tree.rb_node; + struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node; struct rb_node *parent = NULL; struct throtl_grp *__tg; unsigned long key = tg->disptime; - int left = 1; + bool leftmost = true; while (*node != NULL) { parent = *node; @@ -672,15 +663,13 @@ static void tg_service_queue_add(struct throtl_grp *tg) node = &parent->rb_left; else { node = &parent->rb_right; - left = 0; + leftmost = false; } } - if (left) - parent_sq->first_pending = &tg->rb_node; - rb_link_node(&tg->rb_node, parent, node); - rb_insert_color(&tg->rb_node, &parent_sq->pending_tree); + rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree, + leftmost); } static void __throtl_enqueue_tg(struct throtl_grp *tg) @@ -2126,21 +2115,11 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif -static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) -{ -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW - /* fallback to root_blkg if we fail to get a blkg ref */ - if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) - bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -#endif -} - bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio) { struct throtl_qnode *qn = NULL; - struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq; bool rw = bio_data_dir(bio); bool throttled = false; @@ -2159,7 +2138,6 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, if (unlikely(blk_queue_bypass(q))) goto out_unlock; - blk_throtl_assoc_bio(tg, bio); blk_throtl_update_idletime(tg); sq = &tg->service_queue; diff --git a/block/blk.h b/block/blk.h @@ -4,6 +4,7 @@ #include <linux/idr.h> #include <linux/blk-mq.h> +#include <xen/xen.h> #include "blk-mq.h" /* Amount of time in which a process may batch requests */ @@ -124,7 +125,7 @@ static inline void __blk_get_queue(struct request_queue *q) } struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size); + int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); int blk_init_rl(struct request_list *rl, struct request_queue *q, @@ -149,6 +150,41 @@ static inline void blk_queue_enter_live(struct request_queue *q) percpu_ref_get(&q->q_usage_counter); } +static inline bool biovec_phys_mergeable(struct request_queue *q, + struct bio_vec *vec1, struct bio_vec *vec2) +{ + unsigned long mask = queue_segment_boundary(q); + phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset; + phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset; + + if (addr1 + vec1->bv_len != addr2) + return false; + if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2)) + return false; + if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) + return false; + return true; +} + +static inline bool __bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + return offset || + ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); +} + +/* + * Check if adding a bio_vec after bprv with offset would create a gap in + * the SG list. Most drivers don't care about this, but some do. + */ +static inline bool bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + if (!queue_virt_boundary(q)) + return false; + return __bvec_gap_to_prev(q, bprv, offset); +} + #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); @@ -158,7 +194,38 @@ static inline bool bio_integrity_endio(struct bio *bio) return __bio_integrity_endio(bio); return true; } -#else + +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + struct bio_integrity_payload *bip = bio_integrity(req->bio); + struct bio_integrity_payload *bip_next = bio_integrity(next); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_integrity_payload *bip_next = bio_integrity(req->bio); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + return false; +} +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + return false; +} + static inline void blk_flush_integrity(void) { } @@ -166,7 +233,7 @@ static inline bool bio_integrity_endio(struct bio *bio) { return true; } -#endif +#endif /* CONFIG_BLK_DEV_INTEGRITY */ void blk_timeout_work(struct work_struct *work); unsigned long blk_rq_timeout(unsigned long timeout); diff --git a/block/bounce.c b/block/bounce.c @@ -31,6 +31,24 @@ static struct bio_set bounce_bio_set, bounce_bio_split; static mempool_t page_pool, isa_page_pool; +static void init_bounce_bioset(void) +{ + static bool bounce_bs_setup; + int ret; + + if (bounce_bs_setup) + return; + + ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + BUG_ON(ret); + if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE)) + BUG_ON(1); + + ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0); + BUG_ON(ret); + bounce_bs_setup = true; +} + #if defined(CONFIG_HIGHMEM) static __init int init_emergency_pool(void) { @@ -44,14 +62,7 @@ static __init int init_emergency_pool(void) BUG_ON(ret); pr_info("pool size: %d pages\n", POOL_SIZE); - ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - BUG_ON(ret); - if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE)) - BUG_ON(1); - - ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0); - BUG_ON(ret); - + init_bounce_bioset(); return 0; } @@ -86,6 +97,8 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) return mempool_alloc_pages(gfp_mask | GFP_DMA, data); } +static DEFINE_MUTEX(isa_mutex); + /* * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA * as the max address, so check if the pool has already been created. @@ -94,14 +107,20 @@ int init_emergency_isa_pool(void) { int ret; - if (mempool_initialized(&isa_page_pool)) + mutex_lock(&isa_mutex); + + if (mempool_initialized(&isa_page_pool)) { + mutex_unlock(&isa_mutex); return 0; + } ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa, mempool_free_pages, (void *) 0); BUG_ON(ret); pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE); + init_bounce_bioset(); + mutex_unlock(&isa_mutex); return 0; } @@ -257,7 +276,9 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, } } - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); + + blkcg_bio_issue_init(bio); return bio; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c @@ -1644,14 +1644,20 @@ static void cfq_pd_offline(struct blkg_policy_data *pd) int i; for (i = 0; i < IOPRIO_BE_NR; i++) { - if (cfqg->async_cfqq[0][i]) + if (cfqg->async_cfqq[0][i]) { cfq_put_queue(cfqg->async_cfqq[0][i]); - if (cfqg->async_cfqq[1][i]) + cfqg->async_cfqq[0][i] = NULL; + } + if (cfqg->async_cfqq[1][i]) { cfq_put_queue(cfqg->async_cfqq[1][i]); + cfqg->async_cfqq[1][i] = NULL; + } } - if (cfqg->async_idle_cfqq) + if (cfqg->async_idle_cfqq) { cfq_put_queue(cfqg->async_idle_cfqq); + cfqg->async_idle_cfqq = NULL; + } /* * @blkg is going offline and will be ignored by @@ -3753,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = bio_blkcg(bio)->css.serial_nr; + serial_nr = __bio_blkcg(bio)->css.serial_nr; rcu_read_unlock(); /* @@ -3818,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct cfq_group *cfqg; rcu_read_lock(); - cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio)); + cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); if (!cfqg) { cfqq = &cfqd->oom_cfqq; goto out; diff --git a/block/elevator.c b/block/elevator.c @@ -41,6 +41,7 @@ #include "blk.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-wbt.h" static DEFINE_SPINLOCK(elv_list_lock); @@ -557,27 +558,6 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); } -#ifdef CONFIG_PM -static void blk_pm_requeue_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM)) - rq->q->nr_pending--; -} - -static void blk_pm_add_request(struct request_queue *q, struct request *rq) -{ - if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && - (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); -} -#else -static inline void blk_pm_requeue_request(struct request *rq) {} -static inline void blk_pm_add_request(struct request_queue *q, - struct request *rq) -{ -} -#endif - void elv_requeue_request(struct request_queue *q, struct request *rq) { /* diff --git a/block/genhd.c b/block/genhd.c @@ -567,7 +567,8 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct device *parent, struct gendisk *disk) +static void register_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -582,6 +583,10 @@ static void register_disk(struct device *parent, struct gendisk *disk) /* delay uevents, until we scanned partition table */ dev_set_uevent_suppress(ddev, 1); + if (groups) { + WARN_ON(ddev->groups); + ddev->groups = groups; + } if (device_add(ddev)) return; if (!sysfs_deprecated) { @@ -647,6 +652,7 @@ exit: * __device_add_disk - add disk information to kernel list * @parent: parent device for the disk * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups * @register_queue: register the queue if set to true * * This function registers the partitioning information in @disk @@ -655,6 +661,7 @@ exit: * FIXME: error handling */ static void __device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, bool register_queue) { dev_t devt; @@ -698,7 +705,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } - register_disk(parent, disk); + register_disk(parent, disk, groups); if (register_queue) blk_register_queue(disk); @@ -712,15 +719,17 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_integrity_add(disk); } -void device_add_disk(struct device *parent, struct gendisk *disk) +void device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) + { - __device_add_disk(parent, disk, true); + __device_add_disk(parent, disk, groups, true); } EXPORT_SYMBOL(device_add_disk); void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) { - __device_add_disk(parent, disk, false); + __device_add_disk(parent, disk, NULL, false); } EXPORT_SYMBOL(device_add_disk_no_queue_reg); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c @@ -29,19 +29,30 @@ #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" -#include "blk-stat.h" -/* Scheduling domains. */ +#define CREATE_TRACE_POINTS +#include <trace/events/kyber.h> + +/* + * Scheduling domains: the device is divided into multiple domains based on the + * request type. + */ enum { KYBER_READ, - KYBER_SYNC_WRITE, - KYBER_OTHER, /* Async writes, discard, etc. */ + KYBER_WRITE, + KYBER_DISCARD, + KYBER_OTHER, KYBER_NUM_DOMAINS, }; -enum { - KYBER_MIN_DEPTH = 256, +static const char *kyber_domain_names[] = { + [KYBER_READ] = "READ", + [KYBER_WRITE] = "WRITE", + [KYBER_DISCARD] = "DISCARD", + [KYBER_OTHER] = "OTHER", +}; +enum { /* * In order to prevent starvation of synchronous requests by a flood of * asynchronous requests, we reserve 25% of requests for synchronous @@ -51,25 +62,87 @@ enum { }; /* - * Initial device-wide depths for each scheduling domain. + * Maximum device-wide depth for each scheduling domain. * - * Even for fast devices with lots of tags like NVMe, you can saturate - * the device with only a fraction of the maximum possible queue depth. - * So, we cap these to a reasonable value. + * Even for fast devices with lots of tags like NVMe, you can saturate the + * device with only a fraction of the maximum possible queue depth. So, we cap + * these to a reasonable value. */ static const unsigned int kyber_depth[] = { [KYBER_READ] = 256, - [KYBER_SYNC_WRITE] = 128, - [KYBER_OTHER] = 64, + [KYBER_WRITE] = 128, + [KYBER_DISCARD] = 64, + [KYBER_OTHER] = 16, }; /* - * Scheduling domain batch sizes. We favor reads. + * Default latency targets for each scheduling domain. + */ +static const u64 kyber_latency_targets[] = { + [KYBER_READ] = 2ULL * NSEC_PER_MSEC, + [KYBER_WRITE] = 10ULL * NSEC_PER_MSEC, + [KYBER_DISCARD] = 5ULL * NSEC_PER_SEC, +}; + +/* + * Batch size (number of requests we'll dispatch in a row) for each scheduling + * domain. */ static const unsigned int kyber_batch_size[] = { [KYBER_READ] = 16, - [KYBER_SYNC_WRITE] = 8, - [KYBER_OTHER] = 8, + [KYBER_WRITE] = 8, + [KYBER_DISCARD] = 1, + [KYBER_OTHER] = 1, +}; + +/* + * Requests latencies are recorded in a histogram with buckets defined relative + * to the target latency: + * + * <= 1/4 * target latency + * <= 1/2 * target latency + * <= 3/4 * target latency + * <= target latency + * <= 1 1/4 * target latency + * <= 1 1/2 * target latency + * <= 1 3/4 * target latency + * > 1 3/4 * target latency + */ +enum { + /* + * The width of the latency histogram buckets is + * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency. + */ + KYBER_LATENCY_SHIFT = 2, + /* + * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency, + * thus, "good". + */ + KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT, + /* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */ + KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT, +}; + +/* + * We measure both the total latency and the I/O latency (i.e., latency after + * submitting to the device). + */ +enum { + KYBER_TOTAL_LATENCY, + KYBER_IO_LATENCY, +}; + +static const char *kyber_latency_type_names[] = { + [KYBER_TOTAL_LATENCY] = "total", + [KYBER_IO_LATENCY] = "I/O", +}; + +/* + * Per-cpu latency histograms: total latency and I/O latency for each scheduling + * domain except for KYBER_OTHER. + */ +struct kyber_cpu_latency { + atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS]; }; /* @@ -88,12 +161,9 @@ struct kyber_ctx_queue { struct kyber_queue_data { struct request_queue *q; - struct blk_stat_callback *cb; - /* - * The device is divided into multiple scheduling domains based on the - * request type. Each domain has a fixed number of in-flight requests of - * that type device-wide, limited by these tokens. + * Each scheduling domain has a limited number of in-flight requests + * device-wide, limited by these tokens. */ struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS]; @@ -103,8 +173,19 @@ struct kyber_queue_data { */ unsigned int async_depth; + struct kyber_cpu_latency __percpu *cpu_latency; + + /* Timer for stats aggregation and adjusting domain tokens. */ + struct timer_list timer; + + unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS]; + + unsigned long latency_timeout[KYBER_OTHER]; + + int domain_p99[KYBER_OTHER]; + /* Target latencies in nanoseconds. */ - u64 read_lat_nsec, write_lat_nsec; + u64 latency_targets[KYBER_OTHER]; }; struct kyber_hctx_data { @@ -124,233 +205,219 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, static unsigned int kyber_sched_domain(unsigned int op) { - if ((op & REQ_OP_MASK) == REQ_OP_READ) + switch (op & REQ_OP_MASK) { + case REQ_OP_READ: return KYBER_READ; - else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op)) - return KYBER_SYNC_WRITE; - else + case REQ_OP_WRITE: + return KYBER_WRITE; + case REQ_OP_DISCARD: + return KYBER_DISCARD; + default: return KYBER_OTHER; + } } -enum { - NONE = 0, - GOOD = 1, - GREAT = 2, - BAD = -1, - AWFUL = -2, -}; - -#define IS_GOOD(status) ((status) > 0) -#define IS_BAD(status) ((status) < 0) - -static int kyber_lat_status(struct blk_stat_callback *cb, - unsigned int sched_domain, u64 target) +static void flush_latency_buckets(struct kyber_queue_data *kqd, + struct kyber_cpu_latency *cpu_latency, + unsigned int sched_domain, unsigned int type) { - u64 latency; - - if (!cb->stat[sched_domain].nr_samples) - return NONE; + unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; + atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type]; + unsigned int bucket; - latency = cb->stat[sched_domain].mean; - if (latency >= 2 * target) - return AWFUL; - else if (latency > target) - return BAD; - else if (latency <= target / 2) - return GREAT; - else /* (latency <= target) */ - return GOOD; + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++) + buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0); } /* - * Adjust the read or synchronous write depth given the status of reads and - * writes. The goal is that the latencies of the two domains are fair (i.e., if - * one is good, then the other is good). + * Calculate the histogram bucket with the given percentile rank, or -1 if there + * aren't enough samples yet. */ -static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd, - unsigned int sched_domain, int this_status, - int other_status) +static int calculate_percentile(struct kyber_queue_data *kqd, + unsigned int sched_domain, unsigned int type, + unsigned int percentile) { - unsigned int orig_depth, depth; + unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; + unsigned int bucket, samples = 0, percentile_samples; + + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++) + samples += buckets[bucket]; + + if (!samples) + return -1; /* - * If this domain had no samples, or reads and writes are both good or - * both bad, don't adjust the depth. + * We do the calculation once we have 500 samples or one second passes + * since the first sample was recorded, whichever comes first. */ - if (this_status == NONE || - (IS_GOOD(this_status) && IS_GOOD(other_status)) || - (IS_BAD(this_status) && IS_BAD(other_status))) - return; - - orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth; + if (!kqd->latency_timeout[sched_domain]) + kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL); + if (samples < 500 && + time_is_after_jiffies(kqd->latency_timeout[sched_domain])) { + return -1; + } + kqd->latency_timeout[sched_domain] = 0; - if (other_status == NONE) { - depth++; - } else { - switch (this_status) { - case GOOD: - if (other_status == AWFUL) - depth -= max(depth / 4, 1U); - else - depth -= max(depth / 8, 1U); - break; - case GREAT: - if (other_status == AWFUL) - depth /= 2; - else - depth -= max(depth / 4, 1U); + percentile_samples = DIV_ROUND_UP(samples * percentile, 100); + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) { + if (buckets[bucket] >= percentile_samples) break; - case BAD: - depth++; - break; - case AWFUL: - if (other_status == GREAT) - depth += 2; - else - depth++; - break; - } + percentile_samples -= buckets[bucket]; } + memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); - depth = clamp(depth, 1U, kyber_depth[sched_domain]); - if (depth != orig_depth) - sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); + trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain], + kyber_latency_type_names[type], percentile, + bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples); + + return bucket; } -/* - * Adjust the depth of other requests given the status of reads and synchronous - * writes. As long as either domain is doing fine, we don't throttle, but if - * both domains are doing badly, we throttle heavily. - */ -static void kyber_adjust_other_depth(struct kyber_queue_data *kqd, - int read_status, int write_status, - bool have_samples) -{ - unsigned int orig_depth, depth; - int status; - - orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth; - - if (read_status == NONE && write_status == NONE) { - depth += 2; - } else if (have_samples) { - if (read_status == NONE) - status = write_status; - else if (write_status == NONE) - status = read_status; - else - status = max(read_status, write_status); - switch (status) { - case GREAT: - depth += 2; - break; - case GOOD: - depth++; - break; - case BAD: - depth -= max(depth / 4, 1U); - break; - case AWFUL: - depth /= 2; - break; - } +static void kyber_resize_domain(struct kyber_queue_data *kqd, + unsigned int sched_domain, unsigned int depth) +{ + depth = clamp(depth, 1U, kyber_depth[sched_domain]); + if (depth != kqd->domain_tokens[sched_domain].sb.depth) { + sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); + trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain], + depth); } - - depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]); - if (depth != orig_depth) - sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth); } -/* - * Apply heuristics for limiting queue depths based on gathered latency - * statistics. - */ -static void kyber_stat_timer_fn(struct blk_stat_callback *cb) +static void kyber_timer_fn(struct timer_list *t) { - struct kyber_queue_data *kqd = cb->data; - int read_status, write_status; + struct kyber_queue_data *kqd = from_timer(kqd, t, timer); + unsigned int sched_domain; + int cpu; + bool bad = false; + + /* Sum all of the per-cpu latency histograms. */ + for_each_online_cpu(cpu) { + struct kyber_cpu_latency *cpu_latency; + + cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu); + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + flush_latency_buckets(kqd, cpu_latency, sched_domain, + KYBER_TOTAL_LATENCY); + flush_latency_buckets(kqd, cpu_latency, sched_domain, + KYBER_IO_LATENCY); + } + } - read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec); - write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec); + /* + * Check if any domains have a high I/O latency, which might indicate + * congestion in the device. Note that we use the p90; we don't want to + * be too sensitive to outliers here. + */ + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + int p90; - kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status); - kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status); - kyber_adjust_other_depth(kqd, read_status, write_status, - cb->stat[KYBER_OTHER].nr_samples != 0); + p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY, + 90); + if (p90 >= KYBER_GOOD_BUCKETS) + bad = true; + } /* - * Continue monitoring latencies if we aren't hitting the targets or - * we're still throttling other requests. + * Adjust the scheduling domain depths. If we determined that there was + * congestion, we throttle all domains with good latencies. Either way, + * we ease up on throttling domains with bad latencies. */ - if (!blk_stat_is_active(kqd->cb) && - ((IS_BAD(read_status) || IS_BAD(write_status) || - kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER]))) - blk_stat_activate_msecs(kqd->cb, 100); + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + unsigned int orig_depth, depth; + int p99; + + p99 = calculate_percentile(kqd, sched_domain, + KYBER_TOTAL_LATENCY, 99); + /* + * This is kind of subtle: different domains will not + * necessarily have enough samples to calculate the latency + * percentiles during the same window, so we have to remember + * the p99 for the next time we observe congestion; once we do, + * we don't want to throttle again until we get more data, so we + * reset it to -1. + */ + if (bad) { + if (p99 < 0) + p99 = kqd->domain_p99[sched_domain]; + kqd->domain_p99[sched_domain] = -1; + } else if (p99 >= 0) { + kqd->domain_p99[sched_domain] = p99; + } + if (p99 < 0) + continue; + + /* + * If this domain has bad latency, throttle less. Otherwise, + * throttle more iff we determined that there is congestion. + * + * The new depth is scaled linearly with the p99 latency vs the + * latency target. E.g., if the p99 is 3/4 of the target, then + * we throttle down to 3/4 of the current depth, and if the p99 + * is 2x the target, then we double the depth. + */ + if (bad || p99 >= KYBER_GOOD_BUCKETS) { + orig_depth = kqd->domain_tokens[sched_domain].sb.depth; + depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT; + kyber_resize_domain(kqd, sched_domain, depth); + } + } } -static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd) +static unsigned int kyber_sched_tags_shift(struct request_queue *q) { /* * All of the hardware queues have the same depth, so we can just grab * the shift of the first one. */ - return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; -} - -static int kyber_bucket_fn(const struct request *rq) -{ - return kyber_sched_domain(rq->cmd_flags); + return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; } static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) { struct kyber_queue_data *kqd; - unsigned int max_tokens; unsigned int shift; int ret = -ENOMEM; int i; - kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); + kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); if (!kqd) goto err; + kqd->q = q; - kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn, - KYBER_NUM_DOMAINS, kqd); - if (!kqd->cb) + kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, + GFP_KERNEL | __GFP_ZERO); + if (!kqd->cpu_latency) goto err_kqd; - /* - * The maximum number of tokens for any scheduling domain is at least - * the queue depth of a single hardware queue. If the hardware doesn't - * have many tags, still provide a reasonable number. - */ - max_tokens = max_t(unsigned int, q->tag_set->queue_depth, - KYBER_MIN_DEPTH); + timer_setup(&kqd->timer, kyber_timer_fn, 0); + for (i = 0; i < KYBER_NUM_DOMAINS; i++) { WARN_ON(!kyber_depth[i]); WARN_ON(!kyber_batch_size[i]); ret = sbitmap_queue_init_node(&kqd->domain_tokens[i], - max_tokens, -1, false, GFP_KERNEL, - q->node); + kyber_depth[i], -1, false, + GFP_KERNEL, q->node); if (ret) { while (--i >= 0) sbitmap_queue_free(&kqd->domain_tokens[i]); - goto err_cb; + goto err_buckets; } - sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]); } - shift = kyber_sched_tags_shift(kqd); - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; + for (i = 0; i < KYBER_OTHER; i++) { + kqd->domain_p99[i] = -1; + kqd->latency_targets[i] = kyber_latency_targets[i]; + } - kqd->read_lat_nsec = 2000000ULL; - kqd->write_lat_nsec = 10000000ULL; + shift = kyber_sched_tags_shift(q); + kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; return kqd; -err_cb: - blk_stat_free_callback(kqd->cb); +err_buckets: + free_percpu(kqd->cpu_latency); err_kqd: kfree(kqd); err: @@ -372,25 +439,24 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e) return PTR_ERR(kqd); } + blk_stat_enable_accounting(q); + eq->elevator_data = kqd; q->elevator = eq; - blk_stat_add_callback(q, kqd->cb); - return 0; } static void kyber_exit_sched(struct elevator_queue *e) { struct kyber_queue_data *kqd = e->elevator_data; - struct request_queue *q = kqd->q; int i; - blk_stat_remove_callback(q, kqd->cb); + del_timer_sync(&kqd->timer); for (i = 0; i < KYBER_NUM_DOMAINS; i++) sbitmap_queue_free(&kqd->domain_tokens[i]); - blk_stat_free_callback(kqd->cb); + free_percpu(kqd->cpu_latency); kfree(kqd); } @@ -558,41 +624,44 @@ static void kyber_finish_request(struct request *rq) rq_clear_domain_token(kqd, rq); } -static void kyber_completed_request(struct request *rq) +static void add_latency_sample(struct kyber_cpu_latency *cpu_latency, + unsigned int sched_domain, unsigned int type, + u64 target, u64 latency) { - struct request_queue *q = rq->q; - struct kyber_queue_data *kqd = q->elevator->elevator_data; - unsigned int sched_domain; - u64 now, latency, target; + unsigned int bucket; + u64 divisor; - /* - * Check if this request met our latency goal. If not, quickly gather - * some statistics and start throttling. - */ - sched_domain = kyber_sched_domain(rq->cmd_flags); - switch (sched_domain) { - case KYBER_READ: - target = kqd->read_lat_nsec; - break; - case KYBER_SYNC_WRITE: - target = kqd->write_lat_nsec; - break; - default: - return; + if (latency > 0) { + divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1); + bucket = min_t(unsigned int, div64_u64(latency - 1, divisor), + KYBER_LATENCY_BUCKETS - 1); + } else { + bucket = 0; } - /* If we are already monitoring latencies, don't check again. */ - if (blk_stat_is_active(kqd->cb)) - return; + atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]); +} - now = ktime_get_ns(); - if (now < rq->io_start_time_ns) +static void kyber_completed_request(struct request *rq, u64 now) +{ + struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; + struct kyber_cpu_latency *cpu_latency; + unsigned int sched_domain; + u64 target; + + sched_domain = kyber_sched_domain(rq->cmd_flags); + if (sched_domain == KYBER_OTHER) return; - latency = now - rq->io_start_time_ns; + cpu_latency = get_cpu_ptr(kqd->cpu_latency); + target = kqd->latency_targets[sched_domain]; + add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY, + target, now - rq->start_time_ns); + add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target, + now - rq->io_start_time_ns); + put_cpu_ptr(kqd->cpu_latency); - if (latency > target) - blk_stat_activate_msecs(kqd->cb, 10); + timer_reduce(&kqd->timer, jiffies + HZ / 10); } struct flush_kcq_data { @@ -713,6 +782,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, rq_set_domain_token(rq, nr); list_del_init(&rq->queuelist); return rq; + } else { + trace_kyber_throttled(kqd->q, + kyber_domain_names[khd->cur_domain]); } } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { nr = kyber_get_domain_token(kqd, khd, hctx); @@ -723,6 +795,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, rq_set_domain_token(rq, nr); list_del_init(&rq->queuelist); return rq; + } else { + trace_kyber_throttled(kqd->q, + kyber_domain_names[khd->cur_domain]); } } @@ -790,17 +865,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx) return false; } -#define KYBER_LAT_SHOW_STORE(op) \ -static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \ - char *page) \ +#define KYBER_LAT_SHOW_STORE(domain, name) \ +static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \ + char *page) \ { \ struct kyber_queue_data *kqd = e->elevator_data; \ \ - return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \ + return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \ } \ \ -static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ - const char *page, size_t count) \ +static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \ + const char *page, size_t count) \ { \ struct kyber_queue_data *kqd = e->elevator_data; \ unsigned long long nsec; \ @@ -810,12 +885,12 @@ static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ if (ret) \ return ret; \ \ - kqd->op##_lat_nsec = nsec; \ + kqd->latency_targets[domain] = nsec; \ \ return count; \ } -KYBER_LAT_SHOW_STORE(read); -KYBER_LAT_SHOW_STORE(write); +KYBER_LAT_SHOW_STORE(KYBER_READ, read); +KYBER_LAT_SHOW_STORE(KYBER_WRITE, write); #undef KYBER_LAT_SHOW_STORE #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store) @@ -882,7 +957,8 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ return 0; \ } KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) -KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other) #undef KYBER_DEBUGFS_DOMAIN_ATTRS @@ -900,20 +976,7 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m) struct blk_mq_hw_ctx *hctx = data; struct kyber_hctx_data *khd = hctx->sched_data; - switch (khd->cur_domain) { - case KYBER_READ: - seq_puts(m, "READ\n"); - break; - case KYBER_SYNC_WRITE: - seq_puts(m, "SYNC_WRITE\n"); - break; - case KYBER_OTHER: - seq_puts(m, "OTHER\n"); - break; - default: - seq_printf(m, "%u\n", khd->cur_domain); - break; - } + seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]); return 0; } @@ -930,7 +993,8 @@ static int kyber_batching_show(void *data, struct seq_file *m) {#name "_tokens", 0400, kyber_##name##_tokens_show} static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { KYBER_QUEUE_DOMAIN_ATTRS(read), - KYBER_QUEUE_DOMAIN_ATTRS(sync_write), + KYBER_QUEUE_DOMAIN_ATTRS(write), + KYBER_QUEUE_DOMAIN_ATTRS(discard), KYBER_QUEUE_DOMAIN_ATTRS(other), {"async_depth", 0400, kyber_async_depth_show}, {}, @@ -942,7 +1006,8 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { {#name "_waiting", 0400, kyber_##name##_waiting_show} static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { KYBER_HCTX_DOMAIN_ATTRS(read), - KYBER_HCTX_DOMAIN_ATTRS(sync_write), + KYBER_HCTX_DOMAIN_ATTRS(write), + KYBER_HCTX_DOMAIN_ATTRS(discard), KYBER_HCTX_DOMAIN_ATTRS(other), {"cur_domain", 0400, kyber_cur_domain_show}, {"batching", 0400, kyber_batching_show}, diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c @@ -1,7229 +0,0 @@ -/* - - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com> - Portions Copyright 2002 by Mylex (An IBM Business Unit) - - This program is free software; you may redistribute and/or modify it under - the terms of the GNU General Public License Version 2 as published by the - Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for complete details. - -*/ - - -#define DAC960_DriverVersion "2.5.49" -#define DAC960_DriverDate "21 Aug 2007" - - -#include <linux/compiler.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/miscdevice.h> -#include <linux/blkdev.h> -#include <linux/bio.h> -#include <linux/completion.h> -#include <linux/delay.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/blkpg.h> -#include <linux/dma-mapping.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/mutex.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/reboot.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/jiffies.h> -#include <linux/random.h> -#include <linux/scatterlist.h> -#include <asm/io.h> -#include <linux/uaccess.h> -#include "DAC960.h" - -#define DAC960_GAM_MINOR 252 - - -static DEFINE_MUTEX(DAC960_mutex); -static DAC960_Controller_T *DAC960_Controllers[DAC960_MaxControllers]; -static int DAC960_ControllerCount; -static struct proc_dir_entry *DAC960_ProcDirectoryEntry; - -static long disk_size(DAC960_Controller_T *p, int drive_nr) -{ - if (p->FirmwareType == DAC960_V1_Controller) { - if (drive_nr >= p->LogicalDriveCount) - return 0; - return p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveSize; - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - if (i == NULL) - return 0; - return i->ConfigurableDeviceSize; - } -} - -static int DAC960_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - int ret = -ENXIO; - - mutex_lock(&DAC960_mutex); - if (p->FirmwareType == DAC960_V1_Controller) { - if (p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveState == DAC960_V1_LogicalDrive_Offline) - goto out; - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - if (!i || i->LogicalDeviceState == DAC960_V2_LogicalDevice_Offline) - goto out; - } - - check_disk_change(bdev); - - if (!get_capacity(p->disks[drive_nr])) - goto out; - ret = 0; -out: - mutex_unlock(&DAC960_mutex); - return ret; -} - -static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct gendisk *disk = bdev->bd_disk; - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - - if (p->FirmwareType == DAC960_V1_Controller) { - geo->heads = p->V1.GeometryTranslationHeads; - geo->sectors = p->V1.GeometryTranslationSectors; - geo->cylinders = p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveSize / (geo->heads * geo->sectors); - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - switch (i->DriveGeometry) { - case DAC960_V2_Geometry_128_32: - geo->heads = 128; - geo->sectors = 32; - break; - case DAC960_V2_Geometry_255_63: - geo->heads = 255; - geo->sectors = 63; - break; - default: - DAC960_Error("Illegal Logical Device Geometry %d\n", - p, i->DriveGeometry); - return -EINVAL; - } - - geo->cylinders = i->ConfigurableDeviceSize / - (geo->heads * geo->sectors); - } - - return 0; -} - -static unsigned int DAC960_check_events(struct gendisk *disk, - unsigned int clearing) -{ - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - - if (!p->LogicalDriveInitiallyAccessible[drive_nr]) - return DISK_EVENT_MEDIA_CHANGE; - return 0; -} - -static int DAC960_revalidate_disk(struct gendisk *disk) -{ - DAC960_Controller_T *p = disk->queue->queuedata; - int unit = (long)disk->private_data; - - set_capacity(disk, disk_size(p, unit)); - return 0; -} - -static const struct block_device_operations DAC960_BlockDeviceOperations = { - .owner = THIS_MODULE, - .open = DAC960_open, - .getgeo = DAC960_getgeo, - .check_events = DAC960_check_events, - .revalidate_disk = DAC960_revalidate_disk, -}; - - -/* - DAC960_AnnounceDriver announces the Driver Version and Date, Author's Name, - Copyright Notice, and Electronic Mail Address. -*/ - -static void DAC960_AnnounceDriver(DAC960_Controller_T *Controller) -{ - DAC960_Announce("***** DAC960 RAID Driver Version " - DAC960_DriverVersion " of " - DAC960_DriverDate " *****\n", Controller); - DAC960_Announce("Copyright 1998-2001 by Leonard N. Zubkoff " - "<lnz@dandelion.com>\n", Controller); -} - - -/* - DAC960_Failure prints a standardized error message, and then returns false. -*/ - -static bool DAC960_Failure(DAC960_Controller_T *Controller, - unsigned char *ErrorMessage) -{ - DAC960_Error("While configuring DAC960 PCI RAID Controller at\n", - Controller); - if (Controller->IO_Address == 0) - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A " - "PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->PCI_Address); - else DAC960_Error("PCI Bus %d Device %d Function %d I/O Address " - "0x%X PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->IO_Address, - Controller->PCI_Address); - DAC960_Error("%s FAILED - DETACHING\n", Controller, ErrorMessage); - return false; -} - -/* - init_dma_loaf() and slice_dma_loaf() are helper functions for - aggregating the dma-mapped memory for a well-known collection of - data structures that are of different lengths. - - These routines don't guarantee any alignment. The caller must - include any space needed for alignment in the sizes of the structures - that are passed in. - */ - -static bool init_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf, - size_t len) -{ - void *cpu_addr; - dma_addr_t dma_handle; - - cpu_addr = pci_alloc_consistent(dev, len, &dma_handle); - if (cpu_addr == NULL) - return false; - - loaf->cpu_free = loaf->cpu_base = cpu_addr; - loaf->dma_free =loaf->dma_base = dma_handle; - loaf->length = len; - memset(cpu_addr, 0, len); - return true; -} - -static void *slice_dma_loaf(struct dma_loaf *loaf, size_t len, - dma_addr_t *dma_handle) -{ - void *cpu_end = loaf->cpu_free + len; - void *cpu_addr = loaf->cpu_free; - - BUG_ON(cpu_end > loaf->cpu_base + loaf->length); - *dma_handle = loaf->dma_free; - loaf->cpu_free = cpu_end; - loaf->dma_free += len; - return cpu_addr; -} - -static void free_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf_handle) -{ - if (loaf_handle->cpu_base != NULL) - pci_free_consistent(dev, loaf_handle->length, - loaf_handle->cpu_base, loaf_handle->dma_base); -} - - -/* - DAC960_CreateAuxiliaryStructures allocates and initializes the auxiliary - data structures for Controller. It returns true on success and false on - failure. -*/ - -static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) -{ - int CommandAllocationLength, CommandAllocationGroupSize; - int CommandsRemaining = 0, CommandIdentifier, CommandGroupByteCount; - void *AllocationPointer = NULL; - void *ScatterGatherCPU = NULL; - dma_addr_t ScatterGatherDMA; - struct dma_pool *ScatterGatherPool; - void *RequestSenseCPU = NULL; - dma_addr_t RequestSenseDMA; - struct dma_pool *RequestSensePool = NULL; - - if (Controller->FirmwareType == DAC960_V1_Controller) - { - CommandAllocationLength = offsetof(DAC960_Command_T, V1.EndMarker); - CommandAllocationGroupSize = DAC960_V1_CommandAllocationGroupSize; - ScatterGatherPool = dma_pool_create("DAC960_V1_ScatterGather", - &Controller->PCIDevice->dev, - DAC960_V1_ScatterGatherLimit * sizeof(DAC960_V1_ScatterGatherSegment_T), - sizeof(DAC960_V1_ScatterGatherSegment_T), 0); - if (ScatterGatherPool == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - Controller->ScatterGatherPool = ScatterGatherPool; - } - else - { - CommandAllocationLength = offsetof(DAC960_Command_T, V2.EndMarker); - CommandAllocationGroupSize = DAC960_V2_CommandAllocationGroupSize; - ScatterGatherPool = dma_pool_create("DAC960_V2_ScatterGather", - &Controller->PCIDevice->dev, - DAC960_V2_ScatterGatherLimit * sizeof(DAC960_V2_ScatterGatherSegment_T), - sizeof(DAC960_V2_ScatterGatherSegment_T), 0); - if (ScatterGatherPool == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - RequestSensePool = dma_pool_create("DAC960_V2_RequestSense", - &Controller->PCIDevice->dev, sizeof(DAC960_SCSI_RequestSense_T), - sizeof(int), 0); - if (RequestSensePool == NULL) { - dma_pool_destroy(ScatterGatherPool); - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - } - Controller->ScatterGatherPool = ScatterGatherPool; - Controller->V2.RequestSensePool = RequestSensePool; - } - Controller->CommandAllocationGroupSize = CommandAllocationGroupSize; - Controller->FreeCommands = NULL; - for (CommandIdentifier = 1; - CommandIdentifier <= Controller->DriverQueueDepth; - CommandIdentifier++) - { - DAC960_Command_T *Command; - if (--CommandsRemaining <= 0) - { - CommandsRemaining = - Controller->DriverQueueDepth - CommandIdentifier + 1; - if (CommandsRemaining > CommandAllocationGroupSize) - CommandsRemaining = CommandAllocationGroupSize; - CommandGroupByteCount = - CommandsRemaining * CommandAllocationLength; - AllocationPointer = kzalloc(CommandGroupByteCount, GFP_ATOMIC); - if (AllocationPointer == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION"); - } - Command = (DAC960_Command_T *) AllocationPointer; - AllocationPointer += CommandAllocationLength; - Command->CommandIdentifier = CommandIdentifier; - Command->Controller = Controller; - Command->Next = Controller->FreeCommands; - Controller->FreeCommands = Command; - Controller->Commands[CommandIdentifier-1] = Command; - ScatterGatherCPU = dma_pool_alloc(ScatterGatherPool, GFP_ATOMIC, - &ScatterGatherDMA); - if (ScatterGatherCPU == NULL) - return DAC960_Failure(Controller, "AUXILIARY STRUCTURE CREATION"); - - if (RequestSensePool != NULL) { - RequestSenseCPU = dma_pool_alloc(RequestSensePool, GFP_ATOMIC, - &RequestSenseDMA); - if (RequestSenseCPU == NULL) { - dma_pool_free(ScatterGatherPool, ScatterGatherCPU, - ScatterGatherDMA); - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION"); - } - } - if (Controller->FirmwareType == DAC960_V1_Controller) { - Command->cmd_sglist = Command->V1.ScatterList; - Command->V1.ScatterGatherList = - (DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU; - Command->V1.ScatterGatherListDMA = ScatterGatherDMA; - sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit); - } else { - Command->cmd_sglist = Command->V2.ScatterList; - Command->V2.ScatterGatherList = - (DAC960_V2_ScatterGatherSegment_T *)ScatterGatherCPU; - Command->V2.ScatterGatherListDMA = ScatterGatherDMA; - Command->V2.RequestSense = - (DAC960_SCSI_RequestSense_T *)RequestSenseCPU; - Command->V2.RequestSenseDMA = RequestSenseDMA; - sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit); - } - } - return true; -} - - -/* - DAC960_DestroyAuxiliaryStructures deallocates the auxiliary data - structures for Controller. -*/ - -static void DAC960_DestroyAuxiliaryStructures(DAC960_Controller_T *Controller) -{ - int i; - struct dma_pool *ScatterGatherPool = Controller->ScatterGatherPool; - struct dma_pool *RequestSensePool = NULL; - void *ScatterGatherCPU; - dma_addr_t ScatterGatherDMA; - void *RequestSenseCPU; - dma_addr_t RequestSenseDMA; - DAC960_Command_T *CommandGroup = NULL; - - - if (Controller->FirmwareType == DAC960_V2_Controller) - RequestSensePool = Controller->V2.RequestSensePool; - - Controller->FreeCommands = NULL; - for (i = 0; i < Controller->DriverQueueDepth; i++) - { - DAC960_Command_T *Command = Controller->Commands[i]; - - if (Command == NULL) - continue; - - if (Controller->FirmwareType == DAC960_V1_Controller) { - ScatterGatherCPU = (void *)Command->V1.ScatterGatherList; - ScatterGatherDMA = Command->V1.ScatterGatherListDMA; - RequestSenseCPU = NULL; - RequestSenseDMA = (dma_addr_t)0; - } else { - ScatterGatherCPU = (void *)Command->V2.ScatterGatherList; - ScatterGatherDMA = Command->V2.ScatterGatherListDMA; - RequestSenseCPU = (void *)Command->V2.RequestSense; - RequestSenseDMA = Command->V2.RequestSenseDMA; - } - if (ScatterGatherCPU != NULL) - dma_pool_free(ScatterGatherPool, ScatterGatherCPU, ScatterGatherDMA); - if (RequestSenseCPU != NULL) - dma_pool_free(RequestSensePool, RequestSenseCPU, RequestSenseDMA); - - if ((Command->CommandIdentifier - % Controller->CommandAllocationGroupSize) == 1) { - /* - * We can't free the group of commands until all of the - * request sense and scatter gather dma structures are free. - * Remember the beginning of the group, but don't free it - * until we've reached the beginning of the next group. - */ - kfree(CommandGroup); - CommandGroup = Command; - } - Controller->Commands[i] = NULL; - } - kfree(CommandGroup); - - if (Controller->CombinedStatusBuffer != NULL) - { - kfree(Controller->CombinedStatusBuffer); - Controller->CombinedStatusBuffer = NULL; - Controller->CurrentStatusBuffer = NULL; - } - - dma_pool_destroy(ScatterGatherPool); - if (Controller->FirmwareType == DAC960_V1_Controller) - return; - - dma_pool_destroy(RequestSensePool); - - for (i = 0; i < DAC960_MaxLogicalDrives; i++) { - kfree(Controller->V2.LogicalDeviceInformation[i]); - Controller->V2.LogicalDeviceInformation[i] = NULL; - } - - for (i = 0; i < DAC960_V2_MaxPhysicalDevices; i++) - { - kfree(Controller->V2.PhysicalDeviceInformation[i]); - Controller->V2.PhysicalDeviceInformation[i] = NULL; - kfree(Controller->V2.InquiryUnitSerialNumber[i]); - Controller->V2.InquiryUnitSerialNumber[i] = NULL; - } -} - - -/* - DAC960_V1_ClearCommand clears critical fields of Command for DAC960 V1 - Firmware Controllers. -*/ - -static inline void DAC960_V1_ClearCommand(DAC960_Command_T *Command) -{ - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - memset(CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); - Command->V1.CommandStatus = 0; -} - - -/* - DAC960_V2_ClearCommand clears critical fields of Command for DAC960 V2 - Firmware Controllers. -*/ - -static inline void DAC960_V2_ClearCommand(DAC960_Command_T *Command) -{ - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T)); - Command->V2.CommandStatus = 0; -} - - -/* - DAC960_AllocateCommand allocates a Command structure from Controller's - free list. During driver initialization, a special initialization command - has been placed on the free list to guarantee that command allocation can - never fail. -*/ - -static inline DAC960_Command_T *DAC960_AllocateCommand(DAC960_Controller_T - *Controller) -{ - DAC960_Command_T *Command = Controller->FreeCommands; - if (Command == NULL) return NULL; - Controller->FreeCommands = Command->Next; - Command->Next = NULL; - return Command; -} - - -/* - DAC960_DeallocateCommand deallocates Command, returning it to Controller's - free list. -*/ - -static inline void DAC960_DeallocateCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - - Command->Request = NULL; - Command->Next = Controller->FreeCommands; - Controller->FreeCommands = Command; -} - - -/* - DAC960_WaitForCommand waits for a wake_up on Controller's Command Wait Queue. -*/ - -static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) -{ - spin_unlock_irq(&Controller->queue_lock); - __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&Controller->queue_lock); -} - -/* - DAC960_GEM_QueueCommand queues Command for DAC960 GEM Series Controllers. -*/ - -static void DAC960_GEM_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_GEM_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_GEM_MemoryMailboxNewCommand(ControllerBaseAddress); - - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - -/* - DAC960_BA_QueueCommand queues Command for DAC960 BA Series Controllers. -*/ - -static void DAC960_BA_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_BA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_BA_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LP_QueueCommand queues Command for DAC960 LP Series Controllers. -*/ - -static void DAC960_LP_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LP_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LP_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LA_QueueCommandDualMode queues Command for DAC960 LA Series - Controllers with Dual Mode Firmware. -*/ - -static void DAC960_LA_QueueCommandDualMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LA_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LA_QueueCommandSingleMode queues Command for DAC960 LA Series - Controllers with Single Mode Firmware. -*/ - -static void DAC960_LA_QueueCommandSingleMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PG_QueueCommandDualMode queues Command for DAC960 PG Series - Controllers with Dual Mode Firmware. -*/ - -static void DAC960_PG_QueueCommandDualMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_PG_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PG_QueueCommandSingleMode queues Command for DAC960 PG Series - Controllers with Single Mode Firmware. -*/ - -static void DAC960_PG_QueueCommandSingleMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PD_QueueCommand queues Command for DAC960 PD Series Controllers. -*/ - -static void DAC960_PD_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - while (DAC960_PD_MailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox); - DAC960_PD_NewCommand(ControllerBaseAddress); -} - - -/* - DAC960_P_QueueCommand queues Command for DAC960 P Series Controllers. -*/ - -static void DAC960_P_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - switch (CommandMailbox->Common.CommandOpcode) - { - case DAC960_V1_Enquiry: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Enquiry_Old; - break; - case DAC960_V1_GetDeviceState: - CommandMailbox->Common.CommandOpcode = DAC960_V1_GetDeviceState_Old; - break; - case DAC960_V1_Read: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Read_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_Write: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Write_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_ReadWithScatterGather: - CommandMailbox->Common.CommandOpcode = - DAC960_V1_ReadWithScatterGather_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_WriteWithScatterGather: - CommandMailbox->Common.CommandOpcode = - DAC960_V1_WriteWithScatterGather_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - default: - break; - } - while (DAC960_PD_MailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox); - DAC960_PD_NewCommand(ControllerBaseAddress); -} - - -/* - DAC960_ExecuteCommand executes Command and waits for completion. -*/ - -static void DAC960_ExecuteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DECLARE_COMPLETION_ONSTACK(Completion); - unsigned long flags; - Command->Completion = &Completion; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - - if (in_interrupt()) - return; - wait_for_completion(&Completion); -} - - -/* - DAC960_V1_ExecuteType3 executes a DAC960 V1 Firmware Controller Type 3 - Command and waits for completion. It returns true on success and false - on failure. -*/ - -static bool DAC960_V1_ExecuteType3(DAC960_Controller_T *Controller, - DAC960_V1_CommandOpcode_T CommandOpcode, - dma_addr_t DataDMA) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Type3.CommandOpcode = CommandOpcode; - CommandMailbox->Type3.BusAddress = DataDMA; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V1.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V1_NormalCompletion); -} - - -/* - DAC960_V1_ExecuteTypeB executes a DAC960 V1 Firmware Controller Type 3B - Command and waits for completion. It returns true on success and false - on failure. -*/ - -static bool DAC960_V1_ExecuteType3B(DAC960_Controller_T *Controller, - DAC960_V1_CommandOpcode_T CommandOpcode, - unsigned char CommandOpcode2, - dma_addr_t DataDMA) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Type3B.CommandOpcode = CommandOpcode; - CommandMailbox->Type3B.CommandOpcode2 = CommandOpcode2; - CommandMailbox->Type3B.BusAddress = DataDMA; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V1.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V1_NormalCompletion); -} - - -/* - DAC960_V1_ExecuteType3D executes a DAC960 V1 Firmware Controller Type 3D - Command and waits for completion. It returns true on success and false - on failure. -*/ - -static bool DAC960_V1_ExecuteType3D(DAC960_Controller_T *Controller, - DAC960_V1_CommandOpcode_T CommandOpcode, - unsigned char Channel, - unsigned char TargetID, - dma_addr_t DataDMA) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Type3D.CommandOpcode = CommandOpcode; - CommandMailbox->Type3D.Channel = Channel; - CommandMailbox->Type3D.TargetID = TargetID; - CommandMailbox->Type3D.BusAddress = DataDMA; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V1.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V1_NormalCompletion); -} - - -/* - DAC960_V2_GeneralInfo executes a DAC960 V2 Firmware General Information - Reading IOCTL Command and waits for completion. It returns true on success - and false on failure. - - Return data in The controller's HealthStatusBuffer, which is dma-able memory -*/ - -static bool DAC960_V2_GeneralInfo(DAC960_Controller_T *Controller) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->Common.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->Common.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->Common.DataTransferSize = sizeof(DAC960_V2_HealthStatusBuffer_T); - CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_GetHealthStatus; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.HealthStatusBufferDMA; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->Common.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_ControllerInfo executes a DAC960 V2 Firmware Controller - Information Reading IOCTL Command and waits for completion. It returns - true on success and false on failure. - - Data is returned in the controller's V2.NewControllerInformation dma-able - memory buffer. -*/ - -static bool DAC960_V2_NewControllerInfo(DAC960_Controller_T *Controller) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->ControllerInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->ControllerInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->ControllerInfo.DataTransferSize = sizeof(DAC960_V2_ControllerInfo_T); - CommandMailbox->ControllerInfo.ControllerNumber = 0; - CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewControllerInformationDMA; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->ControllerInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_LogicalDeviceInfo executes a DAC960 V2 Firmware Controller Logical - Device Information Reading IOCTL Command and waits for completion. It - returns true on success and false on failure. - - Data is returned in the controller's V2.NewLogicalDeviceInformation -*/ - -static bool DAC960_V2_NewLogicalDeviceInfo(DAC960_Controller_T *Controller, - unsigned short LogicalDeviceNumber) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->LogicalDeviceInfo.CommandOpcode = - DAC960_V2_IOCTL; - CommandMailbox->LogicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->LogicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->LogicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_LogicalDeviceInfo_T); - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = DAC960_V2_GetLogicalDeviceInfoValid; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewLogicalDeviceInformationDMA; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->LogicalDeviceInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_PhysicalDeviceInfo executes a DAC960 V2 Firmware Controller "Read - Physical Device Information" IOCTL Command and waits for completion. It - returns true on success and false on failure. - - The Channel, TargetID, LogicalUnit arguments should be 0 the first time - this function is called for a given controller. This will return data - for the "first" device on that controller. The returned data includes a - Channel, TargetID, LogicalUnit that can be passed in to this routine to - get data for the NEXT device on that controller. - - Data is stored in the controller's V2.NewPhysicalDeviceInfo dma-able - memory buffer. - -*/ - -static bool DAC960_V2_NewPhysicalDeviceInfo(DAC960_Controller_T *Controller, - unsigned char Channel, - unsigned char TargetID, - unsigned char LogicalUnit) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalDeviceInfo_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit = LogicalUnit; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetPhysicalDeviceInfoValid; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewPhysicalDeviceInformationDMA; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->PhysicalDeviceInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -static void DAC960_V2_ConstructNewUnitSerialNumber( - DAC960_Controller_T *Controller, - DAC960_V2_CommandMailbox_T *CommandMailbox, int Channel, int TargetID, - int LogicalUnit) -{ - CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10_Passthru; - CommandMailbox->SCSI_10.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->SCSI_10.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->SCSI_10.DataTransferSize = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - CommandMailbox->SCSI_10.PhysicalDevice.LogicalUnit = LogicalUnit; - CommandMailbox->SCSI_10.PhysicalDevice.TargetID = TargetID; - CommandMailbox->SCSI_10.PhysicalDevice.Channel = Channel; - CommandMailbox->SCSI_10.CDBLength = 6; - CommandMailbox->SCSI_10.SCSI_CDB[0] = 0x12; /* INQUIRY */ - CommandMailbox->SCSI_10.SCSI_CDB[1] = 1; /* EVPD = 1 */ - CommandMailbox->SCSI_10.SCSI_CDB[2] = 0x80; /* Page Code */ - CommandMailbox->SCSI_10.SCSI_CDB[3] = 0; /* Reserved */ - CommandMailbox->SCSI_10.SCSI_CDB[4] = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - CommandMailbox->SCSI_10.SCSI_CDB[5] = 0; /* Control */ - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewInquiryUnitSerialNumberDMA; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->SCSI_10.DataTransferSize; -} - - -/* - DAC960_V2_NewUnitSerialNumber executes an SCSI pass-through - Inquiry command to a SCSI device identified by Channel number, - Target id, Logical Unit Number. This function Waits for completion - of the command. - - The return data includes Unit Serial Number information for the - specified device. - - Data is stored in the controller's V2.NewPhysicalDeviceInfo dma-able - memory buffer. -*/ - -static bool DAC960_V2_NewInquiryUnitSerialNumber(DAC960_Controller_T *Controller, - int Channel, int TargetID, int LogicalUnit) -{ - DAC960_Command_T *Command; - DAC960_V2_CommandMailbox_T *CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - Command = DAC960_AllocateCommand(Controller); - CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - - DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox, - Channel, TargetID, LogicalUnit); - - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_DeviceOperation executes a DAC960 V2 Firmware Controller Device - Operation IOCTL Command and waits for completion. It returns true on - success and false on failure. -*/ - -static bool DAC960_V2_DeviceOperation(DAC960_Controller_T *Controller, - DAC960_V2_IOCTL_Opcode_T IOCTL_Opcode, - DAC960_V2_OperationDevice_T - OperationDevice) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->DeviceOperation.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->DeviceOperation.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->DeviceOperation.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->DeviceOperation.IOCTL_Opcode = IOCTL_Opcode; - CommandMailbox->DeviceOperation.OperationDevice = OperationDevice; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V1_EnableMemoryMailboxInterface enables the Memory Mailbox Interface - for DAC960 V1 Firmware Controllers. - - PD and P controller types have no memory mailbox, but still need the - other dma mapped memory. -*/ - -static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T - *Controller) -{ - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_HardwareType_T hw_type = Controller->HardwareType; - struct pci_dev *PCI_Device = Controller->PCIDevice; - struct dma_loaf *DmaPages = &Controller->DmaPages; - size_t DmaPagesSize; - size_t CommandMailboxesSize; - size_t StatusMailboxesSize; - - DAC960_V1_CommandMailbox_T *CommandMailboxesMemory; - dma_addr_t CommandMailboxesMemoryDMA; - - DAC960_V1_StatusMailbox_T *StatusMailboxesMemory; - dma_addr_t StatusMailboxesMemoryDMA; - - DAC960_V1_CommandMailbox_T CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - int TimeoutCounter; - int i; - - memset(&CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); - - if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) - return DAC960_Failure(Controller, "DMA mask out of range"); - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) { - CommandMailboxesSize = 0; - StatusMailboxesSize = 0; - } else { - CommandMailboxesSize = DAC960_V1_CommandMailboxCount * sizeof(DAC960_V1_CommandMailbox_T); - StatusMailboxesSize = DAC960_V1_StatusMailboxCount * sizeof(DAC960_V1_StatusMailbox_T); - } - DmaPagesSize = CommandMailboxesSize + StatusMailboxesSize + - sizeof(DAC960_V1_DCDB_T) + sizeof(DAC960_V1_Enquiry_T) + - sizeof(DAC960_V1_ErrorTable_T) + sizeof(DAC960_V1_EventLogEntry_T) + - sizeof(DAC960_V1_RebuildProgress_T) + - sizeof(DAC960_V1_LogicalDriveInformationArray_T) + - sizeof(DAC960_V1_BackgroundInitializationStatus_T) + - sizeof(DAC960_V1_DeviceState_T) + sizeof(DAC960_SCSI_Inquiry_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - - if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize)) - return false; - - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) - goto skip_mailboxes; - - CommandMailboxesMemory = slice_dma_loaf(DmaPages, - CommandMailboxesSize, &CommandMailboxesMemoryDMA); - - /* These are the base addresses for the command memory mailbox array */ - Controller->V1.FirstCommandMailbox = CommandMailboxesMemory; - Controller->V1.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA; - - CommandMailboxesMemory += DAC960_V1_CommandMailboxCount - 1; - Controller->V1.LastCommandMailbox = CommandMailboxesMemory; - Controller->V1.NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.PreviousCommandMailbox1 = Controller->V1.LastCommandMailbox; - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.LastCommandMailbox - 1; - - /* These are the base addresses for the status memory mailbox array */ - StatusMailboxesMemory = slice_dma_loaf(DmaPages, - StatusMailboxesSize, &StatusMailboxesMemoryDMA); - - Controller->V1.FirstStatusMailbox = StatusMailboxesMemory; - Controller->V1.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA; - StatusMailboxesMemory += DAC960_V1_StatusMailboxCount - 1; - Controller->V1.LastStatusMailbox = StatusMailboxesMemory; - Controller->V1.NextStatusMailbox = Controller->V1.FirstStatusMailbox; - -skip_mailboxes: - Controller->V1.MonitoringDCDB = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_DCDB_T), - &Controller->V1.MonitoringDCDB_DMA); - - Controller->V1.NewEnquiry = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_Enquiry_T), - &Controller->V1.NewEnquiryDMA); - - Controller->V1.NewErrorTable = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_ErrorTable_T), - &Controller->V1.NewErrorTableDMA); - - Controller->V1.EventLogEntry = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_EventLogEntry_T), - &Controller->V1.EventLogEntryDMA); - - Controller->V1.RebuildProgress = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_RebuildProgress_T), - &Controller->V1.RebuildProgressDMA); - - Controller->V1.NewLogicalDriveInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_LogicalDriveInformationArray_T), - &Controller->V1.NewLogicalDriveInformationDMA); - - Controller->V1.BackgroundInitializationStatus = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_BackgroundInitializationStatus_T), - &Controller->V1.BackgroundInitializationStatusDMA); - - Controller->V1.NewDeviceState = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_DeviceState_T), - &Controller->V1.NewDeviceStateDMA); - - Controller->V1.NewInquiryStandardData = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_T), - &Controller->V1.NewInquiryStandardDataDMA); - - Controller->V1.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - &Controller->V1.NewInquiryUnitSerialNumberDMA); - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) - return true; - - /* Enable the Memory Mailbox Interface. */ - Controller->V1.DualModeMemoryMailboxInterface = true; - CommandMailbox.TypeX.CommandOpcode = 0x2B; - CommandMailbox.TypeX.CommandIdentifier = 0; - CommandMailbox.TypeX.CommandOpcode2 = 0x14; - CommandMailbox.TypeX.CommandMailboxesBusAddress = - Controller->V1.FirstCommandMailboxDMA; - CommandMailbox.TypeX.StatusMailboxesBusAddress = - Controller->V1.FirstStatusMailboxDMA; -#define TIMEOUT_COUNT 1000000 - - for (i = 0; i < 2; i++) - switch (Controller->HardwareType) - { - case DAC960_LA_Controller: - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (!DAC960_LA_HardwareMailboxFullP(ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - DAC960_LA_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox); - DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress); - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (DAC960_LA_HardwareMailboxStatusAvailableP( - ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - CommandStatus = DAC960_LA_ReadStatusRegister(ControllerBaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - if (CommandStatus == DAC960_V1_NormalCompletion) return true; - Controller->V1.DualModeMemoryMailboxInterface = false; - CommandMailbox.TypeX.CommandOpcode2 = 0x10; - break; - case DAC960_PG_Controller: - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (!DAC960_PG_HardwareMailboxFullP(ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - DAC960_PG_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox); - DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress); - - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (DAC960_PG_HardwareMailboxStatusAvailableP( - ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - CommandStatus = DAC960_PG_ReadStatusRegister(ControllerBaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - if (CommandStatus == DAC960_V1_NormalCompletion) return true; - Controller->V1.DualModeMemoryMailboxInterface = false; - CommandMailbox.TypeX.CommandOpcode2 = 0x10; - break; - default: - DAC960_Failure(Controller, "Unknown Controller Type\n"); - break; - } - return false; -} - - -/* - DAC960_V2_EnableMemoryMailboxInterface enables the Memory Mailbox Interface - for DAC960 V2 Firmware Controllers. - - Aggregate the space needed for the controller's memory mailbox and - the other data structures that will be targets of dma transfers with - the controller. Allocate a dma-mapped region of memory to hold these - structures. Then, save CPU pointers and dma_addr_t values to reference - the structures that are contained in that region. -*/ - -static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T - *Controller) -{ - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - struct pci_dev *PCI_Device = Controller->PCIDevice; - struct dma_loaf *DmaPages = &Controller->DmaPages; - size_t DmaPagesSize; - size_t CommandMailboxesSize; - size_t StatusMailboxesSize; - - DAC960_V2_CommandMailbox_T *CommandMailboxesMemory; - dma_addr_t CommandMailboxesMemoryDMA; - - DAC960_V2_StatusMailbox_T *StatusMailboxesMemory; - dma_addr_t StatusMailboxesMemoryDMA; - - DAC960_V2_CommandMailbox_T *CommandMailbox; - dma_addr_t CommandMailboxDMA; - DAC960_V2_CommandStatus_T CommandStatus; - - if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) && - pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) - return DAC960_Failure(Controller, "DMA mask out of range"); - - /* This is a temporary dma mapping, used only in the scope of this function */ - CommandMailbox = pci_alloc_consistent(PCI_Device, - sizeof(DAC960_V2_CommandMailbox_T), &CommandMailboxDMA); - if (CommandMailbox == NULL) - return false; - - CommandMailboxesSize = DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T); - StatusMailboxesSize = DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T); - DmaPagesSize = - CommandMailboxesSize + StatusMailboxesSize + - sizeof(DAC960_V2_HealthStatusBuffer_T) + - sizeof(DAC960_V2_ControllerInfo_T) + - sizeof(DAC960_V2_LogicalDeviceInfo_T) + - sizeof(DAC960_V2_PhysicalDeviceInfo_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T) + - sizeof(DAC960_V2_Event_T) + - sizeof(DAC960_V2_PhysicalToLogicalDevice_T); - - if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize)) { - pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T), - CommandMailbox, CommandMailboxDMA); - return false; - } - - CommandMailboxesMemory = slice_dma_loaf(DmaPages, - CommandMailboxesSize, &CommandMailboxesMemoryDMA); - - /* These are the base addresses for the command memory mailbox array */ - Controller->V2.FirstCommandMailbox = CommandMailboxesMemory; - Controller->V2.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA; - - CommandMailboxesMemory += DAC960_V2_CommandMailboxCount - 1; - Controller->V2.LastCommandMailbox = CommandMailboxesMemory; - Controller->V2.NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.PreviousCommandMailbox1 = Controller->V2.LastCommandMailbox; - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.LastCommandMailbox - 1; - - /* These are the base addresses for the status memory mailbox array */ - StatusMailboxesMemory = slice_dma_loaf(DmaPages, - StatusMailboxesSize, &StatusMailboxesMemoryDMA); - - Controller->V2.FirstStatusMailbox = StatusMailboxesMemory; - Controller->V2.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA; - StatusMailboxesMemory += DAC960_V2_StatusMailboxCount - 1; - Controller->V2.LastStatusMailbox = StatusMailboxesMemory; - Controller->V2.NextStatusMailbox = Controller->V2.FirstStatusMailbox; - - Controller->V2.HealthStatusBuffer = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_HealthStatusBuffer_T), - &Controller->V2.HealthStatusBufferDMA); - - Controller->V2.NewControllerInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_ControllerInfo_T), - &Controller->V2.NewControllerInformationDMA); - - Controller->V2.NewLogicalDeviceInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_LogicalDeviceInfo_T), - &Controller->V2.NewLogicalDeviceInformationDMA); - - Controller->V2.NewPhysicalDeviceInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_PhysicalDeviceInfo_T), - &Controller->V2.NewPhysicalDeviceInformationDMA); - - Controller->V2.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - &Controller->V2.NewInquiryUnitSerialNumberDMA); - - Controller->V2.Event = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_Event_T), - &Controller->V2.EventDMA); - - Controller->V2.PhysicalToLogicalDevice = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_PhysicalToLogicalDevice_T), - &Controller->V2.PhysicalToLogicalDeviceDMA); - - /* - Enable the Memory Mailbox Interface. - - I don't know why we can't just use one of the memory mailboxes - we just allocated to do this, instead of using this temporary one. - Try this change later. - */ - memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T)); - CommandMailbox->SetMemoryMailbox.CommandIdentifier = 1; - CommandMailbox->SetMemoryMailbox.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->SetMemoryMailbox.CommandControlBits.NoAutoRequestSense = true; - CommandMailbox->SetMemoryMailbox.FirstCommandMailboxSizeKB = - (DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T)) >> 10; - CommandMailbox->SetMemoryMailbox.FirstStatusMailboxSizeKB = - (DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T)) >> 10; - CommandMailbox->SetMemoryMailbox.SecondCommandMailboxSizeKB = 0; - CommandMailbox->SetMemoryMailbox.SecondStatusMailboxSizeKB = 0; - CommandMailbox->SetMemoryMailbox.RequestSenseSize = 0; - CommandMailbox->SetMemoryMailbox.IOCTL_Opcode = DAC960_V2_SetMemoryMailbox; - CommandMailbox->SetMemoryMailbox.HealthStatusBufferSizeKB = 1; - CommandMailbox->SetMemoryMailbox.HealthStatusBufferBusAddress = - Controller->V2.HealthStatusBufferDMA; - CommandMailbox->SetMemoryMailbox.FirstCommandMailboxBusAddress = - Controller->V2.FirstCommandMailboxDMA; - CommandMailbox->SetMemoryMailbox.FirstStatusMailboxBusAddress = - Controller->V2.FirstStatusMailboxDMA; - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - while (DAC960_GEM_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_GEM_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_GEM_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_GEM_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = DAC960_GEM_ReadCommandStatus(ControllerBaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - case DAC960_BA_Controller: - while (DAC960_BA_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_BA_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_BA_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_BA_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = DAC960_BA_ReadCommandStatus(ControllerBaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - case DAC960_LP_Controller: - while (DAC960_LP_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_LP_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_LP_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_LP_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = DAC960_LP_ReadCommandStatus(ControllerBaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - default: - DAC960_Failure(Controller, "Unknown Controller Type\n"); - CommandStatus = DAC960_V2_AbormalCompletion; - break; - } - pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T), - CommandMailbox, CommandMailboxDMA); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V1_ReadControllerConfiguration reads the Configuration Information - from DAC960 V1 Firmware Controllers and initializes the Controller structure. -*/ - -static bool DAC960_V1_ReadControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_V1_Enquiry2_T *Enquiry2; - dma_addr_t Enquiry2DMA; - DAC960_V1_Config2_T *Config2; - dma_addr_t Config2DMA; - int LogicalDriveNumber, Channel, TargetID; - struct dma_loaf local_dma; - - if (!init_dma_loaf(Controller->PCIDevice, &local_dma, - sizeof(DAC960_V1_Enquiry2_T) + sizeof(DAC960_V1_Config2_T))) - return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION"); - - Enquiry2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Enquiry2_T), &Enquiry2DMA); - Config2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Config2_T), &Config2DMA); - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry, - Controller->V1.NewEnquiryDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "ENQUIRY"); - } - memcpy(&Controller->V1.Enquiry, Controller->V1.NewEnquiry, - sizeof(DAC960_V1_Enquiry_T)); - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry2, Enquiry2DMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "ENQUIRY2"); - } - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_ReadConfig2, Config2DMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "READ CONFIG2"); - } - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_GetLogicalDriveInformation, - Controller->V1.NewLogicalDriveInformationDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "GET LOGICAL DRIVE INFORMATION"); - } - memcpy(&Controller->V1.LogicalDriveInformation, - Controller->V1.NewLogicalDriveInformation, - sizeof(DAC960_V1_LogicalDriveInformationArray_T)); - - for (Channel = 0; Channel < Enquiry2->ActualChannels; Channel++) - for (TargetID = 0; TargetID < Enquiry2->MaxTargets; TargetID++) { - if (!DAC960_V1_ExecuteType3D(Controller, DAC960_V1_GetDeviceState, - Channel, TargetID, - Controller->V1.NewDeviceStateDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "GET DEVICE STATE"); - } - memcpy(&Controller->V1.DeviceState[Channel][TargetID], - Controller->V1.NewDeviceState, sizeof(DAC960_V1_DeviceState_T)); - } - /* - Initialize the Controller Model Name and Full Model Name fields. - */ - switch (Enquiry2->HardwareID.SubModel) - { - case DAC960_V1_P_PD_PU: - if (Enquiry2->SCSICapability.BusSpeed == DAC960_V1_Ultra) - strcpy(Controller->ModelName, "DAC960PU"); - else strcpy(Controller->ModelName, "DAC960PD"); - break; - case DAC960_V1_PL: - strcpy(Controller->ModelName, "DAC960PL"); - break; - case DAC960_V1_PG: - strcpy(Controller->ModelName, "DAC960PG"); - break; - case DAC960_V1_PJ: - strcpy(Controller->ModelName, "DAC960PJ"); - break; - case DAC960_V1_PR: - strcpy(Controller->ModelName, "DAC960PR"); - break; - case DAC960_V1_PT: - strcpy(Controller->ModelName, "DAC960PT"); - break; - case DAC960_V1_PTL0: - strcpy(Controller->ModelName, "DAC960PTL0"); - break; - case DAC960_V1_PRL: - strcpy(Controller->ModelName, "DAC960PRL"); - break; - case DAC960_V1_PTL1: - strcpy(Controller->ModelName, "DAC960PTL1"); - break; - case DAC960_V1_1164P: - strcpy(Controller->ModelName, "DAC1164P"); - break; - default: - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "MODEL VERIFICATION"); - } - strcpy(Controller->FullModelName, "Mylex "); - strcat(Controller->FullModelName, Controller->ModelName); - /* - Initialize the Controller Firmware Version field and verify that it - is a supported firmware version. The supported firmware versions are: - - DAC1164P 5.06 and above - DAC960PTL/PRL/PJ/PG 4.06 and above - DAC960PU/PD/PL 3.51 and above - DAC960PU/PD/PL/P 2.73 and above - */ -#if defined(CONFIG_ALPHA) - /* - DEC Alpha machines were often equipped with DAC960 cards that were - OEMed from Mylex, and had their own custom firmware. Version 2.70, - the last custom FW revision to be released by DEC for these older - controllers, appears to work quite well with this driver. - - Cards tested successfully were several versions each of the PD and - PU, called by DEC the KZPSC and KZPAC, respectively, and having - the Manufacturer Numbers (from Mylex), usually on a sticker on the - back of the board, of: - - KZPSC: D040347 (1-channel) or D040348 (2-channel) or D040349 (3-channel) - KZPAC: D040395 (1-channel) or D040396 (2-channel) or D040397 (3-channel) - */ -# define FIRMWARE_27X "2.70" -#else -# define FIRMWARE_27X "2.73" -#endif - - if (Enquiry2->FirmwareID.MajorVersion == 0) - { - Enquiry2->FirmwareID.MajorVersion = - Controller->V1.Enquiry.MajorFirmwareVersion; - Enquiry2->FirmwareID.MinorVersion = - Controller->V1.Enquiry.MinorFirmwareVersion; - Enquiry2->FirmwareID.FirmwareType = '0'; - Enquiry2->FirmwareID.TurnID = 0; - } - snprintf(Controller->FirmwareVersion, sizeof(Controller->FirmwareVersion), - "%d.%02d-%c-%02d", - Enquiry2->FirmwareID.MajorVersion, - Enquiry2->FirmwareID.MinorVersion, - Enquiry2->FirmwareID.FirmwareType, - Enquiry2->FirmwareID.TurnID); - if (!((Controller->FirmwareVersion[0] == '5' && - strcmp(Controller->FirmwareVersion, "5.06") >= 0) || - (Controller->FirmwareVersion[0] == '4' && - strcmp(Controller->FirmwareVersion, "4.06") >= 0) || - (Controller->FirmwareVersion[0] == '3' && - strcmp(Controller->FirmwareVersion, "3.51") >= 0) || - (Controller->FirmwareVersion[0] == '2' && - strcmp(Controller->FirmwareVersion, FIRMWARE_27X) >= 0))) - { - DAC960_Failure(Controller, "FIRMWARE VERSION VERIFICATION"); - DAC960_Error("Firmware Version = '%s'\n", Controller, - Controller->FirmwareVersion); - free_dma_loaf(Controller->PCIDevice, &local_dma); - return false; - } - /* - Initialize the Controller Channels, Targets, Memory Size, and SAF-TE - Enclosure Management Enabled fields. - */ - Controller->Channels = Enquiry2->ActualChannels; - Controller->Targets = Enquiry2->MaxTargets; - Controller->MemorySize = Enquiry2->MemorySize >> 20; - Controller->V1.SAFTE_EnclosureManagementEnabled = - (Enquiry2->FaultManagementType == DAC960_V1_SAFTE); - /* - Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive - Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and - Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one - less than the Controller Queue Depth to allow for an automatic drive - rebuild operation. - */ - Controller->ControllerQueueDepth = Controller->V1.Enquiry.MaxCommands; - Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1; - if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth) - Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth; - Controller->LogicalDriveCount = - Controller->V1.Enquiry.NumberOfLogicalDrives; - Controller->MaxBlocksPerCommand = Enquiry2->MaxBlocksPerCommand; - Controller->ControllerScatterGatherLimit = Enquiry2->MaxScatterGatherEntries; - Controller->DriverScatterGatherLimit = - Controller->ControllerScatterGatherLimit; - if (Controller->DriverScatterGatherLimit > DAC960_V1_ScatterGatherLimit) - Controller->DriverScatterGatherLimit = DAC960_V1_ScatterGatherLimit; - /* - Initialize the Stripe Size, Segment Size, and Geometry Translation. - */ - Controller->V1.StripeSize = Config2->BlocksPerStripe * Config2->BlockFactor - >> (10 - DAC960_BlockSizeBits); - Controller->V1.SegmentSize = Config2->BlocksPerCacheLine * Config2->BlockFactor - >> (10 - DAC960_BlockSizeBits); - switch (Config2->DriveGeometry) - { - case DAC960_V1_Geometry_128_32: - Controller->V1.GeometryTranslationHeads = 128; - Controller->V1.GeometryTranslationSectors = 32; - break; - case DAC960_V1_Geometry_255_63: - Controller->V1.GeometryTranslationHeads = 255; - Controller->V1.GeometryTranslationSectors = 63; - break; - default: - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "CONFIG2 DRIVE GEOMETRY"); - } - /* - Initialize the Background Initialization Status. - */ - if ((Controller->FirmwareVersion[0] == '4' && - strcmp(Controller->FirmwareVersion, "4.08") >= 0) || - (Controller->FirmwareVersion[0] == '5' && - strcmp(Controller->FirmwareVersion, "5.08") >= 0)) - { - Controller->V1.BackgroundInitializationStatusSupported = true; - DAC960_V1_ExecuteType3B(Controller, - DAC960_V1_BackgroundInitializationControl, 0x20, - Controller-> - V1.BackgroundInitializationStatusDMA); - memcpy(&Controller->V1.LastBackgroundInitializationStatus, - Controller->V1.BackgroundInitializationStatus, - sizeof(DAC960_V1_BackgroundInitializationStatus_T)); - } - /* - Initialize the Logical Drive Initially Accessible flag. - */ - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - if (Controller->V1.LogicalDriveInformation - [LogicalDriveNumber].LogicalDriveState != - DAC960_V1_LogicalDrive_Offline) - Controller->LogicalDriveInitiallyAccessible[LogicalDriveNumber] = true; - Controller->V1.LastRebuildStatus = DAC960_V1_NoRebuildOrCheckInProgress; - free_dma_loaf(Controller->PCIDevice, &local_dma); - return true; -} - - -/* - DAC960_V2_ReadControllerConfiguration reads the Configuration Information - from DAC960 V2 Firmware Controllers and initializes the Controller structure. -*/ - -static bool DAC960_V2_ReadControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - unsigned short LogicalDeviceNumber = 0; - int ModelNameLength; - - /* Get data into dma-able area, then copy into permanent location */ - if (!DAC960_V2_NewControllerInfo(Controller)) - return DAC960_Failure(Controller, "GET CONTROLLER INFO"); - memcpy(ControllerInfo, Controller->V2.NewControllerInformation, - sizeof(DAC960_V2_ControllerInfo_T)); - - - if (!DAC960_V2_GeneralInfo(Controller)) - return DAC960_Failure(Controller, "GET HEALTH STATUS"); - - /* - Initialize the Controller Model Name and Full Model Name fields. - */ - ModelNameLength = sizeof(ControllerInfo->ControllerName); - if (ModelNameLength > sizeof(Controller->ModelName)-1) - ModelNameLength = sizeof(Controller->ModelName)-1; - memcpy(Controller->ModelName, ControllerInfo->ControllerName, - ModelNameLength); - ModelNameLength--; - while (Controller->ModelName[ModelNameLength] == ' ' || - Controller->ModelName[ModelNameLength] == '\0') - ModelNameLength--; - Controller->ModelName[++ModelNameLength] = '\0'; - strcpy(Controller->FullModelName, "Mylex "); - strcat(Controller->FullModelName, Controller->ModelName); - /* - Initialize the Controller Firmware Version field. - */ - sprintf(Controller->FirmwareVersion, "%d.%02d-%02d", - ControllerInfo->FirmwareMajorVersion, - ControllerInfo->FirmwareMinorVersion, - ControllerInfo->FirmwareTurnNumber); - if (ControllerInfo->FirmwareMajorVersion == 6 && - ControllerInfo->FirmwareMinorVersion == 0 && - ControllerInfo->FirmwareTurnNumber < 1) - { - DAC960_Info("FIRMWARE VERSION %s DOES NOT PROVIDE THE CONTROLLER\n", - Controller, Controller->FirmwareVersion); - DAC960_Info("STATUS MONITORING FUNCTIONALITY NEEDED BY THIS DRIVER.\n", - Controller); - DAC960_Info("PLEASE UPGRADE TO VERSION 6.00-01 OR ABOVE.\n", - Controller); - } - /* - Initialize the Controller Channels, Targets, and Memory Size. - */ - Controller->Channels = ControllerInfo->NumberOfPhysicalChannelsPresent; - Controller->Targets = - ControllerInfo->MaximumTargetsPerChannel - [ControllerInfo->NumberOfPhysicalChannelsPresent-1]; - Controller->MemorySize = ControllerInfo->MemorySizeMB; - /* - Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive - Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and - Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one - less than the Controller Queue Depth to allow for an automatic drive - rebuild operation. - */ - Controller->ControllerQueueDepth = ControllerInfo->MaximumParallelCommands; - Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1; - if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth) - Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth; - Controller->LogicalDriveCount = ControllerInfo->LogicalDevicesPresent; - Controller->MaxBlocksPerCommand = - ControllerInfo->MaximumDataTransferSizeInBlocks; - Controller->ControllerScatterGatherLimit = - ControllerInfo->MaximumScatterGatherEntries; - Controller->DriverScatterGatherLimit = - Controller->ControllerScatterGatherLimit; - if (Controller->DriverScatterGatherLimit > DAC960_V2_ScatterGatherLimit) - Controller->DriverScatterGatherLimit = DAC960_V2_ScatterGatherLimit; - /* - Initialize the Logical Device Information. - */ - while (true) - { - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = - Controller->V2.NewLogicalDeviceInformation; - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo; - DAC960_V2_PhysicalDevice_T PhysicalDevice; - - if (!DAC960_V2_NewLogicalDeviceInfo(Controller, LogicalDeviceNumber)) - break; - LogicalDeviceNumber = NewLogicalDeviceInfo->LogicalDeviceNumber; - if (LogicalDeviceNumber >= DAC960_MaxLogicalDrives) { - DAC960_Error("DAC960: Logical Drive Number %d not supported\n", - Controller, LogicalDeviceNumber); - break; - } - if (NewLogicalDeviceInfo->DeviceBlockSizeInBytes != DAC960_BlockSize) { - DAC960_Error("DAC960: Logical Drive Block Size %d not supported\n", - Controller, NewLogicalDeviceInfo->DeviceBlockSizeInBytes); - LogicalDeviceNumber++; - continue; - } - PhysicalDevice.Controller = 0; - PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel; - PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID; - PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit; - Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] = - PhysicalDevice; - if (NewLogicalDeviceInfo->LogicalDeviceState != - DAC960_V2_LogicalDevice_Offline) - Controller->LogicalDriveInitiallyAccessible[LogicalDeviceNumber] = true; - LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T), - GFP_ATOMIC); - if (LogicalDeviceInfo == NULL) - return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION"); - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] = - LogicalDeviceInfo; - memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - LogicalDeviceNumber++; - } - return true; -} - - -/* - DAC960_ReportControllerConfiguration reports the Configuration Information - for Controller. -*/ - -static bool DAC960_ReportControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_Info("Configuring Mylex %s PCI RAID Controller\n", - Controller, Controller->ModelName); - DAC960_Info(" Firmware Version: %s, Channels: %d, Memory Size: %dMB\n", - Controller, Controller->FirmwareVersion, - Controller->Channels, Controller->MemorySize); - DAC960_Info(" PCI Bus: %d, Device: %d, Function: %d, I/O Address: ", - Controller, Controller->Bus, - Controller->Device, Controller->Function); - if (Controller->IO_Address == 0) - DAC960_Info("Unassigned\n", Controller); - else DAC960_Info("0x%X\n", Controller, Controller->IO_Address); - DAC960_Info(" PCI Address: 0x%X mapped at 0x%lX, IRQ Channel: %d\n", - Controller, Controller->PCI_Address, - (unsigned long) Controller->BaseAddress, - Controller->IRQ_Channel); - DAC960_Info(" Controller Queue Depth: %d, " - "Maximum Blocks per Command: %d\n", - Controller, Controller->ControllerQueueDepth, - Controller->MaxBlocksPerCommand); - DAC960_Info(" Driver Queue Depth: %d, " - "Scatter/Gather Limit: %d of %d Segments\n", - Controller, Controller->DriverQueueDepth, - Controller->DriverScatterGatherLimit, - Controller->ControllerScatterGatherLimit); - if (Controller->FirmwareType == DAC960_V1_Controller) - { - DAC960_Info(" Stripe Size: %dKB, Segment Size: %dKB, " - "BIOS Geometry: %d/%d\n", Controller, - Controller->V1.StripeSize, - Controller->V1.SegmentSize, - Controller->V1.GeometryTranslationHeads, - Controller->V1.GeometryTranslationSectors); - if (Controller->V1.SAFTE_EnclosureManagementEnabled) - DAC960_Info(" SAF-TE Enclosure Management Enabled\n", Controller); - } - return true; -} - - -/* - DAC960_V1_ReadDeviceConfiguration reads the Device Configuration Information - for DAC960 V1 Firmware Controllers by requesting the SCSI Inquiry and SCSI - Inquiry Unit Serial Number information for each device connected to - Controller. -*/ - -static bool DAC960_V1_ReadDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - struct dma_loaf local_dma; - - dma_addr_t DCDBs_dma[DAC960_V1_MaxChannels]; - DAC960_V1_DCDB_T *DCDBs_cpu[DAC960_V1_MaxChannels]; - - dma_addr_t SCSI_Inquiry_dma[DAC960_V1_MaxChannels]; - DAC960_SCSI_Inquiry_T *SCSI_Inquiry_cpu[DAC960_V1_MaxChannels]; - - dma_addr_t SCSI_NewInquiryUnitSerialNumberDMA[DAC960_V1_MaxChannels]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *SCSI_NewInquiryUnitSerialNumberCPU[DAC960_V1_MaxChannels]; - - struct completion Completions[DAC960_V1_MaxChannels]; - unsigned long flags; - int Channel, TargetID; - - if (!init_dma_loaf(Controller->PCIDevice, &local_dma, - DAC960_V1_MaxChannels*(sizeof(DAC960_V1_DCDB_T) + - sizeof(DAC960_SCSI_Inquiry_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)))) - return DAC960_Failure(Controller, - "DMA ALLOCATION FAILED IN ReadDeviceConfiguration"); - - for (Channel = 0; Channel < Controller->Channels; Channel++) { - DCDBs_cpu[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_V1_DCDB_T), DCDBs_dma + Channel); - SCSI_Inquiry_cpu[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_SCSI_Inquiry_T), - SCSI_Inquiry_dma + Channel); - SCSI_NewInquiryUnitSerialNumberCPU[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - SCSI_NewInquiryUnitSerialNumberDMA + Channel); - } - - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - /* - * For each channel, submit a probe for a device on that channel. - * The timeout interval for a device that is present is 10 seconds. - * With this approach, the timeout periods can elapse in parallel - * on each channel. - */ - for (Channel = 0; Channel < Controller->Channels; Channel++) - { - dma_addr_t NewInquiryStandardDataDMA = SCSI_Inquiry_dma[Channel]; - DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel]; - dma_addr_t DCDB_dma = DCDBs_dma[Channel]; - DAC960_Command_T *Command = Controller->Commands[Channel]; - struct completion *Completion = &Completions[Channel]; - - init_completion(Completion); - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - Command->Completion = Completion; - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_dma; - DCDB->Channel = Channel; - DCDB->TargetID = TargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->BusAddress = NewInquiryStandardDataDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 0; /* EVPD = 0 */ - DCDB->CDB[2] = 0; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->CDB[5] = 0; /* Control */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - } - /* - * Wait for the problems submitted in the previous loop - * to complete. On the probes that are successful, - * get the serial number of the device that was found. - */ - for (Channel = 0; Channel < Controller->Channels; Channel++) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData[Channel][TargetID]; - DAC960_SCSI_Inquiry_T *NewInquiryStandardData = SCSI_Inquiry_cpu[Channel]; - dma_addr_t NewInquiryUnitSerialNumberDMA = - SCSI_NewInquiryUnitSerialNumberDMA[Channel]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber = - SCSI_NewInquiryUnitSerialNumberCPU[Channel]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; - DAC960_Command_T *Command = Controller->Commands[Channel]; - DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel]; - struct completion *Completion = &Completions[Channel]; - - wait_for_completion(Completion); - - if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) { - memset(InquiryStandardData, 0, sizeof(DAC960_SCSI_Inquiry_T)); - InquiryStandardData->PeripheralDeviceType = 0x1F; - continue; - } else - memcpy(InquiryStandardData, NewInquiryStandardData, sizeof(DAC960_SCSI_Inquiry_T)); - - /* Preserve Channel and TargetID values from the previous loop */ - Command->Completion = Completion; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->BusAddress = NewInquiryUnitSerialNumberDMA; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 1; /* EVPD = 1 */ - DCDB->CDB[2] = 0x80; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->CDB[5] = 0; /* Control */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - wait_for_completion(Completion); - - if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - } - } - free_dma_loaf(Controller->PCIDevice, &local_dma); - return true; -} - - -/* - DAC960_V2_ReadDeviceConfiguration reads the Device Configuration Information - for DAC960 V2 Firmware Controllers by requesting the Physical Device - Information and SCSI Inquiry Unit Serial Number information for each - device connected to Controller. -*/ - -static bool DAC960_V2_ReadDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - unsigned char Channel = 0, TargetID = 0, LogicalUnit = 0; - unsigned short PhysicalDeviceIndex = 0; - - while (true) - { - DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = - Controller->V2.NewPhysicalDeviceInformation; - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber = - Controller->V2.NewInquiryUnitSerialNumber; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber; - - if (!DAC960_V2_NewPhysicalDeviceInfo(Controller, Channel, TargetID, LogicalUnit)) - break; - - PhysicalDeviceInfo = kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T), - GFP_ATOMIC); - if (PhysicalDeviceInfo == NULL) - return DAC960_Failure(Controller, "PHYSICAL DEVICE ALLOCATION"); - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex] = - PhysicalDeviceInfo; - memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - - InquiryUnitSerialNumber = kmalloc( - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), GFP_ATOMIC); - if (InquiryUnitSerialNumber == NULL) { - kfree(PhysicalDeviceInfo); - return DAC960_Failure(Controller, "SERIAL NUMBER ALLOCATION"); - } - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex] = - InquiryUnitSerialNumber; - - Channel = NewPhysicalDeviceInfo->Channel; - TargetID = NewPhysicalDeviceInfo->TargetID; - LogicalUnit = NewPhysicalDeviceInfo->LogicalUnit; - - /* - Some devices do NOT have Unit Serial Numbers. - This command fails for them. But, we still want to - remember those devices are there. Construct a - UnitSerialNumber structure for the failure case. - */ - if (!DAC960_V2_NewInquiryUnitSerialNumber(Controller, Channel, TargetID, LogicalUnit)) { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - - PhysicalDeviceIndex++; - LogicalUnit++; - } - return true; -} - - -/* - DAC960_SanitizeInquiryData sanitizes the Vendor, Model, Revision, and - Product Serial Number fields of the Inquiry Standard Data and Inquiry - Unit Serial Number structures. -*/ - -static void DAC960_SanitizeInquiryData(DAC960_SCSI_Inquiry_T - *InquiryStandardData, - DAC960_SCSI_Inquiry_UnitSerialNumber_T - *InquiryUnitSerialNumber, - unsigned char *Vendor, - unsigned char *Model, - unsigned char *Revision, - unsigned char *SerialNumber) -{ - int SerialNumberLength, i; - if (InquiryStandardData->PeripheralDeviceType == 0x1F) return; - for (i = 0; i < sizeof(InquiryStandardData->VendorIdentification); i++) - { - unsigned char VendorCharacter = - InquiryStandardData->VendorIdentification[i]; - Vendor[i] = (VendorCharacter >= ' ' && VendorCharacter <= '~' - ? VendorCharacter : ' '); - } - Vendor[sizeof(InquiryStandardData->VendorIdentification)] = '\0'; - for (i = 0; i < sizeof(InquiryStandardData->ProductIdentification); i++) - { - unsigned char ModelCharacter = - InquiryStandardData->ProductIdentification[i]; - Model[i] = (ModelCharacter >= ' ' && ModelCharacter <= '~' - ? ModelCharacter : ' '); - } - Model[sizeof(InquiryStandardData->ProductIdentification)] = '\0'; - for (i = 0; i < sizeof(InquiryStandardData->ProductRevisionLevel); i++) - { - unsigned char RevisionCharacter = - InquiryStandardData->ProductRevisionLevel[i]; - Revision[i] = (RevisionCharacter >= ' ' && RevisionCharacter <= '~' - ? RevisionCharacter : ' '); - } - Revision[sizeof(InquiryStandardData->ProductRevisionLevel)] = '\0'; - if (InquiryUnitSerialNumber->PeripheralDeviceType == 0x1F) return; - SerialNumberLength = InquiryUnitSerialNumber->PageLength; - if (SerialNumberLength > - sizeof(InquiryUnitSerialNumber->ProductSerialNumber)) - SerialNumberLength = sizeof(InquiryUnitSerialNumber->ProductSerialNumber); - for (i = 0; i < SerialNumberLength; i++) - { - unsigned char SerialNumberCharacter = - InquiryUnitSerialNumber->ProductSerialNumber[i]; - SerialNumber[i] = - (SerialNumberCharacter >= ' ' && SerialNumberCharacter <= '~' - ? SerialNumberCharacter : ' '); - } - SerialNumber[SerialNumberLength] = '\0'; -} - - -/* - DAC960_V1_ReportDeviceConfiguration reports the Device Configuration - Information for DAC960 V1 Firmware Controllers. -*/ - -static bool DAC960_V1_ReportDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - int LogicalDriveNumber, Channel, TargetID; - DAC960_Info(" Physical Devices:\n", Controller); - for (Channel = 0; Channel < Controller->Channels; Channel++) - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData[Channel][TargetID]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - DAC960_V1_ErrorTableEntry_T *ErrorEntry = - &Controller->V1.ErrorTable.ErrorTableEntries[Channel][TargetID]; - char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)]; - char Model[1+sizeof(InquiryStandardData->ProductIdentification)]; - char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)]; - char SerialNumber[1+sizeof(InquiryUnitSerialNumber - ->ProductSerialNumber)]; - if (InquiryStandardData->PeripheralDeviceType == 0x1F) continue; - DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber, - Vendor, Model, Revision, SerialNumber); - DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n", - Controller, Channel, TargetID, (TargetID < 10 ? " " : ""), - Vendor, Model, Revision); - if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F) - DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber); - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType) - { - if (Controller->V1.DeviceResetCount[Channel][TargetID] > 0) - DAC960_Info(" Disk Status: %s, %u blocks, %d resets\n", - Controller, - (DeviceState->DeviceState == DAC960_V1_Device_Dead - ? "Dead" - : DeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "Write-Only" - : DeviceState->DeviceState - == DAC960_V1_Device_Online - ? "Online" : "Standby"), - DeviceState->DiskSize, - Controller->V1.DeviceResetCount[Channel][TargetID]); - else - DAC960_Info(" Disk Status: %s, %u blocks\n", Controller, - (DeviceState->DeviceState == DAC960_V1_Device_Dead - ? "Dead" - : DeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "Write-Only" - : DeviceState->DeviceState - == DAC960_V1_Device_Online - ? "Online" : "Standby"), - DeviceState->DiskSize); - } - if (ErrorEntry->ParityErrorCount > 0 || - ErrorEntry->SoftErrorCount > 0 || - ErrorEntry->HardErrorCount > 0 || - ErrorEntry->MiscErrorCount > 0) - DAC960_Info(" Errors - Parity: %d, Soft: %d, " - "Hard: %d, Misc: %d\n", Controller, - ErrorEntry->ParityErrorCount, - ErrorEntry->SoftErrorCount, - ErrorEntry->HardErrorCount, - ErrorEntry->MiscErrorCount); - } - DAC960_Info(" Logical Drives:\n", Controller); - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - { - DAC960_V1_LogicalDriveInformation_T *LogicalDriveInformation = - &Controller->V1.LogicalDriveInformation[LogicalDriveNumber]; - DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks, %s\n", - Controller, Controller->ControllerNumber, LogicalDriveNumber, - LogicalDriveInformation->RAIDLevel, - (LogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Online - ? "Online" - : LogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Critical - ? "Critical" : "Offline"), - LogicalDriveInformation->LogicalDriveSize, - (LogicalDriveInformation->WriteBack - ? "Write Back" : "Write Thru")); - } - return true; -} - - -/* - DAC960_V2_ReportDeviceConfiguration reports the Device Configuration - Information for DAC960 V2 Firmware Controllers. -*/ - -static bool DAC960_V2_ReportDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - int PhysicalDeviceIndex, LogicalDriveNumber; - DAC960_Info(" Physical Devices:\n", Controller); - for (PhysicalDeviceIndex = 0; - PhysicalDeviceIndex < DAC960_V2_MaxPhysicalDevices; - PhysicalDeviceIndex++) - { - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - DAC960_SCSI_Inquiry_T *InquiryStandardData = - (DAC960_SCSI_Inquiry_T *) &PhysicalDeviceInfo->SCSI_InquiryData; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)]; - char Model[1+sizeof(InquiryStandardData->ProductIdentification)]; - char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)]; - char SerialNumber[1+sizeof(InquiryUnitSerialNumber->ProductSerialNumber)]; - if (PhysicalDeviceInfo == NULL) break; - DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber, - Vendor, Model, Revision, SerialNumber); - DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID, - (PhysicalDeviceInfo->TargetID < 10 ? " " : ""), - Vendor, Model, Revision); - if (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers == 0) - DAC960_Info(" %sAsynchronous\n", Controller, - (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16 - ? "Wide " :"")); - else - DAC960_Info(" %sSynchronous at %d MB/sec\n", Controller, - (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16 - ? "Wide " :""), - (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers - * PhysicalDeviceInfo->NegotiatedDataWidthBits/8)); - if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F) - DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber); - if (PhysicalDeviceInfo->PhysicalDeviceState == - DAC960_V2_Device_Unconfigured) - continue; - DAC960_Info(" Disk Status: %s, %u blocks\n", Controller, - (PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Online - ? "Online" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Rebuild - ? "Rebuild" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Missing - ? "Missing" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Critical - ? "Critical" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead - ? "Dead" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_SuspectedDead - ? "Suspected-Dead" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_CommandedOffline - ? "Commanded-Offline" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Standby - ? "Standby" : "Unknown"), - PhysicalDeviceInfo->ConfigurableDeviceSize); - if (PhysicalDeviceInfo->ParityErrors == 0 && - PhysicalDeviceInfo->SoftErrors == 0 && - PhysicalDeviceInfo->HardErrors == 0 && - PhysicalDeviceInfo->MiscellaneousErrors == 0 && - PhysicalDeviceInfo->CommandTimeouts == 0 && - PhysicalDeviceInfo->Retries == 0 && - PhysicalDeviceInfo->Aborts == 0 && - PhysicalDeviceInfo->PredictedFailuresDetected == 0) - continue; - DAC960_Info(" Errors - Parity: %d, Soft: %d, " - "Hard: %d, Misc: %d\n", Controller, - PhysicalDeviceInfo->ParityErrors, - PhysicalDeviceInfo->SoftErrors, - PhysicalDeviceInfo->HardErrors, - PhysicalDeviceInfo->MiscellaneousErrors); - DAC960_Info(" Timeouts: %d, Retries: %d, " - "Aborts: %d, Predicted: %d\n", Controller, - PhysicalDeviceInfo->CommandTimeouts, - PhysicalDeviceInfo->Retries, - PhysicalDeviceInfo->Aborts, - PhysicalDeviceInfo->PredictedFailuresDetected); - } - DAC960_Info(" Logical Drives:\n", Controller); - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - static const unsigned char *ReadCacheStatus[] = { - "Read Cache Disabled", - "Read Cache Enabled", - "Read Ahead Enabled", - "Intelligent Read Ahead Enabled", - "-", "-", "-", "-" - }; - static const unsigned char *WriteCacheStatus[] = { - "Write Cache Disabled", - "Logical Device Read Only", - "Write Cache Enabled", - "Intelligent Write Cache Enabled", - "-", "-", "-", "-" - }; - unsigned char *GeometryTranslation; - if (LogicalDeviceInfo == NULL) continue; - switch (LogicalDeviceInfo->DriveGeometry) - { - case DAC960_V2_Geometry_128_32: - GeometryTranslation = "128/32"; - break; - case DAC960_V2_Geometry_255_63: - GeometryTranslation = "255/63"; - break; - default: - GeometryTranslation = "Invalid"; - DAC960_Error("Illegal Logical Device Geometry %d\n", - Controller, LogicalDeviceInfo->DriveGeometry); - break; - } - DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks\n", - Controller, Controller->ControllerNumber, LogicalDriveNumber, - LogicalDeviceInfo->RAIDLevel, - (LogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Online - ? "Online" - : LogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Critical - ? "Critical" : "Offline"), - LogicalDeviceInfo->ConfigurableDeviceSize); - DAC960_Info(" Logical Device %s, BIOS Geometry: %s\n", - Controller, - (LogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized - ? "Initialized" : "Uninitialized"), - GeometryTranslation); - if (LogicalDeviceInfo->StripeSize == 0) - { - if (LogicalDeviceInfo->CacheLineSize == 0) - DAC960_Info(" Stripe Size: N/A, " - "Segment Size: N/A\n", Controller); - else - DAC960_Info(" Stripe Size: N/A, " - "Segment Size: %dKB\n", Controller, - 1 << (LogicalDeviceInfo->CacheLineSize - 2)); - } - else - { - if (LogicalDeviceInfo->CacheLineSize == 0) - DAC960_Info(" Stripe Size: %dKB, " - "Segment Size: N/A\n", Controller, - 1 << (LogicalDeviceInfo->StripeSize - 2)); - else - DAC960_Info(" Stripe Size: %dKB, " - "Segment Size: %dKB\n", Controller, - 1 << (LogicalDeviceInfo->StripeSize - 2), - 1 << (LogicalDeviceInfo->CacheLineSize - 2)); - } - DAC960_Info(" %s, %s\n", Controller, - ReadCacheStatus[ - LogicalDeviceInfo->LogicalDeviceControl.ReadCache], - WriteCacheStatus[ - LogicalDeviceInfo->LogicalDeviceControl.WriteCache]); - if (LogicalDeviceInfo->SoftErrors > 0 || - LogicalDeviceInfo->CommandsFailed > 0 || - LogicalDeviceInfo->DeferredWriteErrors) - DAC960_Info(" Errors - Soft: %d, Failed: %d, " - "Deferred Write: %d\n", Controller, - LogicalDeviceInfo->SoftErrors, - LogicalDeviceInfo->CommandsFailed, - LogicalDeviceInfo->DeferredWriteErrors); - - } - return true; -} - -/* - DAC960_RegisterBlockDevice registers the Block Device structures - associated with Controller. -*/ - -static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) -{ - int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - int n; - - /* - Register the Block Device Major Number for this DAC960 Controller. - */ - if (register_blkdev(MajorNumber, "dac960") < 0) - return false; - - for (n = 0; n < DAC960_MaxLogicalDrives; n++) { - struct gendisk *disk = Controller->disks[n]; - struct request_queue *RequestQueue; - - /* for now, let all request queues share controller's lock */ - RequestQueue = blk_init_queue(DAC960_RequestFunction,&Controller->queue_lock); - if (!RequestQueue) { - printk("DAC960: failure to allocate request queue\n"); - continue; - } - Controller->RequestQueue[n] = RequestQueue; - RequestQueue->queuedata = Controller; - blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit); - blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand); - disk->queue = RequestQueue; - sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n); - disk->major = MajorNumber; - disk->first_minor = n << DAC960_MaxPartitionsBits; - disk->fops = &DAC960_BlockDeviceOperations; - } - /* - Indicate the Block Device Registration completed successfully, - */ - return true; -} - - -/* - DAC960_UnregisterBlockDevice unregisters the Block Device structures - associated with Controller. -*/ - -static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller) -{ - int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - int disk; - - /* does order matter when deleting gendisk and cleanup in request queue? */ - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) { - del_gendisk(Controller->disks[disk]); - blk_cleanup_queue(Controller->RequestQueue[disk]); - Controller->RequestQueue[disk] = NULL; - } - - /* - Unregister the Block Device Major Number for this DAC960 Controller. - */ - unregister_blkdev(MajorNumber, "dac960"); -} - -/* - DAC960_ComputeGenericDiskInfo computes the values for the Generic Disk - Information Partition Sector Counts and Block Sizes. -*/ - -static void DAC960_ComputeGenericDiskInfo(DAC960_Controller_T *Controller) -{ - int disk; - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) - set_capacity(Controller->disks[disk], disk_size(Controller, disk)); -} - -/* - DAC960_ReportErrorStatus reports Controller BIOS Messages passed through - the Error Status Register when the driver performs the BIOS handshaking. - It returns true for fatal errors and false otherwise. -*/ - -static bool DAC960_ReportErrorStatus(DAC960_Controller_T *Controller, - unsigned char ErrorStatus, - unsigned char Parameter0, - unsigned char Parameter1) -{ - switch (ErrorStatus) - { - case 0x00: - DAC960_Notice("Physical Device %d:%d Not Responding\n", - Controller, Parameter1, Parameter0); - break; - case 0x08: - if (Controller->DriveSpinUpMessageDisplayed) break; - DAC960_Notice("Spinning Up Drives\n", Controller); - Controller->DriveSpinUpMessageDisplayed = true; - break; - case 0x30: - DAC960_Notice("Configuration Checksum Error\n", Controller); - break; - case 0x60: - DAC960_Notice("Mirror Race Recovery Failed\n", Controller); - break; - case 0x70: - DAC960_Notice("Mirror Race Recovery In Progress\n", Controller); - break; - case 0x90: - DAC960_Notice("Physical Device %d:%d COD Mismatch\n", - Controller, Parameter1, Parameter0); - break; - case 0xA0: - DAC960_Notice("Logical Drive Installation Aborted\n", Controller); - break; - case 0xB0: - DAC960_Notice("Mirror Race On A Critical Logical Drive\n", Controller); - break; - case 0xD0: - DAC960_Notice("New Controller Configuration Found\n", Controller); - break; - case 0xF0: - DAC960_Error("Fatal Memory Parity Error for Controller at\n", Controller); - return true; - default: - DAC960_Error("Unknown Initialization Error %02X for Controller at\n", - Controller, ErrorStatus); - return true; - } - return false; -} - - -/* - * DAC960_DetectCleanup releases the resources that were allocated - * during DAC960_DetectController(). DAC960_DetectController can - * has several internal failure points, so not ALL resources may - * have been allocated. It's important to free only - * resources that HAVE been allocated. The code below always - * tests that the resource has been allocated before attempting to - * free it. - */ -static void DAC960_DetectCleanup(DAC960_Controller_T *Controller) -{ - int i; - - /* Free the memory mailbox, status, and related structures */ - free_dma_loaf(Controller->PCIDevice, &Controller->DmaPages); - if (Controller->MemoryMappedAddress) { - switch(Controller->HardwareType) - { - case DAC960_GEM_Controller: - DAC960_GEM_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_BA_Controller: - DAC960_BA_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_LP_Controller: - DAC960_LP_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_LA_Controller: - DAC960_LA_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_PG_Controller: - DAC960_PG_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_PD_Controller: - DAC960_PD_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_P_Controller: - DAC960_PD_DisableInterrupts(Controller->BaseAddress); - break; - } - iounmap(Controller->MemoryMappedAddress); - } - if (Controller->IRQ_Channel) - free_irq(Controller->IRQ_Channel, Controller); - if (Controller->IO_Address) - release_region(Controller->IO_Address, 0x80); - pci_disable_device(Controller->PCIDevice); - for (i = 0; (i < DAC960_MaxLogicalDrives) && Controller->disks[i]; i++) - put_disk(Controller->disks[i]); - DAC960_Controllers[Controller->ControllerNumber] = NULL; - kfree(Controller); -} - - -/* - DAC960_DetectController detects Mylex DAC960/AcceleRAID/eXtremeRAID - PCI RAID Controllers by interrogating the PCI Configuration Space for - Controller Type. -*/ - -static DAC960_Controller_T * -DAC960_DetectController(struct pci_dev *PCI_Device, - const struct pci_device_id *entry) -{ - struct DAC960_privdata *privdata = - (struct DAC960_privdata *)entry->driver_data; - irq_handler_t InterruptHandler = privdata->InterruptHandler; - unsigned int MemoryWindowSize = privdata->MemoryWindowSize; - DAC960_Controller_T *Controller = NULL; - unsigned char DeviceFunction = PCI_Device->devfn; - unsigned char ErrorStatus, Parameter0, Parameter1; - unsigned int IRQ_Channel; - void __iomem *BaseAddress; - int i; - - Controller = kzalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC); - if (Controller == NULL) { - DAC960_Error("Unable to allocate Controller structure for " - "Controller at\n", NULL); - return NULL; - } - Controller->ControllerNumber = DAC960_ControllerCount; - DAC960_Controllers[DAC960_ControllerCount++] = Controller; - Controller->Bus = PCI_Device->bus->number; - Controller->FirmwareType = privdata->FirmwareType; - Controller->HardwareType = privdata->HardwareType; - Controller->Device = DeviceFunction >> 3; - Controller->Function = DeviceFunction & 0x7; - Controller->PCIDevice = PCI_Device; - strcpy(Controller->FullModelName, "DAC960"); - - if (pci_enable_device(PCI_Device)) - goto Failure; - - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_BA_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_LP_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_LA_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_PG_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_PD_Controller: - Controller->IO_Address = pci_resource_start(PCI_Device, 0); - Controller->PCI_Address = pci_resource_start(PCI_Device, 1); - break; - case DAC960_P_Controller: - Controller->IO_Address = pci_resource_start(PCI_Device, 0); - Controller->PCI_Address = pci_resource_start(PCI_Device, 1); - break; - } - - pci_set_drvdata(PCI_Device, (void *)((long)Controller->ControllerNumber)); - for (i = 0; i < DAC960_MaxLogicalDrives; i++) { - Controller->disks[i] = alloc_disk(1<<DAC960_MaxPartitionsBits); - if (!Controller->disks[i]) - goto Failure; - Controller->disks[i]->private_data = (void *)((long)i); - } - init_waitqueue_head(&Controller->CommandWaitQueue); - init_waitqueue_head(&Controller->HealthStatusWaitQueue); - spin_lock_init(&Controller->queue_lock); - DAC960_AnnounceDriver(Controller); - /* - Map the Controller Register Window. - */ - if (MemoryWindowSize < PAGE_SIZE) - MemoryWindowSize = PAGE_SIZE; - Controller->MemoryMappedAddress = - ioremap_nocache(Controller->PCI_Address & PAGE_MASK, MemoryWindowSize); - Controller->BaseAddress = - Controller->MemoryMappedAddress + (Controller->PCI_Address & ~PAGE_MASK); - if (Controller->MemoryMappedAddress == NULL) - { - DAC960_Error("Unable to map Controller Register Window for " - "Controller at\n", Controller); - goto Failure; - } - BaseAddress = Controller->BaseAddress; - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - DAC960_GEM_DisableInterrupts(BaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_GEM_InitializationInProgressP(BaseAddress)) - { - if (DAC960_GEM_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_GEM_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_GEM_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_BA_Controller: - DAC960_BA_DisableInterrupts(BaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_BA_InitializationInProgressP(BaseAddress)) - { - if (DAC960_BA_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_BA_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_BA_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_LP_Controller: - DAC960_LP_DisableInterrupts(BaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_LP_InitializationInProgressP(BaseAddress)) - { - if (DAC960_LP_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_LP_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_LP_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_LA_Controller: - DAC960_LA_DisableInterrupts(BaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_LA_InitializationInProgressP(BaseAddress)) - { - if (DAC960_LA_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_LA_EnableInterrupts(BaseAddress); - if (Controller->V1.DualModeMemoryMailboxInterface) - Controller->QueueCommand = DAC960_LA_QueueCommandDualMode; - else Controller->QueueCommand = DAC960_LA_QueueCommandSingleMode; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_PG_Controller: - DAC960_PG_DisableInterrupts(BaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_PG_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PG_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PG_EnableInterrupts(BaseAddress); - if (Controller->V1.DualModeMemoryMailboxInterface) - Controller->QueueCommand = DAC960_PG_QueueCommandDualMode; - else Controller->QueueCommand = DAC960_PG_QueueCommandSingleMode; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_PD_Controller: - if (!request_region(Controller->IO_Address, 0x80, - Controller->FullModelName)) { - DAC960_Error("IO port 0x%lx busy for Controller at\n", - Controller, Controller->IO_Address); - goto Failure; - } - DAC960_PD_DisableInterrupts(BaseAddress); - DAC960_PD_AcknowledgeStatus(BaseAddress); - udelay(1000); - while (DAC960_PD_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to allocate DMA mapped memory " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PD_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_PD_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_P_Controller: - if (!request_region(Controller->IO_Address, 0x80, - Controller->FullModelName)){ - DAC960_Error("IO port 0x%lx busy for Controller at\n", - Controller, Controller->IO_Address); - goto Failure; - } - DAC960_PD_DisableInterrupts(BaseAddress); - DAC960_PD_AcknowledgeStatus(BaseAddress); - udelay(1000); - while (DAC960_PD_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to allocate DMA mapped memory" - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PD_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_P_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - } - /* - Acquire shared access to the IRQ Channel. - */ - IRQ_Channel = PCI_Device->irq; - if (request_irq(IRQ_Channel, InterruptHandler, IRQF_SHARED, - Controller->FullModelName, Controller) < 0) - { - DAC960_Error("Unable to acquire IRQ Channel %d for Controller at\n", - Controller, Controller->IRQ_Channel); - goto Failure; - } - Controller->IRQ_Channel = IRQ_Channel; - Controller->InitialCommand.CommandIdentifier = 1; - Controller->InitialCommand.Controller = Controller; - Controller->Commands[0] = &Controller->InitialCommand; - Controller->FreeCommands = &Controller->InitialCommand; - return Controller; - -Failure: - if (Controller->IO_Address == 0) - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A " - "PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->PCI_Address); - else - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address " - "0x%X PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->IO_Address, - Controller->PCI_Address); - DAC960_DetectCleanup(Controller); - DAC960_ControllerCount--; - return NULL; -} - -/* - DAC960_InitializeController initializes Controller. -*/ - -static bool -DAC960_InitializeController(DAC960_Controller_T *Controller) -{ - if (DAC960_ReadControllerConfiguration(Controller) && - DAC960_ReportControllerConfiguration(Controller) && - DAC960_CreateAuxiliaryStructures(Controller) && - DAC960_ReadDeviceConfiguration(Controller) && - DAC960_ReportDeviceConfiguration(Controller) && - DAC960_RegisterBlockDevice(Controller)) - { - /* - Initialize the Monitoring Timer. - */ - timer_setup(&Controller->MonitoringTimer, - DAC960_MonitoringTimerFunction, 0); - Controller->MonitoringTimer.expires = - jiffies + DAC960_MonitoringTimerInterval; - add_timer(&Controller->MonitoringTimer); - Controller->ControllerInitialized = true; - return true; - } - return false; -} - - -/* - DAC960_FinalizeController finalizes Controller. -*/ - -static void DAC960_FinalizeController(DAC960_Controller_T *Controller) -{ - if (Controller->ControllerInitialized) - { - unsigned long flags; - - /* - * Acquiring and releasing lock here eliminates - * a very low probability race. - * - * The code below allocates controller command structures - * from the free list without holding the controller lock. - * This is safe assuming there is no other activity on - * the controller at the time. - * - * But, there might be a monitoring command still - * in progress. Setting the Shutdown flag while holding - * the lock ensures that there is no monitoring command - * in the interrupt handler currently, and any monitoring - * commands that complete from this time on will NOT return - * their command structure to the free list. - */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - Controller->ShutdownMonitoringTimer = 1; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - - del_timer_sync(&Controller->MonitoringTimer); - if (Controller->FirmwareType == DAC960_V1_Controller) - { - DAC960_Notice("Flushing Cache...", Controller); - DAC960_V1_ExecuteType3(Controller, DAC960_V1_Flush, 0); - DAC960_Notice("done\n", Controller); - - if (Controller->HardwareType == DAC960_PD_Controller) - release_region(Controller->IO_Address, 0x80); - } - else - { - DAC960_Notice("Flushing Cache...", Controller); - DAC960_V2_DeviceOperation(Controller, DAC960_V2_PauseDevice, - DAC960_V2_RAID_Controller); - DAC960_Notice("done\n", Controller); - } - } - DAC960_UnregisterBlockDevice(Controller); - DAC960_DestroyAuxiliaryStructures(Controller); - DAC960_DestroyProcEntries(Controller); - DAC960_DetectCleanup(Controller); -} - - -/* - DAC960_Probe verifies controller's existence and - initializes the DAC960 Driver for that controller. -*/ - -static int -DAC960_Probe(struct pci_dev *dev, const struct pci_device_id *entry) -{ - int disk; - DAC960_Controller_T *Controller; - - if (DAC960_ControllerCount == DAC960_MaxControllers) - { - DAC960_Error("More than %d DAC960 Controllers detected - " - "ignoring from Controller at\n", - NULL, DAC960_MaxControllers); - return -ENODEV; - } - - Controller = DAC960_DetectController(dev, entry); - if (!Controller) - return -ENODEV; - - if (!DAC960_InitializeController(Controller)) { - DAC960_FinalizeController(Controller); - return -ENODEV; - } - - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) { - set_capacity(Controller->disks[disk], disk_size(Controller, disk)); - add_disk(Controller->disks[disk]); - } - DAC960_CreateProcEntries(Controller); - return 0; -} - - -/* - DAC960_Finalize finalizes the DAC960 Driver. -*/ - -static void DAC960_Remove(struct pci_dev *PCI_Device) -{ - int Controller_Number = (long)pci_get_drvdata(PCI_Device); - DAC960_Controller_T *Controller = DAC960_Controllers[Controller_Number]; - if (Controller != NULL) - DAC960_FinalizeController(Controller); -} - - -/* - DAC960_V1_QueueReadWriteCommand prepares and queues a Read/Write Command for - DAC960 V1 Firmware Controllers. -*/ - -static void DAC960_V1_QueueReadWriteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_ScatterGatherSegment_T *ScatterGatherList = - Command->V1.ScatterGatherList; - struct scatterlist *ScatterList = Command->V1.ScatterList; - - DAC960_V1_ClearCommand(Command); - - if (Command->SegmentCount == 1) - { - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - CommandMailbox->Type5.CommandOpcode = DAC960_V1_Read; - else - CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; - - CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; - CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber; - CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; - CommandMailbox->Type5.BusAddress = - (DAC960_BusAddress32_T)sg_dma_address(ScatterList); - } - else - { - int i; - - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - CommandMailbox->Type5.CommandOpcode = DAC960_V1_ReadWithScatterGather; - else - CommandMailbox->Type5.CommandOpcode = DAC960_V1_WriteWithScatterGather; - - CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; - CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber; - CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; - CommandMailbox->Type5.BusAddress = Command->V1.ScatterGatherListDMA; - - CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; - - for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) { - ScatterGatherList->SegmentDataPointer = - (DAC960_BusAddress32_T)sg_dma_address(ScatterList); - ScatterGatherList->SegmentByteCount = - (DAC960_ByteCount32_T)sg_dma_len(ScatterList); - } - } - DAC960_QueueCommand(Command); -} - - -/* - DAC960_V2_QueueReadWriteCommand prepares and queues a Read/Write Command for - DAC960 V2 Firmware Controllers. -*/ - -static void DAC960_V2_QueueReadWriteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - struct scatterlist *ScatterList = Command->V2.ScatterList; - - DAC960_V2_ClearCommand(Command); - - CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10; - CommandMailbox->SCSI_10.CommandControlBits.DataTransferControllerToHost = - (Command->DmaDirection == PCI_DMA_FROMDEVICE); - CommandMailbox->SCSI_10.DataTransferSize = - Command->BlockCount << DAC960_BlockSizeBits; - CommandMailbox->SCSI_10.RequestSenseBusAddress = Command->V2.RequestSenseDMA; - CommandMailbox->SCSI_10.PhysicalDevice = - Controller->V2.LogicalDriveToVirtualDevice[Command->LogicalDriveNumber]; - CommandMailbox->SCSI_10.RequestSenseSize = sizeof(DAC960_SCSI_RequestSense_T); - CommandMailbox->SCSI_10.CDBLength = 10; - CommandMailbox->SCSI_10.SCSI_CDB[0] = - (Command->DmaDirection == PCI_DMA_FROMDEVICE ? 0x28 : 0x2A); - CommandMailbox->SCSI_10.SCSI_CDB[2] = Command->BlockNumber >> 24; - CommandMailbox->SCSI_10.SCSI_CDB[3] = Command->BlockNumber >> 16; - CommandMailbox->SCSI_10.SCSI_CDB[4] = Command->BlockNumber >> 8; - CommandMailbox->SCSI_10.SCSI_CDB[5] = Command->BlockNumber; - CommandMailbox->SCSI_10.SCSI_CDB[7] = Command->BlockCount >> 8; - CommandMailbox->SCSI_10.SCSI_CDB[8] = Command->BlockCount; - - if (Command->SegmentCount == 1) - { - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - (DAC960_BusAddress64_T)sg_dma_address(ScatterList); - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->SCSI_10.DataTransferSize; - } - else - { - DAC960_V2_ScatterGatherSegment_T *ScatterGatherList; - int i; - - if (Command->SegmentCount > 2) - { - ScatterGatherList = Command->V2.ScatterGatherList; - CommandMailbox->SCSI_10.CommandControlBits - .AdditionalScatterGatherListMemory = true; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ExtendedScatterGather.ScatterGatherList0Length = Command->SegmentCount; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ExtendedScatterGather.ScatterGatherList0Address = - Command->V2.ScatterGatherListDMA; - } - else - ScatterGatherList = CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments; - - for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) { - ScatterGatherList->SegmentDataPointer = - (DAC960_BusAddress64_T)sg_dma_address(ScatterList); - ScatterGatherList->SegmentByteCount = - (DAC960_ByteCount64_T)sg_dma_len(ScatterList); - } - } - DAC960_QueueCommand(Command); -} - - -static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_queue *req_q) -{ - struct request *Request; - DAC960_Command_T *Command; - - while(1) { - Request = blk_peek_request(req_q); - if (!Request) - return 1; - - Command = DAC960_AllocateCommand(Controller); - if (Command == NULL) - return 0; - - if (rq_data_dir(Request) == READ) { - Command->DmaDirection = PCI_DMA_FROMDEVICE; - Command->CommandType = DAC960_ReadCommand; - } else { - Command->DmaDirection = PCI_DMA_TODEVICE; - Command->CommandType = DAC960_WriteCommand; - } - Command->Completion = Request->end_io_data; - Command->LogicalDriveNumber = (long)Request->rq_disk->private_data; - Command->BlockNumber = blk_rq_pos(Request); - Command->BlockCount = blk_rq_sectors(Request); - Command->Request = Request; - blk_start_request(Request); - Command->SegmentCount = blk_rq_map_sg(req_q, - Command->Request, Command->cmd_sglist); - /* pci_map_sg MAY change the value of SegCount */ - Command->SegmentCount = pci_map_sg(Controller->PCIDevice, Command->cmd_sglist, - Command->SegmentCount, Command->DmaDirection); - - DAC960_QueueReadWriteCommand(Command); - } -} - -/* - DAC960_ProcessRequest attempts to remove one I/O Request from Controller's - I/O Request Queue and queues it to the Controller. WaitForCommand is true if - this function should wait for a Command to become available if necessary. - This function returns true if an I/O Request was queued and false otherwise. -*/ -static void DAC960_ProcessRequest(DAC960_Controller_T *controller) -{ - int i; - - if (!controller->ControllerInitialized) - return; - - /* Do this better later! */ - for (i = controller->req_q_index; i < DAC960_MaxLogicalDrives; i++) { - struct request_queue *req_q = controller->RequestQueue[i]; - - if (req_q == NULL) - continue; - - if (!DAC960_process_queue(controller, req_q)) { - controller->req_q_index = i; - return; - } - } - - if (controller->req_q_index == 0) - return; - - for (i = 0; i < controller->req_q_index; i++) { - struct request_queue *req_q = controller->RequestQueue[i]; - - if (req_q == NULL) - continue; - - if (!DAC960_process_queue(controller, req_q)) { - controller->req_q_index = i; - return; - } - } -} - - -/* - DAC960_queue_partial_rw extracts one bio from the request already - associated with argument command, and construct a new command block to retry I/O - only on that bio. Queue that command to the controller. - - This function re-uses a previously-allocated Command, - there is no failure mode from trying to allocate a command. -*/ - -static void DAC960_queue_partial_rw(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - struct request *Request = Command->Request; - struct request_queue *req_q = Controller->RequestQueue[Command->LogicalDriveNumber]; - - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - Command->CommandType = DAC960_ReadRetryCommand; - else - Command->CommandType = DAC960_WriteRetryCommand; - - /* - * We could be more efficient with these mapping requests - * and map only the portions that we need. But since this - * code should almost never be called, just go with a - * simple coding. - */ - (void)blk_rq_map_sg(req_q, Command->Request, Command->cmd_sglist); - - (void)pci_map_sg(Controller->PCIDevice, Command->cmd_sglist, 1, Command->DmaDirection); - /* - * Resubmitting the request sector at a time is really tedious. - * But, this should almost never happen. So, we're willing to pay - * this price so that in the end, as much of the transfer is completed - * successfully as possible. - */ - Command->SegmentCount = 1; - Command->BlockNumber = blk_rq_pos(Request); - Command->BlockCount = 1; - DAC960_QueueReadWriteCommand(Command); - return; -} - -/* - DAC960_RequestFunction is the I/O Request Function for DAC960 Controllers. -*/ - -static void DAC960_RequestFunction(struct request_queue *RequestQueue) -{ - DAC960_ProcessRequest(RequestQueue->queuedata); -} - -/* - DAC960_ProcessCompletedBuffer performs completion processing for an - individual Buffer. -*/ - -static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command, - bool SuccessfulIO) -{ - struct request *Request = Command->Request; - blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR; - - pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist, - Command->SegmentCount, Command->DmaDirection); - - if (!__blk_end_request(Request, Error, Command->BlockCount << 9)) { - if (Command->Completion) { - complete(Command->Completion); - Command->Completion = NULL; - } - return true; - } - return false; -} - -/* - DAC960_V1_ReadWriteError prints an appropriate error message for Command - when an error occurs on a Read or Write operation. -*/ - -static void DAC960_V1_ReadWriteError(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - unsigned char *CommandName = "UNKNOWN"; - switch (Command->CommandType) - { - case DAC960_ReadCommand: - case DAC960_ReadRetryCommand: - CommandName = "READ"; - break; - case DAC960_WriteCommand: - case DAC960_WriteRetryCommand: - CommandName = "WRITE"; - break; - case DAC960_MonitoringCommand: - case DAC960_ImmediateCommand: - case DAC960_QueuedCommand: - break; - } - switch (Command->V1.CommandStatus) - { - case DAC960_V1_IrrecoverableDataError: - DAC960_Error("Irrecoverable Data Error on %s:\n", - Controller, CommandName); - break; - case DAC960_V1_LogicalDriveNonexistentOrOffline: - DAC960_Error("Logical Drive Nonexistent or Offline on %s:\n", - Controller, CommandName); - break; - case DAC960_V1_AccessBeyondEndOfLogicalDrive: - DAC960_Error("Attempt to Access Beyond End of Logical Drive " - "on %s:\n", Controller, CommandName); - break; - case DAC960_V1_BadDataEncountered: - DAC960_Error("Bad Data Encountered on %s:\n", Controller, CommandName); - break; - default: - DAC960_Error("Unexpected Error Status %04X on %s:\n", - Controller, Command->V1.CommandStatus, CommandName); - break; - } - DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n", - Controller, Controller->ControllerNumber, - Command->LogicalDriveNumber, Command->BlockNumber, - Command->BlockNumber + Command->BlockCount - 1); -} - - -/* - DAC960_V1_ProcessCompletedCommand performs completion processing for Command - for DAC960 V1 Firmware Controllers. -*/ - -static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_CommandType_T CommandType = Command->CommandType; - DAC960_V1_CommandOpcode_T CommandOpcode = - Command->V1.CommandMailbox.Common.CommandOpcode; - DAC960_V1_CommandStatus_T CommandStatus = Command->V1.CommandStatus; - - if (CommandType == DAC960_ReadCommand || - CommandType == DAC960_WriteCommand) - { - -#ifdef FORCE_RETRY_DEBUG - CommandStatus = DAC960_V1_IrrecoverableDataError; -#endif - - if (CommandStatus == DAC960_V1_NormalCompletion) { - - if (!DAC960_ProcessCompletedRequest(Command, true)) - BUG(); - - } else if (CommandStatus == DAC960_V1_IrrecoverableDataError || - CommandStatus == DAC960_V1_BadDataEncountered) - { - /* - * break the command down into pieces and resubmit each - * piece, hoping that some of them will succeed. - */ - DAC960_queue_partial_rw(Command); - return; - } - else - { - if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline) - DAC960_V1_ReadWriteError(Command); - - if (!DAC960_ProcessCompletedRequest(Command, false)) - BUG(); - } - } - else if (CommandType == DAC960_ReadRetryCommand || - CommandType == DAC960_WriteRetryCommand) - { - bool normal_completion; -#ifdef FORCE_RETRY_FAILURE_DEBUG - static int retry_count = 1; -#endif - /* - Perform completion processing for the portion that was - retried, and submit the next portion, if any. - */ - normal_completion = true; - if (CommandStatus != DAC960_V1_NormalCompletion) { - normal_completion = false; - if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline) - DAC960_V1_ReadWriteError(Command); - } - -#ifdef FORCE_RETRY_FAILURE_DEBUG - if (!(++retry_count % 10000)) { - printk("V1 error retry failure test\n"); - normal_completion = false; - DAC960_V1_ReadWriteError(Command); - } -#endif - - if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) { - DAC960_queue_partial_rw(Command); - return; - } - } - - else if (CommandType == DAC960_MonitoringCommand) - { - if (Controller->ShutdownMonitoringTimer) - return; - if (CommandOpcode == DAC960_V1_Enquiry) - { - DAC960_V1_Enquiry_T *OldEnquiry = &Controller->V1.Enquiry; - DAC960_V1_Enquiry_T *NewEnquiry = Controller->V1.NewEnquiry; - unsigned int OldCriticalLogicalDriveCount = - OldEnquiry->CriticalLogicalDriveCount; - unsigned int NewCriticalLogicalDriveCount = - NewEnquiry->CriticalLogicalDriveCount; - if (NewEnquiry->NumberOfLogicalDrives > Controller->LogicalDriveCount) - { - int LogicalDriveNumber = Controller->LogicalDriveCount - 1; - while (++LogicalDriveNumber < NewEnquiry->NumberOfLogicalDrives) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "Now Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives; - DAC960_ComputeGenericDiskInfo(Controller); - } - if (NewEnquiry->NumberOfLogicalDrives < Controller->LogicalDriveCount) - { - int LogicalDriveNumber = NewEnquiry->NumberOfLogicalDrives - 1; - while (++LogicalDriveNumber < Controller->LogicalDriveCount) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "No Longer Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives; - DAC960_ComputeGenericDiskInfo(Controller); - } - if (NewEnquiry->StatusFlags.DeferredWriteError != - OldEnquiry->StatusFlags.DeferredWriteError) - DAC960_Critical("Deferred Write Error Flag is now %s\n", Controller, - (NewEnquiry->StatusFlags.DeferredWriteError - ? "TRUE" : "FALSE")); - if ((NewCriticalLogicalDriveCount > 0 || - NewCriticalLogicalDriveCount != OldCriticalLogicalDriveCount) || - (NewEnquiry->OfflineLogicalDriveCount > 0 || - NewEnquiry->OfflineLogicalDriveCount != - OldEnquiry->OfflineLogicalDriveCount) || - (NewEnquiry->DeadDriveCount > 0 || - NewEnquiry->DeadDriveCount != - OldEnquiry->DeadDriveCount) || - (NewEnquiry->EventLogSequenceNumber != - OldEnquiry->EventLogSequenceNumber) || - Controller->MonitoringTimerCount == 0 || - time_after_eq(jiffies, Controller->SecondaryMonitoringTime - + DAC960_SecondaryMonitoringInterval)) - { - Controller->V1.NeedLogicalDriveInformation = true; - Controller->V1.NewEventLogSequenceNumber = - NewEnquiry->EventLogSequenceNumber; - Controller->V1.NeedErrorTableInformation = true; - Controller->V1.NeedDeviceStateInformation = true; - Controller->V1.StartDeviceStateScan = true; - Controller->V1.NeedBackgroundInitializationStatus = - Controller->V1.BackgroundInitializationStatusSupported; - Controller->SecondaryMonitoringTime = jiffies; - } - if (NewEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress || - NewEnquiry->RebuildFlag - == DAC960_V1_BackgroundRebuildInProgress || - OldEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress || - OldEnquiry->RebuildFlag == DAC960_V1_BackgroundRebuildInProgress) - { - Controller->V1.NeedRebuildProgress = true; - Controller->V1.RebuildProgressFirst = - (NewEnquiry->CriticalLogicalDriveCount < - OldEnquiry->CriticalLogicalDriveCount); - } - if (OldEnquiry->RebuildFlag == DAC960_V1_BackgroundCheckInProgress) - switch (NewEnquiry->RebuildFlag) - { - case DAC960_V1_NoStandbyRebuildOrCheckInProgress: - DAC960_Progress("Consistency Check Completed Successfully\n", - Controller); - break; - case DAC960_V1_StandbyRebuildInProgress: - case DAC960_V1_BackgroundRebuildInProgress: - break; - case DAC960_V1_BackgroundCheckInProgress: - Controller->V1.NeedConsistencyCheckProgress = true; - break; - case DAC960_V1_StandbyRebuildCompletedWithError: - DAC960_Progress("Consistency Check Completed with Error\n", - Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_DriveFailed: - DAC960_Progress("Consistency Check Failed - " - "Physical Device Failed\n", Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_LogicalDriveFailed: - DAC960_Progress("Consistency Check Failed - " - "Logical Drive Failed\n", Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_OtherCauses: - DAC960_Progress("Consistency Check Failed - Other Causes\n", - Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckSuccessfullyTerminated: - DAC960_Progress("Consistency Check Successfully Terminated\n", - Controller); - break; - } - else if (NewEnquiry->RebuildFlag - == DAC960_V1_BackgroundCheckInProgress) - Controller->V1.NeedConsistencyCheckProgress = true; - Controller->MonitoringAlertMode = - (NewEnquiry->CriticalLogicalDriveCount > 0 || - NewEnquiry->OfflineLogicalDriveCount > 0 || - NewEnquiry->DeadDriveCount > 0); - if (NewEnquiry->RebuildFlag > DAC960_V1_BackgroundCheckInProgress) - { - Controller->V1.PendingRebuildFlag = NewEnquiry->RebuildFlag; - Controller->V1.RebuildFlagPending = true; - } - memcpy(&Controller->V1.Enquiry, &Controller->V1.NewEnquiry, - sizeof(DAC960_V1_Enquiry_T)); - } - else if (CommandOpcode == DAC960_V1_PerformEventLogOperation) - { - static char - *DAC960_EventMessages[] = - { "killed because write recovery failed", - "killed because of SCSI bus reset failure", - "killed because of double check condition", - "killed because it was removed", - "killed because of gross error on SCSI chip", - "killed because of bad tag returned from drive", - "killed because of timeout on SCSI command", - "killed because of reset SCSI command issued from system", - "killed because busy or parity error count exceeded limit", - "killed because of 'kill drive' command from system", - "killed because of selection timeout", - "killed due to SCSI phase sequence error", - "killed due to unknown status" }; - DAC960_V1_EventLogEntry_T *EventLogEntry = - Controller->V1.EventLogEntry; - if (EventLogEntry->SequenceNumber == - Controller->V1.OldEventLogSequenceNumber) - { - unsigned char SenseKey = EventLogEntry->SenseKey; - unsigned char AdditionalSenseCode = - EventLogEntry->AdditionalSenseCode; - unsigned char AdditionalSenseCodeQualifier = - EventLogEntry->AdditionalSenseCodeQualifier; - if (SenseKey == DAC960_SenseKey_VendorSpecific && - AdditionalSenseCode == 0x80 && - AdditionalSenseCodeQualifier < - ARRAY_SIZE(DAC960_EventMessages)) - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - DAC960_EventMessages[ - AdditionalSenseCodeQualifier]); - else if (SenseKey == DAC960_SenseKey_UnitAttention && - AdditionalSenseCode == 0x29) - { - if (Controller->MonitoringTimerCount > 0) - Controller->V1.DeviceResetCount[EventLogEntry->Channel] - [EventLogEntry->TargetID]++; - } - else if (!(SenseKey == DAC960_SenseKey_NoSense || - (SenseKey == DAC960_SenseKey_NotReady && - AdditionalSenseCode == 0x04 && - (AdditionalSenseCodeQualifier == 0x01 || - AdditionalSenseCodeQualifier == 0x02)))) - { - DAC960_Critical("Physical Device %d:%d Error Log: " - "Sense Key = %X, ASC = %02X, ASCQ = %02X\n", - Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - SenseKey, - AdditionalSenseCode, - AdditionalSenseCodeQualifier); - DAC960_Critical("Physical Device %d:%d Error Log: " - "Information = %02X%02X%02X%02X " - "%02X%02X%02X%02X\n", - Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - EventLogEntry->Information[0], - EventLogEntry->Information[1], - EventLogEntry->Information[2], - EventLogEntry->Information[3], - EventLogEntry->CommandSpecificInformation[0], - EventLogEntry->CommandSpecificInformation[1], - EventLogEntry->CommandSpecificInformation[2], - EventLogEntry->CommandSpecificInformation[3]); - } - } - Controller->V1.OldEventLogSequenceNumber++; - } - else if (CommandOpcode == DAC960_V1_GetErrorTable) - { - DAC960_V1_ErrorTable_T *OldErrorTable = &Controller->V1.ErrorTable; - DAC960_V1_ErrorTable_T *NewErrorTable = Controller->V1.NewErrorTable; - int Channel, TargetID; - for (Channel = 0; Channel < Controller->Channels; Channel++) - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - DAC960_V1_ErrorTableEntry_T *NewErrorEntry = - &NewErrorTable->ErrorTableEntries[Channel][TargetID]; - DAC960_V1_ErrorTableEntry_T *OldErrorEntry = - &OldErrorTable->ErrorTableEntries[Channel][TargetID]; - if ((NewErrorEntry->ParityErrorCount != - OldErrorEntry->ParityErrorCount) || - (NewErrorEntry->SoftErrorCount != - OldErrorEntry->SoftErrorCount) || - (NewErrorEntry->HardErrorCount != - OldErrorEntry->HardErrorCount) || - (NewErrorEntry->MiscErrorCount != - OldErrorEntry->MiscErrorCount)) - DAC960_Critical("Physical Device %d:%d Errors: " - "Parity = %d, Soft = %d, " - "Hard = %d, Misc = %d\n", - Controller, Channel, TargetID, - NewErrorEntry->ParityErrorCount, - NewErrorEntry->SoftErrorCount, - NewErrorEntry->HardErrorCount, - NewErrorEntry->MiscErrorCount); - } - memcpy(&Controller->V1.ErrorTable, Controller->V1.NewErrorTable, - sizeof(DAC960_V1_ErrorTable_T)); - } - else if (CommandOpcode == DAC960_V1_GetDeviceState) - { - DAC960_V1_DeviceState_T *OldDeviceState = - &Controller->V1.DeviceState[Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - DAC960_V1_DeviceState_T *NewDeviceState = - Controller->V1.NewDeviceState; - if (NewDeviceState->DeviceState != OldDeviceState->DeviceState) - DAC960_Critical("Physical Device %d:%d is now %s\n", Controller, - Controller->V1.DeviceStateChannel, - Controller->V1.DeviceStateTargetID, - (NewDeviceState->DeviceState - == DAC960_V1_Device_Dead - ? "DEAD" - : NewDeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "WRITE-ONLY" - : NewDeviceState->DeviceState - == DAC960_V1_Device_Online - ? "ONLINE" : "STANDBY")); - if (OldDeviceState->DeviceState == DAC960_V1_Device_Dead && - NewDeviceState->DeviceState != DAC960_V1_Device_Dead) - { - Controller->V1.NeedDeviceInquiryInformation = true; - Controller->V1.NeedDeviceSerialNumberInformation = true; - Controller->V1.DeviceResetCount - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID] = 0; - } - memcpy(OldDeviceState, NewDeviceState, - sizeof(DAC960_V1_DeviceState_T)); - } - else if (CommandOpcode == DAC960_V1_GetLogicalDriveInformation) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - { - DAC960_V1_LogicalDriveInformation_T *OldLogicalDriveInformation = - &Controller->V1.LogicalDriveInformation[LogicalDriveNumber]; - DAC960_V1_LogicalDriveInformation_T *NewLogicalDriveInformation = - &(*Controller->V1.NewLogicalDriveInformation)[LogicalDriveNumber]; - if (NewLogicalDriveInformation->LogicalDriveState != - OldLogicalDriveInformation->LogicalDriveState) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (NewLogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Online - ? "ONLINE" - : NewLogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Critical - ? "CRITICAL" : "OFFLINE")); - if (NewLogicalDriveInformation->WriteBack != - OldLogicalDriveInformation->WriteBack) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (NewLogicalDriveInformation->WriteBack - ? "WRITE BACK" : "WRITE THRU")); - } - memcpy(&Controller->V1.LogicalDriveInformation, - Controller->V1.NewLogicalDriveInformation, - sizeof(DAC960_V1_LogicalDriveInformationArray_T)); - } - else if (CommandOpcode == DAC960_V1_GetRebuildProgress) - { - unsigned int LogicalDriveNumber = - Controller->V1.RebuildProgress->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.RebuildProgress->LogicalDriveSize; - unsigned int BlocksCompleted = - LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks; - if (CommandStatus == DAC960_V1_NoRebuildOrCheckInProgress && - Controller->V1.LastRebuildStatus == DAC960_V1_NormalCompletion) - CommandStatus = DAC960_V1_RebuildSuccessful; - switch (CommandStatus) - { - case DAC960_V1_NormalCompletion: - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Rebuild in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - break; - case DAC960_V1_RebuildFailed_LogicalDriveFailure: - DAC960_Progress("Rebuild Failed due to " - "Logical Drive Failure\n", Controller); - break; - case DAC960_V1_RebuildFailed_BadBlocksOnOther: - DAC960_Progress("Rebuild Failed due to " - "Bad Blocks on Other Drives\n", Controller); - break; - case DAC960_V1_RebuildFailed_NewDriveFailed: - DAC960_Progress("Rebuild Failed due to " - "Failure of Drive Being Rebuilt\n", Controller); - break; - case DAC960_V1_NoRebuildOrCheckInProgress: - break; - case DAC960_V1_RebuildSuccessful: - DAC960_Progress("Rebuild Completed Successfully\n", Controller); - break; - case DAC960_V1_RebuildSuccessfullyTerminated: - DAC960_Progress("Rebuild Successfully Terminated\n", Controller); - break; - } - Controller->V1.LastRebuildStatus = CommandStatus; - if (CommandType != DAC960_MonitoringCommand && - Controller->V1.RebuildStatusPending) - { - Command->V1.CommandStatus = Controller->V1.PendingRebuildStatus; - Controller->V1.RebuildStatusPending = false; - } - else if (CommandType == DAC960_MonitoringCommand && - CommandStatus != DAC960_V1_NormalCompletion && - CommandStatus != DAC960_V1_NoRebuildOrCheckInProgress) - { - Controller->V1.PendingRebuildStatus = CommandStatus; - Controller->V1.RebuildStatusPending = true; - } - } - else if (CommandOpcode == DAC960_V1_RebuildStat) - { - unsigned int LogicalDriveNumber = - Controller->V1.RebuildProgress->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.RebuildProgress->LogicalDriveSize; - unsigned int BlocksCompleted = - LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks; - if (CommandStatus == DAC960_V1_NormalCompletion) - { - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Consistency Check in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - } - } - else if (CommandOpcode == DAC960_V1_BackgroundInitializationControl) - { - unsigned int LogicalDriveNumber = - Controller->V1.BackgroundInitializationStatus->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.BackgroundInitializationStatus->LogicalDriveSize; - unsigned int BlocksCompleted = - Controller->V1.BackgroundInitializationStatus->BlocksCompleted; - switch (CommandStatus) - { - case DAC960_V1_NormalCompletion: - switch (Controller->V1.BackgroundInitializationStatus->Status) - { - case DAC960_V1_BackgroundInitializationInvalid: - break; - case DAC960_V1_BackgroundInitializationStarted: - DAC960_Progress("Background Initialization Started\n", - Controller); - break; - case DAC960_V1_BackgroundInitializationInProgress: - if (BlocksCompleted == - Controller->V1.LastBackgroundInitializationStatus. - BlocksCompleted && - LogicalDriveNumber == - Controller->V1.LastBackgroundInitializationStatus. - LogicalDriveNumber) - break; - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Background Initialization in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - break; - case DAC960_V1_BackgroundInitializationSuspended: - DAC960_Progress("Background Initialization Suspended\n", - Controller); - break; - case DAC960_V1_BackgroundInitializationCancelled: - DAC960_Progress("Background Initialization Cancelled\n", - Controller); - break; - } - memcpy(&Controller->V1.LastBackgroundInitializationStatus, - Controller->V1.BackgroundInitializationStatus, - sizeof(DAC960_V1_BackgroundInitializationStatus_T)); - break; - case DAC960_V1_BackgroundInitSuccessful: - if (Controller->V1.BackgroundInitializationStatus->Status == - DAC960_V1_BackgroundInitializationInProgress) - DAC960_Progress("Background Initialization " - "Completed Successfully\n", Controller); - Controller->V1.BackgroundInitializationStatus->Status = - DAC960_V1_BackgroundInitializationInvalid; - break; - case DAC960_V1_BackgroundInitAborted: - if (Controller->V1.BackgroundInitializationStatus->Status == - DAC960_V1_BackgroundInitializationInProgress) - DAC960_Progress("Background Initialization Aborted\n", - Controller); - Controller->V1.BackgroundInitializationStatus->Status = - DAC960_V1_BackgroundInitializationInvalid; - break; - case DAC960_V1_NoBackgroundInitInProgress: - break; - } - } - else if (CommandOpcode == DAC960_V1_DCDB) - { - /* - This is a bit ugly. - - The InquiryStandardData and - the InquiryUntitSerialNumber information - retrieval operations BOTH use the DAC960_V1_DCDB - commands. the test above can't distinguish between - these two cases. - - Instead, we rely on the order of code later in this - function to ensure that DeviceInquiryInformation commands - are submitted before DeviceSerialNumber commands. - */ - if (Controller->V1.NeedDeviceInquiryInformation) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - if (CommandStatus != DAC960_V1_NormalCompletion) - { - memset(InquiryStandardData, 0, - sizeof(DAC960_SCSI_Inquiry_T)); - InquiryStandardData->PeripheralDeviceType = 0x1F; - } - else - memcpy(InquiryStandardData, - Controller->V1.NewInquiryStandardData, - sizeof(DAC960_SCSI_Inquiry_T)); - Controller->V1.NeedDeviceInquiryInformation = false; - } - else if (Controller->V1.NeedDeviceSerialNumberInformation) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - if (CommandStatus != DAC960_V1_NormalCompletion) - { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } - else - memcpy(InquiryUnitSerialNumber, - Controller->V1.NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - Controller->V1.NeedDeviceSerialNumberInformation = false; - } - } - /* - Begin submitting new monitoring commands. - */ - if (Controller->V1.NewEventLogSequenceNumber - - Controller->V1.OldEventLogSequenceNumber > 0) - { - Command->V1.CommandMailbox.Type3E.CommandOpcode = - DAC960_V1_PerformEventLogOperation; - Command->V1.CommandMailbox.Type3E.OperationType = - DAC960_V1_GetEventLogEntry; - Command->V1.CommandMailbox.Type3E.OperationQualifier = 1; - Command->V1.CommandMailbox.Type3E.SequenceNumber = - Controller->V1.OldEventLogSequenceNumber; - Command->V1.CommandMailbox.Type3E.BusAddress = - Controller->V1.EventLogEntryDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedErrorTableInformation) - { - Controller->V1.NeedErrorTableInformation = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetErrorTable; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.NewErrorTableDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedRebuildProgress && - Controller->V1.RebuildProgressFirst) - { - Controller->V1.NeedRebuildProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetRebuildProgress; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedDeviceStateInformation) - { - if (Controller->V1.NeedDeviceInquiryInformation) - { - DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB; - dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA; - - dma_addr_t NewInquiryStandardDataDMA = - Controller->V1.NewInquiryStandardDataDMA; - - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA; - DCDB->Channel = Controller->V1.DeviceStateChannel; - DCDB->TargetID = Controller->V1.DeviceStateTargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->BusAddress = NewInquiryStandardDataDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 0; /* EVPD = 0 */ - DCDB->CDB[2] = 0; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->CDB[5] = 0; /* Control */ - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedDeviceSerialNumberInformation) - { - DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB; - dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA; - dma_addr_t NewInquiryUnitSerialNumberDMA = - Controller->V1.NewInquiryUnitSerialNumberDMA; - - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA; - DCDB->Channel = Controller->V1.DeviceStateChannel; - DCDB->TargetID = Controller->V1.DeviceStateTargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->BusAddress = NewInquiryUnitSerialNumberDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 1; /* EVPD = 1 */ - DCDB->CDB[2] = 0x80; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->CDB[5] = 0; /* Control */ - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.StartDeviceStateScan) - { - Controller->V1.DeviceStateChannel = 0; - Controller->V1.DeviceStateTargetID = 0; - Controller->V1.StartDeviceStateScan = false; - } - else if (++Controller->V1.DeviceStateTargetID == Controller->Targets) - { - Controller->V1.DeviceStateChannel++; - Controller->V1.DeviceStateTargetID = 0; - } - if (Controller->V1.DeviceStateChannel < Controller->Channels) - { - Controller->V1.NewDeviceState->DeviceState = - DAC960_V1_Device_Dead; - Command->V1.CommandMailbox.Type3D.CommandOpcode = - DAC960_V1_GetDeviceState; - Command->V1.CommandMailbox.Type3D.Channel = - Controller->V1.DeviceStateChannel; - Command->V1.CommandMailbox.Type3D.TargetID = - Controller->V1.DeviceStateTargetID; - Command->V1.CommandMailbox.Type3D.BusAddress = - Controller->V1.NewDeviceStateDMA; - DAC960_QueueCommand(Command); - return; - } - Controller->V1.NeedDeviceStateInformation = false; - } - if (Controller->V1.NeedLogicalDriveInformation) - { - Controller->V1.NeedLogicalDriveInformation = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetLogicalDriveInformation; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.NewLogicalDriveInformationDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedRebuildProgress) - { - Controller->V1.NeedRebuildProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetRebuildProgress; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedConsistencyCheckProgress) - { - Controller->V1.NeedConsistencyCheckProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_RebuildStat; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedBackgroundInitializationStatus) - { - Controller->V1.NeedBackgroundInitializationStatus = false; - Command->V1.CommandMailbox.Type3B.CommandOpcode = - DAC960_V1_BackgroundInitializationControl; - Command->V1.CommandMailbox.Type3B.CommandOpcode2 = 0x20; - Command->V1.CommandMailbox.Type3B.BusAddress = - Controller->V1.BackgroundInitializationStatusDMA; - DAC960_QueueCommand(Command); - return; - } - Controller->MonitoringTimerCount++; - Controller->MonitoringTimer.expires = - jiffies + DAC960_MonitoringTimerInterval; - add_timer(&Controller->MonitoringTimer); - } - if (CommandType == DAC960_ImmediateCommand) - { - complete(Command->Completion); - Command->Completion = NULL; - return; - } - if (CommandType == DAC960_QueuedCommand) - { - DAC960_V1_KernelCommand_T *KernelCommand = Command->V1.KernelCommand; - KernelCommand->CommandStatus = Command->V1.CommandStatus; - Command->V1.KernelCommand = NULL; - if (CommandOpcode == DAC960_V1_DCDB) - Controller->V1.DirectCommandActive[KernelCommand->DCDB->Channel] - [KernelCommand->DCDB->TargetID] = - false; - DAC960_DeallocateCommand(Command); - KernelCommand->CompletionFunction(KernelCommand); - return; - } - /* - Queue a Status Monitoring Command to the Controller using the just - completed Command if one was deferred previously due to lack of a - free Command when the Monitoring Timer Function was called. - */ - if (Controller->MonitoringCommandDeferred) - { - Controller->MonitoringCommandDeferred = false; - DAC960_V1_QueueMonitoringCommand(Command); - return; - } - /* - Deallocate the Command. - */ - DAC960_DeallocateCommand(Command); - /* - Wake up any processes waiting on a free Command. - */ - wake_up(&Controller->CommandWaitQueue); -} - - -/* - DAC960_V2_ReadWriteError prints an appropriate error message for Command - when an error occurs on a Read or Write operation. -*/ - -static void DAC960_V2_ReadWriteError(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - static const unsigned char *SenseErrors[] = { - "NO SENSE", "RECOVERED ERROR", - "NOT READY", "MEDIUM ERROR", - "HARDWARE ERROR", "ILLEGAL REQUEST", - "UNIT ATTENTION", "DATA PROTECT", - "BLANK CHECK", "VENDOR-SPECIFIC", - "COPY ABORTED", "ABORTED COMMAND", - "EQUAL", "VOLUME OVERFLOW", - "MISCOMPARE", "RESERVED" - }; - unsigned char *CommandName = "UNKNOWN"; - switch (Command->CommandType) - { - case DAC960_ReadCommand: - case DAC960_ReadRetryCommand: - CommandName = "READ"; - break; - case DAC960_WriteCommand: - case DAC960_WriteRetryCommand: - CommandName = "WRITE"; - break; - case DAC960_MonitoringCommand: - case DAC960_ImmediateCommand: - case DAC960_QueuedCommand: - break; - } - DAC960_Error("Error Condition %s on %s:\n", Controller, - SenseErrors[Command->V2.RequestSense->SenseKey], CommandName); - DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n", - Controller, Controller->ControllerNumber, - Command->LogicalDriveNumber, Command->BlockNumber, - Command->BlockNumber + Command->BlockCount - 1); -} - - -/* - DAC960_V2_ReportEvent prints an appropriate message when a Controller Event - occurs. -*/ - -static void DAC960_V2_ReportEvent(DAC960_Controller_T *Controller, - DAC960_V2_Event_T *Event) -{ - DAC960_SCSI_RequestSense_T *RequestSense = - (DAC960_SCSI_RequestSense_T *) &Event->RequestSenseData; - unsigned char MessageBuffer[DAC960_LineBufferSize]; - static struct { int EventCode; unsigned char *EventMessage; } EventList[] = - { /* Physical Device Events (0x0000 - 0x007F) */ - { 0x0001, "P Online" }, - { 0x0002, "P Standby" }, - { 0x0005, "P Automatic Rebuild Started" }, - { 0x0006, "P Manual Rebuild Started" }, - { 0x0007, "P Rebuild Completed" }, - { 0x0008, "P Rebuild Cancelled" }, - { 0x0009, "P Rebuild Failed for Unknown Reasons" }, - { 0x000A, "P Rebuild Failed due to New Physical Device" }, - { 0x000B, "P Rebuild Failed due to Logical Drive Failure" }, - { 0x000C, "S Offline" }, - { 0x000D, "P Found" }, - { 0x000E, "P Removed" }, - { 0x000F, "P Unconfigured" }, - { 0x0010, "P Expand Capacity Started" }, - { 0x0011, "P Expand Capacity Completed" }, - { 0x0012, "P Expand Capacity Failed" }, - { 0x0013, "P Command Timed Out" }, - { 0x0014, "P Command Aborted" }, - { 0x0015, "P Command Retried" }, - { 0x0016, "P Parity Error" }, - { 0x0017, "P Soft Error" }, - { 0x0018, "P Miscellaneous Error" }, - { 0x0019, "P Reset" }, - { 0x001A, "P Active Spare Found" }, - { 0x001B, "P Warm Spare Found" }, - { 0x001C, "S Sense Data Received" }, - { 0x001D, "P Initialization Started" }, - { 0x001E, "P Initialization Completed" }, - { 0x001F, "P Initialization Failed" }, - { 0x0020, "P Initialization Cancelled" }, - { 0x0021, "P Failed because Write Recovery Failed" }, - { 0x0022, "P Failed because SCSI Bus Reset Failed" }, - { 0x0023, "P Failed because of Double Check Condition" }, - { 0x0024, "P Failed because Device Cannot Be Accessed" }, - { 0x0025, "P Failed because of Gross Error on SCSI Processor" }, - { 0x0026, "P Failed because of Bad Tag from Device" }, - { 0x0027, "P Failed because of Command Timeout" }, - { 0x0028, "P Failed because of System Reset" }, - { 0x0029, "P Failed because of Busy Status or Parity Error" }, - { 0x002A, "P Failed because Host Set Device to Failed State" }, - { 0x002B, "P Failed because of Selection Timeout" }, - { 0x002C, "P Failed because of SCSI Bus Phase Error" }, - { 0x002D, "P Failed because Device Returned Unknown Status" }, - { 0x002E, "P Failed because Device Not Ready" }, - { 0x002F, "P Failed because Device Not Found at Startup" }, - { 0x0030, "P Failed because COD Write Operation Failed" }, - { 0x0031, "P Failed because BDT Write Operation Failed" }, - { 0x0039, "P Missing at Startup" }, - { 0x003A, "P Start Rebuild Failed due to Physical Drive Too Small" }, - { 0x003C, "P Temporarily Offline Device Automatically Made Online" }, - { 0x003D, "P Standby Rebuild Started" }, - /* Logical Device Events (0x0080 - 0x00FF) */ - { 0x0080, "M Consistency Check Started" }, - { 0x0081, "M Consistency Check Completed" }, - { 0x0082, "M Consistency Check Cancelled" }, - { 0x0083, "M Consistency Check Completed With Errors" }, - { 0x0084, "M Consistency Check Failed due to Logical Drive Failure" }, - { 0x0085, "M Consistency Check Failed due to Physical Device Failure" }, - { 0x0086, "L Offline" }, - { 0x0087, "L Critical" }, - { 0x0088, "L Online" }, - { 0x0089, "M Automatic Rebuild Started" }, - { 0x008A, "M Manual Rebuild Started" }, - { 0x008B, "M Rebuild Completed" }, - { 0x008C, "M Rebuild Cancelled" }, - { 0x008D, "M Rebuild Failed for Unknown Reasons" }, - { 0x008E, "M Rebuild Failed due to New Physical Device" }, - { 0x008F, "M Rebuild Failed due to Logical Drive Failure" }, - { 0x0090, "M Initialization Started" }, - { 0x0091, "M Initialization Completed" }, - { 0x0092, "M Initialization Cancelled" }, - { 0x0093, "M Initialization Failed" }, - { 0x0094, "L Found" }, - { 0x0095, "L Deleted" }, - { 0x0096, "M Expand Capacity Started" }, - { 0x0097, "M Expand Capacity Completed" }, - { 0x0098, "M Expand Capacity Failed" }, - { 0x0099, "L Bad Block Found" }, - { 0x009A, "L Size Changed" }, - { 0x009B, "L Type Changed" }, - { 0x009C, "L Bad Data Block Found" }, - { 0x009E, "L Read of Data Block in BDT" }, - { 0x009F, "L Write Back Data for Disk Block Lost" }, - { 0x00A0, "L Temporarily Offline RAID-5/3 Drive Made Online" }, - { 0x00A1, "L Temporarily Offline RAID-6/1/0/7 Drive Made Online" }, - { 0x00A2, "L Standby Rebuild Started" }, - /* Fault Management Events (0x0100 - 0x017F) */ - { 0x0140, "E Fan %d Failed" }, - { 0x0141, "E Fan %d OK" }, - { 0x0142, "E Fan %d Not Present" }, - { 0x0143, "E Power Supply %d Failed" }, - { 0x0144, "E Power Supply %d OK" }, - { 0x0145, "E Power Supply %d Not Present" }, - { 0x0146, "E Temperature Sensor %d Temperature Exceeds Safe Limit" }, - { 0x0147, "E Temperature Sensor %d Temperature Exceeds Working Limit" }, - { 0x0148, "E Temperature Sensor %d Temperature Normal" }, - { 0x0149, "E Temperature Sensor %d Not Present" }, - { 0x014A, "E Enclosure Management Unit %d Access Critical" }, - { 0x014B, "E Enclosure Management Unit %d Access OK" }, - { 0x014C, "E Enclosure Management Unit %d Access Offline" }, - /* Controller Events (0x0180 - 0x01FF) */ - { 0x0181, "C Cache Write Back Error" }, - { 0x0188, "C Battery Backup Unit Found" }, - { 0x0189, "C Battery Backup Unit Charge Level Low" }, - { 0x018A, "C Battery Backup Unit Charge Level OK" }, - { 0x0193, "C Installation Aborted" }, - { 0x0195, "C Battery Backup Unit Physically Removed" }, - { 0x0196, "C Memory Error During Warm Boot" }, - { 0x019E, "C Memory Soft ECC Error Corrected" }, - { 0x019F, "C Memory Hard ECC Error Corrected" }, - { 0x01A2, "C Battery Backup Unit Failed" }, - { 0x01AB, "C Mirror Race Recovery Failed" }, - { 0x01AC, "C Mirror Race on Critical Drive" }, - /* Controller Internal Processor Events */ - { 0x0380, "C Internal Controller Hung" }, - { 0x0381, "C Internal Controller Firmware Breakpoint" }, - { 0x0390, "C Internal Controller i960 Processor Specific Error" }, - { 0x03A0, "C Internal Controller StrongARM Processor Specific Error" }, - { 0, "" } }; - int EventListIndex = 0, EventCode; - unsigned char EventType, *EventMessage; - if (Event->EventCode == 0x1C && - RequestSense->SenseKey == DAC960_SenseKey_VendorSpecific && - (RequestSense->AdditionalSenseCode == 0x80 || - RequestSense->AdditionalSenseCode == 0x81)) - Event->EventCode = ((RequestSense->AdditionalSenseCode - 0x80) << 8) | - RequestSense->AdditionalSenseCodeQualifier; - while (true) - { - EventCode = EventList[EventListIndex].EventCode; - if (EventCode == Event->EventCode || EventCode == 0) break; - EventListIndex++; - } - EventType = EventList[EventListIndex].EventMessage[0]; - EventMessage = &EventList[EventListIndex].EventMessage[2]; - if (EventCode == 0) - { - DAC960_Critical("Unknown Controller Event Code %04X\n", - Controller, Event->EventCode); - return; - } - switch (EventType) - { - case 'P': - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - Event->Channel, Event->TargetID, EventMessage); - break; - case 'L': - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller, - Event->LogicalUnit, Controller->ControllerNumber, - Event->LogicalUnit, EventMessage); - break; - case 'M': - DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller, - Event->LogicalUnit, Controller->ControllerNumber, - Event->LogicalUnit, EventMessage); - break; - case 'S': - if (RequestSense->SenseKey == DAC960_SenseKey_NoSense || - (RequestSense->SenseKey == DAC960_SenseKey_NotReady && - RequestSense->AdditionalSenseCode == 0x04 && - (RequestSense->AdditionalSenseCodeQualifier == 0x01 || - RequestSense->AdditionalSenseCodeQualifier == 0x02))) - break; - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - Event->Channel, Event->TargetID, EventMessage); - DAC960_Critical("Physical Device %d:%d Request Sense: " - "Sense Key = %X, ASC = %02X, ASCQ = %02X\n", - Controller, - Event->Channel, - Event->TargetID, - RequestSense->SenseKey, - RequestSense->AdditionalSenseCode, - RequestSense->AdditionalSenseCodeQualifier); - DAC960_Critical("Physical Device %d:%d Request Sense: " - "Information = %02X%02X%02X%02X " - "%02X%02X%02X%02X\n", - Controller, - Event->Channel, - Event->TargetID, - RequestSense->Information[0], - RequestSense->Information[1], - RequestSense->Information[2], - RequestSense->Information[3], - RequestSense->CommandSpecificInformation[0], - RequestSense->CommandSpecificInformation[1], - RequestSense->CommandSpecificInformation[2], - RequestSense->CommandSpecificInformation[3]); - break; - case 'E': - if (Controller->SuppressEnclosureMessages) break; - sprintf(MessageBuffer, EventMessage, Event->LogicalUnit); - DAC960_Critical("Enclosure %d %s\n", Controller, - Event->TargetID, MessageBuffer); - break; - case 'C': - DAC960_Critical("Controller %s\n", Controller, EventMessage); - break; - default: - DAC960_Critical("Unknown Controller Event Code %04X\n", - Controller, Event->EventCode); - break; - } -} - - -/* - DAC960_V2_ReportProgress prints an appropriate progress message for - Logical Device Long Operations. -*/ - -static void DAC960_V2_ReportProgress(DAC960_Controller_T *Controller, - unsigned char *MessageString, - unsigned int LogicalDeviceNumber, - unsigned long BlocksCompleted, - unsigned long LogicalDeviceSize) -{ - Controller->EphemeralProgressMessage = true; - DAC960_Progress("%s in Progress: Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", Controller, - MessageString, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (100 * (BlocksCompleted >> 7)) / (LogicalDeviceSize >> 7)); - Controller->EphemeralProgressMessage = false; -} - - -/* - DAC960_V2_ProcessCompletedCommand performs completion processing for Command - for DAC960 V2 Firmware Controllers. -*/ - -static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_CommandType_T CommandType = Command->CommandType; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_IOCTL_Opcode_T IOCTLOpcode = CommandMailbox->Common.IOCTL_Opcode; - DAC960_V2_CommandOpcode_T CommandOpcode = CommandMailbox->SCSI_10.CommandOpcode; - DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus; - - if (CommandType == DAC960_ReadCommand || - CommandType == DAC960_WriteCommand) - { - -#ifdef FORCE_RETRY_DEBUG - CommandStatus = DAC960_V2_AbormalCompletion; -#endif - Command->V2.RequestSense->SenseKey = DAC960_SenseKey_MediumError; - - if (CommandStatus == DAC960_V2_NormalCompletion) { - - if (!DAC960_ProcessCompletedRequest(Command, true)) - BUG(); - - } else if (Command->V2.RequestSense->SenseKey == DAC960_SenseKey_MediumError) - { - /* - * break the command down into pieces and resubmit each - * piece, hoping that some of them will succeed. - */ - DAC960_queue_partial_rw(Command); - return; - } - else - { - if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady) - DAC960_V2_ReadWriteError(Command); - /* - Perform completion processing for all buffers in this I/O Request. - */ - (void)DAC960_ProcessCompletedRequest(Command, false); - } - } - else if (CommandType == DAC960_ReadRetryCommand || - CommandType == DAC960_WriteRetryCommand) - { - bool normal_completion; - -#ifdef FORCE_RETRY_FAILURE_DEBUG - static int retry_count = 1; -#endif - /* - Perform completion processing for the portion that was - retried, and submit the next portion, if any. - */ - normal_completion = true; - if (CommandStatus != DAC960_V2_NormalCompletion) { - normal_completion = false; - if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady) - DAC960_V2_ReadWriteError(Command); - } - -#ifdef FORCE_RETRY_FAILURE_DEBUG - if (!(++retry_count % 10000)) { - printk("V2 error retry failure test\n"); - normal_completion = false; - DAC960_V2_ReadWriteError(Command); - } -#endif - - if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) { - DAC960_queue_partial_rw(Command); - return; - } - } - else if (CommandType == DAC960_MonitoringCommand) - { - if (Controller->ShutdownMonitoringTimer) - return; - if (IOCTLOpcode == DAC960_V2_GetControllerInfo) - { - DAC960_V2_ControllerInfo_T *NewControllerInfo = - Controller->V2.NewControllerInformation; - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - Controller->LogicalDriveCount = - NewControllerInfo->LogicalDevicesPresent; - Controller->V2.NeedLogicalDeviceInformation = true; - Controller->V2.NeedPhysicalDeviceInformation = true; - Controller->V2.StartLogicalDeviceInformationScan = true; - Controller->V2.StartPhysicalDeviceInformationScan = true; - Controller->MonitoringAlertMode = - (NewControllerInfo->LogicalDevicesCritical > 0 || - NewControllerInfo->LogicalDevicesOffline > 0 || - NewControllerInfo->PhysicalDisksCritical > 0 || - NewControllerInfo->PhysicalDisksOffline > 0); - memcpy(ControllerInfo, NewControllerInfo, - sizeof(DAC960_V2_ControllerInfo_T)); - } - else if (IOCTLOpcode == DAC960_V2_GetEvent) - { - if (CommandStatus == DAC960_V2_NormalCompletion) { - DAC960_V2_ReportEvent(Controller, Controller->V2.Event); - } - Controller->V2.NextEventSequenceNumber++; - } - else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid && - CommandStatus == DAC960_V2_NormalCompletion) - { - DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = - Controller->V2.NewPhysicalDeviceInformation; - unsigned int PhysicalDeviceIndex = Controller->V2.PhysicalDeviceIndex; - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - unsigned int DeviceIndex; - while (PhysicalDeviceInfo != NULL && - (NewPhysicalDeviceInfo->Channel > - PhysicalDeviceInfo->Channel || - (NewPhysicalDeviceInfo->Channel == - PhysicalDeviceInfo->Channel && - (NewPhysicalDeviceInfo->TargetID > - PhysicalDeviceInfo->TargetID || - (NewPhysicalDeviceInfo->TargetID == - PhysicalDeviceInfo->TargetID && - NewPhysicalDeviceInfo->LogicalUnit > - PhysicalDeviceInfo->LogicalUnit))))) - { - DAC960_Critical("Physical Device %d:%d No Longer Exists\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID); - Controller->V2.PhysicalDeviceInformation - [PhysicalDeviceIndex] = NULL; - Controller->V2.InquiryUnitSerialNumber - [PhysicalDeviceIndex] = NULL; - kfree(PhysicalDeviceInfo); - kfree(InquiryUnitSerialNumber); - for (DeviceIndex = PhysicalDeviceIndex; - DeviceIndex < DAC960_V2_MaxPhysicalDevices - 1; - DeviceIndex++) - { - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = - Controller->V2.PhysicalDeviceInformation[DeviceIndex+1]; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex+1]; - } - Controller->V2.PhysicalDeviceInformation - [DAC960_V2_MaxPhysicalDevices-1] = NULL; - Controller->V2.InquiryUnitSerialNumber - [DAC960_V2_MaxPhysicalDevices-1] = NULL; - PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - } - if (PhysicalDeviceInfo == NULL || - (NewPhysicalDeviceInfo->Channel != - PhysicalDeviceInfo->Channel) || - (NewPhysicalDeviceInfo->TargetID != - PhysicalDeviceInfo->TargetID) || - (NewPhysicalDeviceInfo->LogicalUnit != - PhysicalDeviceInfo->LogicalUnit)) - { - PhysicalDeviceInfo = - kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T), GFP_ATOMIC); - InquiryUnitSerialNumber = - kmalloc(sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - GFP_ATOMIC); - if (InquiryUnitSerialNumber == NULL || - PhysicalDeviceInfo == NULL) - { - kfree(InquiryUnitSerialNumber); - InquiryUnitSerialNumber = NULL; - kfree(PhysicalDeviceInfo); - PhysicalDeviceInfo = NULL; - } - DAC960_Critical("Physical Device %d:%d Now Exists%s\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - (PhysicalDeviceInfo != NULL - ? "" : " - Allocation Failed")); - if (PhysicalDeviceInfo != NULL) - { - memset(PhysicalDeviceInfo, 0, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - PhysicalDeviceInfo->PhysicalDeviceState = - DAC960_V2_Device_InvalidState; - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - for (DeviceIndex = DAC960_V2_MaxPhysicalDevices - 1; - DeviceIndex > PhysicalDeviceIndex; - DeviceIndex--) - { - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = - Controller->V2.PhysicalDeviceInformation[DeviceIndex-1]; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex-1]; - } - Controller->V2.PhysicalDeviceInformation - [PhysicalDeviceIndex] = - PhysicalDeviceInfo; - Controller->V2.InquiryUnitSerialNumber - [PhysicalDeviceIndex] = - InquiryUnitSerialNumber; - Controller->V2.NeedDeviceSerialNumberInformation = true; - } - } - if (PhysicalDeviceInfo != NULL) - { - if (NewPhysicalDeviceInfo->PhysicalDeviceState != - PhysicalDeviceInfo->PhysicalDeviceState) - DAC960_Critical( - "Physical Device %d:%d is now %s\n", Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - (NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Online - ? "ONLINE" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Rebuild - ? "REBUILD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Missing - ? "MISSING" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Critical - ? "CRITICAL" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead - ? "DEAD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_SuspectedDead - ? "SUSPECTED-DEAD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_CommandedOffline - ? "COMMANDED-OFFLINE" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Standby - ? "STANDBY" : "UNKNOWN")); - if ((NewPhysicalDeviceInfo->ParityErrors != - PhysicalDeviceInfo->ParityErrors) || - (NewPhysicalDeviceInfo->SoftErrors != - PhysicalDeviceInfo->SoftErrors) || - (NewPhysicalDeviceInfo->HardErrors != - PhysicalDeviceInfo->HardErrors) || - (NewPhysicalDeviceInfo->MiscellaneousErrors != - PhysicalDeviceInfo->MiscellaneousErrors) || - (NewPhysicalDeviceInfo->CommandTimeouts != - PhysicalDeviceInfo->CommandTimeouts) || - (NewPhysicalDeviceInfo->Retries != - PhysicalDeviceInfo->Retries) || - (NewPhysicalDeviceInfo->Aborts != - PhysicalDeviceInfo->Aborts) || - (NewPhysicalDeviceInfo->PredictedFailuresDetected != - PhysicalDeviceInfo->PredictedFailuresDetected)) - { - DAC960_Critical("Physical Device %d:%d Errors: " - "Parity = %d, Soft = %d, " - "Hard = %d, Misc = %d\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - NewPhysicalDeviceInfo->ParityErrors, - NewPhysicalDeviceInfo->SoftErrors, - NewPhysicalDeviceInfo->HardErrors, - NewPhysicalDeviceInfo->MiscellaneousErrors); - DAC960_Critical("Physical Device %d:%d Errors: " - "Timeouts = %d, Retries = %d, " - "Aborts = %d, Predicted = %d\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - NewPhysicalDeviceInfo->CommandTimeouts, - NewPhysicalDeviceInfo->Retries, - NewPhysicalDeviceInfo->Aborts, - NewPhysicalDeviceInfo - ->PredictedFailuresDetected); - } - if ((PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead || - PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_InvalidState) && - NewPhysicalDeviceInfo->PhysicalDeviceState - != DAC960_V2_Device_Dead) - Controller->V2.NeedDeviceSerialNumberInformation = true; - memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - } - NewPhysicalDeviceInfo->LogicalUnit++; - Controller->V2.PhysicalDeviceIndex++; - } - else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid) - { - unsigned int DeviceIndex; - for (DeviceIndex = Controller->V2.PhysicalDeviceIndex; - DeviceIndex < DAC960_V2_MaxPhysicalDevices; - DeviceIndex++) - { - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[DeviceIndex]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex]; - if (PhysicalDeviceInfo == NULL) break; - DAC960_Critical("Physical Device %d:%d No Longer Exists\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID); - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = NULL; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = NULL; - kfree(PhysicalDeviceInfo); - kfree(InquiryUnitSerialNumber); - } - Controller->V2.NeedPhysicalDeviceInformation = false; - } - else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid && - CommandStatus == DAC960_V2_NormalCompletion) - { - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = - Controller->V2.NewLogicalDeviceInformation; - unsigned short LogicalDeviceNumber = - NewLogicalDeviceInfo->LogicalDeviceNumber; - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber]; - if (LogicalDeviceInfo == NULL) - { - DAC960_V2_PhysicalDevice_T PhysicalDevice; - PhysicalDevice.Controller = 0; - PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel; - PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID; - PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit; - Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] = - PhysicalDevice; - LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T), - GFP_ATOMIC); - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] = - LogicalDeviceInfo; - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "Now Exists%s\n", Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (LogicalDeviceInfo != NULL - ? "" : " - Allocation Failed")); - if (LogicalDeviceInfo != NULL) - { - memset(LogicalDeviceInfo, 0, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - DAC960_ComputeGenericDiskInfo(Controller); - } - } - if (LogicalDeviceInfo != NULL) - { - unsigned long LogicalDeviceSize = - NewLogicalDeviceInfo->ConfigurableDeviceSize; - if (NewLogicalDeviceInfo->LogicalDeviceState != - LogicalDeviceInfo->LogicalDeviceState) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (NewLogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Online - ? "ONLINE" - : NewLogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Critical - ? "CRITICAL" : "OFFLINE")); - if ((NewLogicalDeviceInfo->SoftErrors != - LogicalDeviceInfo->SoftErrors) || - (NewLogicalDeviceInfo->CommandsFailed != - LogicalDeviceInfo->CommandsFailed) || - (NewLogicalDeviceInfo->DeferredWriteErrors != - LogicalDeviceInfo->DeferredWriteErrors)) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) Errors: " - "Soft = %d, Failed = %d, Deferred Write = %d\n", - Controller, LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - NewLogicalDeviceInfo->SoftErrors, - NewLogicalDeviceInfo->CommandsFailed, - NewLogicalDeviceInfo->DeferredWriteErrors); - if (NewLogicalDeviceInfo->ConsistencyCheckInProgress) - DAC960_V2_ReportProgress(Controller, - "Consistency Check", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->ConsistencyCheckBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->RebuildInProgress) - DAC960_V2_ReportProgress(Controller, - "Rebuild", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->RebuildBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->BackgroundInitializationInProgress) - DAC960_V2_ReportProgress(Controller, - "Background Initialization", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->BackgroundInitializationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->ForegroundInitializationInProgress) - DAC960_V2_ReportProgress(Controller, - "Foreground Initialization", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->ForegroundInitializationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->DataMigrationInProgress) - DAC960_V2_ReportProgress(Controller, - "Data Migration", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->DataMigrationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->PatrolOperationInProgress) - DAC960_V2_ReportProgress(Controller, - "Patrol Operation", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->PatrolOperationBlockNumber, - LogicalDeviceSize); - if (LogicalDeviceInfo->BackgroundInitializationInProgress && - !NewLogicalDeviceInfo->BackgroundInitializationInProgress) - DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) " - "Background Initialization %s\n", - Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (NewLogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized - ? "Completed" : "Failed")); - memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - } - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDeviceNumber] = true; - NewLogicalDeviceInfo->LogicalDeviceNumber++; - } - else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - if (LogicalDeviceInfo == NULL || - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDriveNumber]) - continue; - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "No Longer Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->V2.LogicalDeviceInformation - [LogicalDriveNumber] = NULL; - kfree(LogicalDeviceInfo); - Controller->LogicalDriveInitiallyAccessible - [LogicalDriveNumber] = false; - DAC960_ComputeGenericDiskInfo(Controller); - } - Controller->V2.NeedLogicalDeviceInformation = false; - } - else if (CommandOpcode == DAC960_V2_SCSI_10_Passthru) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[Controller->V2.PhysicalDeviceIndex - 1]; - - if (CommandStatus != DAC960_V2_NormalCompletion) { - memset(InquiryUnitSerialNumber, - 0, sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, - Controller->V2.NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - - Controller->V2.NeedDeviceSerialNumberInformation = false; - } - - if (Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - - Controller->V2.NextEventSequenceNumber > 0) - { - CommandMailbox->GetEvent.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->GetEvent.DataTransferSize = sizeof(DAC960_V2_Event_T); - CommandMailbox->GetEvent.EventSequenceNumberHigh16 = - Controller->V2.NextEventSequenceNumber >> 16; - CommandMailbox->GetEvent.ControllerNumber = 0; - CommandMailbox->GetEvent.IOCTL_Opcode = - DAC960_V2_GetEvent; - CommandMailbox->GetEvent.EventSequenceNumberLow16 = - Controller->V2.NextEventSequenceNumber & 0xFFFF; - CommandMailbox->GetEvent.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.EventDMA; - CommandMailbox->GetEvent.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->GetEvent.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.NeedPhysicalDeviceInformation) - { - if (Controller->V2.NeedDeviceSerialNumberInformation) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.NewInquiryUnitSerialNumber; - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - - DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox, - Controller->V2.NewPhysicalDeviceInformation->Channel, - Controller->V2.NewPhysicalDeviceInformation->TargetID, - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit - 1); - - - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.StartPhysicalDeviceInformationScan) - { - Controller->V2.PhysicalDeviceIndex = 0; - Controller->V2.NewPhysicalDeviceInformation->Channel = 0; - Controller->V2.NewPhysicalDeviceInformation->TargetID = 0; - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit = 0; - Controller->V2.StartPhysicalDeviceInformationScan = false; - } - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalDeviceInfo_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit = - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = - Controller->V2.NewPhysicalDeviceInformation->TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = - Controller->V2.NewPhysicalDeviceInformation->Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetPhysicalDeviceInfoValid; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewPhysicalDeviceInformationDMA; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->PhysicalDeviceInfo.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.NeedLogicalDeviceInformation) - { - if (Controller->V2.StartLogicalDeviceInformationScan) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDriveNumber] = false; - Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber = 0; - Controller->V2.StartLogicalDeviceInformationScan = false; - } - CommandMailbox->LogicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->LogicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_LogicalDeviceInfo_T); - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetLogicalDeviceInfoValid; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewLogicalDeviceInformationDMA; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->LogicalDeviceInfo.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - Controller->MonitoringTimerCount++; - Controller->MonitoringTimer.expires = - jiffies + DAC960_HealthStatusMonitoringInterval; - add_timer(&Controller->MonitoringTimer); - } - if (CommandType == DAC960_ImmediateCommand) - { - complete(Command->Completion); - Command->Completion = NULL; - return; - } - if (CommandType == DAC960_QueuedCommand) - { - DAC960_V2_KernelCommand_T *KernelCommand = Command->V2.KernelCommand; - KernelCommand->CommandStatus = CommandStatus; - KernelCommand->RequestSenseLength = Command->V2.RequestSenseLength; - KernelCommand->DataTransferLength = Command->V2.DataTransferResidue; - Command->V2.KernelCommand = NULL; - DAC960_DeallocateCommand(Command); - KernelCommand->CompletionFunction(KernelCommand); - return; - } - /* - Queue a Status Monitoring Command to the Controller using the just - completed Command if one was deferred previously due to lack of a - free Command when the Monitoring Timer Function was called. - */ - if (Controller->MonitoringCommandDeferred) - { - Controller->MonitoringCommandDeferred = false; - DAC960_V2_QueueMonitoringCommand(Command); - return; - } - /* - Deallocate the Command. - */ - DAC960_DeallocateCommand(Command); - /* - Wake up any processes waiting on a free Command. - */ - wake_up(&Controller->CommandWaitQueue); -} - -/* - DAC960_GEM_InterruptHandler handles hardware interrupts from DAC960 GEM Series - Controllers. -*/ - -static irqreturn_t DAC960_GEM_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_GEM_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V2.NextStatusMailbox; - while (NextStatusMailbox->Fields.CommandIdentifier > 0) - { - DAC960_V2_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - Command->V2.RequestSenseLength = - NextStatusMailbox->Fields.RequestSenseLength; - Command->V2.DataTransferResidue = - NextStatusMailbox->Fields.DataTransferResidue; - NextStatusMailbox->Words[0] = 0; - if (++NextStatusMailbox > Controller->V2.LastStatusMailbox) - NextStatusMailbox = Controller->V2.FirstStatusMailbox; - DAC960_V2_ProcessCompletedCommand(Command); - } - Controller->V2.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - -/* - DAC960_BA_InterruptHandler handles hardware interrupts from DAC960 BA Series - Controllers. -*/ - -static irqreturn_t DAC960_BA_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_BA_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V2.NextStatusMailbox; - while (NextStatusMailbox->Fields.CommandIdentifier > 0) - { - DAC960_V2_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - Command->V2.RequestSenseLength = - NextStatusMailbox->Fields.RequestSenseLength; - Command->V2.DataTransferResidue = - NextStatusMailbox->Fields.DataTransferResidue; - NextStatusMailbox->Words[0] = 0; - if (++NextStatusMailbox > Controller->V2.LastStatusMailbox) - NextStatusMailbox = Controller->V2.FirstStatusMailbox; - DAC960_V2_ProcessCompletedCommand(Command); - } - Controller->V2.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_LP_InterruptHandler handles hardware interrupts from DAC960 LP Series - Controllers. -*/ - -static irqreturn_t DAC960_LP_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_LP_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V2.NextStatusMailbox; - while (NextStatusMailbox->Fields.CommandIdentifier > 0) - { - DAC960_V2_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - Command->V2.RequestSenseLength = - NextStatusMailbox->Fields.RequestSenseLength; - Command->V2.DataTransferResidue = - NextStatusMailbox->Fields.DataTransferResidue; - NextStatusMailbox->Words[0] = 0; - if (++NextStatusMailbox > Controller->V2.LastStatusMailbox) - NextStatusMailbox = Controller->V2.FirstStatusMailbox; - DAC960_V2_ProcessCompletedCommand(Command); - } - Controller->V2.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_LA_InterruptHandler handles hardware interrupts from DAC960 LA Series - Controllers. -*/ - -static irqreturn_t DAC960_LA_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_LA_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V1.NextStatusMailbox; - while (NextStatusMailbox->Fields.Valid) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - NextStatusMailbox->Word = 0; - if (++NextStatusMailbox > Controller->V1.LastStatusMailbox) - NextStatusMailbox = Controller->V1.FirstStatusMailbox; - DAC960_V1_ProcessCompletedCommand(Command); - } - Controller->V1.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_PG_InterruptHandler handles hardware interrupts from DAC960 PG Series - Controllers. -*/ - -static irqreturn_t DAC960_PG_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_PG_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V1.NextStatusMailbox; - while (NextStatusMailbox->Fields.Valid) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - NextStatusMailbox->Word = 0; - if (++NextStatusMailbox > Controller->V1.LastStatusMailbox) - NextStatusMailbox = Controller->V1.FirstStatusMailbox; - DAC960_V1_ProcessCompletedCommand(Command); - } - Controller->V1.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_PD_InterruptHandler handles hardware interrupts from DAC960 PD Series - Controllers. -*/ - -static irqreturn_t DAC960_PD_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while (DAC960_PD_StatusAvailableP(ControllerBaseAddress)) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress); - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V1.CommandStatus = - DAC960_PD_ReadStatusRegister(ControllerBaseAddress); - DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress); - DAC960_PD_AcknowledgeStatus(ControllerBaseAddress); - DAC960_V1_ProcessCompletedCommand(Command); - } - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_P_InterruptHandler handles hardware interrupts from DAC960 P Series - Controllers. - - Translations of DAC960_V1_Enquiry and DAC960_V1_GetDeviceState rely - on the data having been placed into DAC960_Controller_T, rather than - an arbitrary buffer. -*/ - -static irqreturn_t DAC960_P_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while (DAC960_PD_StatusAvailableP(ControllerBaseAddress)) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress); - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandOpcode_T CommandOpcode = - CommandMailbox->Common.CommandOpcode; - Command->V1.CommandStatus = - DAC960_PD_ReadStatusRegister(ControllerBaseAddress); - DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress); - DAC960_PD_AcknowledgeStatus(ControllerBaseAddress); - switch (CommandOpcode) - { - case DAC960_V1_Enquiry_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Enquiry; - DAC960_P_To_PD_TranslateEnquiry(Controller->V1.NewEnquiry); - break; - case DAC960_V1_GetDeviceState_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = - DAC960_V1_GetDeviceState; - DAC960_P_To_PD_TranslateDeviceState(Controller->V1.NewDeviceState); - break; - case DAC960_V1_Read_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Read; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_Write_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Write; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_ReadWithScatterGather_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = - DAC960_V1_ReadWithScatterGather; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_WriteWithScatterGather_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = - DAC960_V1_WriteWithScatterGather; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - default: - break; - } - DAC960_V1_ProcessCompletedCommand(Command); - } - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_V1_QueueMonitoringCommand queues a Monitoring Command to DAC960 V1 - Firmware Controllers. -*/ - -static void DAC960_V1_QueueMonitoringCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_MonitoringCommand; - CommandMailbox->Type3.CommandOpcode = DAC960_V1_Enquiry; - CommandMailbox->Type3.BusAddress = Controller->V1.NewEnquiryDMA; - DAC960_QueueCommand(Command); -} - - -/* - DAC960_V2_QueueMonitoringCommand queues a Monitoring Command to DAC960 V2 - Firmware Controllers. -*/ - -static void DAC960_V2_QueueMonitoringCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_MonitoringCommand; - CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->ControllerInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->ControllerInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->ControllerInfo.DataTransferSize = - sizeof(DAC960_V2_ControllerInfo_T); - CommandMailbox->ControllerInfo.ControllerNumber = 0; - CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewControllerInformationDMA; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->ControllerInfo.DataTransferSize; - DAC960_QueueCommand(Command); -} - - -/* - DAC960_MonitoringTimerFunction is the timer function for monitoring - the status of DAC960 Controllers. -*/ - -static void DAC960_MonitoringTimerFunction(struct timer_list *t) -{ - DAC960_Controller_T *Controller = from_timer(Controller, t, MonitoringTimer); - DAC960_Command_T *Command; - unsigned long flags; - - if (Controller->FirmwareType == DAC960_V1_Controller) - { - spin_lock_irqsave(&Controller->queue_lock, flags); - /* - Queue a Status Monitoring Command to Controller. - */ - Command = DAC960_AllocateCommand(Controller); - if (Command != NULL) - DAC960_V1_QueueMonitoringCommand(Command); - else Controller->MonitoringCommandDeferred = true; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - } - else - { - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - unsigned int StatusChangeCounter = - Controller->V2.HealthStatusBuffer->StatusChangeCounter; - bool ForceMonitoringCommand = false; - if (time_after(jiffies, Controller->SecondaryMonitoringTime - + DAC960_SecondaryMonitoringInterval)) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - if (LogicalDeviceInfo == NULL) continue; - if (!LogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized) - { - ForceMonitoringCommand = true; - break; - } - } - Controller->SecondaryMonitoringTime = jiffies; - } - if (StatusChangeCounter == Controller->V2.StatusChangeCounter && - Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - == Controller->V2.NextEventSequenceNumber && - (ControllerInfo->BackgroundInitializationsActive + - ControllerInfo->LogicalDeviceInitializationsActive + - ControllerInfo->PhysicalDeviceInitializationsActive + - ControllerInfo->ConsistencyChecksActive + - ControllerInfo->RebuildsActive + - ControllerInfo->OnlineExpansionsActive == 0 || - time_before(jiffies, Controller->PrimaryMonitoringTime - + DAC960_MonitoringTimerInterval)) && - !ForceMonitoringCommand) - { - Controller->MonitoringTimer.expires = - jiffies + DAC960_HealthStatusMonitoringInterval; - add_timer(&Controller->MonitoringTimer); - return; - } - Controller->V2.StatusChangeCounter = StatusChangeCounter; - Controller->PrimaryMonitoringTime = jiffies; - - spin_lock_irqsave(&Controller->queue_lock, flags); - /* - Queue a Status Monitoring Command to Controller. - */ - Command = DAC960_AllocateCommand(Controller); - if (Command != NULL) - DAC960_V2_QueueMonitoringCommand(Command); - else Controller->MonitoringCommandDeferred = true; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - /* - Wake up any processes waiting on a Health Status Buffer change. - */ - wake_up(&Controller->HealthStatusWaitQueue); - } -} - -/* - DAC960_CheckStatusBuffer verifies that there is room to hold ByteCount - additional bytes in the Combined Status Buffer and grows the buffer if - necessary. It returns true if there is enough room and false otherwise. -*/ - -static bool DAC960_CheckStatusBuffer(DAC960_Controller_T *Controller, - unsigned int ByteCount) -{ - unsigned char *NewStatusBuffer; - if (Controller->InitialStatusLength + 1 + - Controller->CurrentStatusLength + ByteCount + 1 <= - Controller->CombinedStatusBufferLength) - return true; - if (Controller->CombinedStatusBufferLength == 0) - { - unsigned int NewStatusBufferLength = DAC960_InitialStatusBufferSize; - while (NewStatusBufferLength < ByteCount) - NewStatusBufferLength *= 2; - Controller->CombinedStatusBuffer = kmalloc(NewStatusBufferLength, - GFP_ATOMIC); - if (Controller->CombinedStatusBuffer == NULL) return false; - Controller->CombinedStatusBufferLength = NewStatusBufferLength; - return true; - } - NewStatusBuffer = kmalloc_array(2, Controller->CombinedStatusBufferLength, - GFP_ATOMIC); - if (NewStatusBuffer == NULL) - { - DAC960_Warning("Unable to expand Combined Status Buffer - Truncating\n", - Controller); - return false; - } - memcpy(NewStatusBuffer, Controller->CombinedStatusBuffer, - Controller->CombinedStatusBufferLength); - kfree(Controller->CombinedStatusBuffer); - Controller->CombinedStatusBuffer = NewStatusBuffer; - Controller->CombinedStatusBufferLength *= 2; - Controller->CurrentStatusBuffer = - &NewStatusBuffer[Controller->InitialStatusLength + 1]; - return true; -} - - -/* - DAC960_Message prints Driver Messages. -*/ - -static void DAC960_Message(DAC960_MessageLevel_T MessageLevel, - unsigned char *Format, - DAC960_Controller_T *Controller, - ...) -{ - static unsigned char Buffer[DAC960_LineBufferSize]; - static bool BeginningOfLine = true; - va_list Arguments; - int Length = 0; - va_start(Arguments, Controller); - Length = vsprintf(Buffer, Format, Arguments); - va_end(Arguments); - if (Controller == NULL) - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - DAC960_ControllerCount, Buffer); - else if (MessageLevel == DAC960_AnnounceLevel || - MessageLevel == DAC960_InfoLevel) - { - if (!Controller->ControllerInitialized) - { - if (DAC960_CheckStatusBuffer(Controller, Length)) - { - strcpy(&Controller->CombinedStatusBuffer - [Controller->InitialStatusLength], - Buffer); - Controller->InitialStatusLength += Length; - Controller->CurrentStatusBuffer = - &Controller->CombinedStatusBuffer - [Controller->InitialStatusLength + 1]; - } - if (MessageLevel == DAC960_AnnounceLevel) - { - static int AnnouncementLines = 0; - if (++AnnouncementLines <= 2) - printk("%sDAC960: %s", DAC960_MessageLevelMap[MessageLevel], - Buffer); - } - else - { - if (BeginningOfLine) - { - if (Buffer[0] != '\n' || Length > 1) - printk("%sDAC960#%d: %s", - DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - } - else printk("%s", Buffer); - } - } - else if (DAC960_CheckStatusBuffer(Controller, Length)) - { - strcpy(&Controller->CurrentStatusBuffer[ - Controller->CurrentStatusLength], Buffer); - Controller->CurrentStatusLength += Length; - } - } - else if (MessageLevel == DAC960_ProgressLevel) - { - strcpy(Controller->ProgressBuffer, Buffer); - Controller->ProgressBufferLength = Length; - if (Controller->EphemeralProgressMessage) - { - if (time_after_eq(jiffies, Controller->LastProgressReportTime - + DAC960_ProgressReportingInterval)) - { - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - Controller->LastProgressReportTime = jiffies; - } - } - else printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - } - else if (MessageLevel == DAC960_UserCriticalLevel) - { - strcpy(&Controller->UserStatusBuffer[Controller->UserStatusLength], - Buffer); - Controller->UserStatusLength += Length; - if (Buffer[0] != '\n' || Length > 1) - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - } - else - { - if (BeginningOfLine) - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - else printk("%s", Buffer); - } - BeginningOfLine = (Buffer[Length-1] == '\n'); -} - - -/* - DAC960_ParsePhysicalDevice parses spaces followed by a Physical Device - Channel:TargetID specification from a User Command string. It updates - Channel and TargetID and returns true on success and false on failure. -*/ - -static bool DAC960_ParsePhysicalDevice(DAC960_Controller_T *Controller, - char *UserCommandString, - unsigned char *Channel, - unsigned char *TargetID) -{ - char *NewUserCommandString = UserCommandString; - unsigned long XChannel, XTargetID; - while (*UserCommandString == ' ') UserCommandString++; - if (UserCommandString == NewUserCommandString) - return false; - XChannel = simple_strtoul(UserCommandString, &NewUserCommandString, 10); - if (NewUserCommandString == UserCommandString || - *NewUserCommandString != ':' || - XChannel >= Controller->Channels) - return false; - UserCommandString = ++NewUserCommandString; - XTargetID = simple_strtoul(UserCommandString, &NewUserCommandString, 10); - if (NewUserCommandString == UserCommandString || - *NewUserCommandString != '\0' || - XTargetID >= Controller->Targets) - return false; - *Channel = XChannel; - *TargetID = XTargetID; - return true; -} - - -/* - DAC960_ParseLogicalDrive parses spaces followed by a Logical Drive Number - specification from a User Command string. It updates LogicalDriveNumber and - returns true on success and false on failure. -*/ - -static bool DAC960_ParseLogicalDrive(DAC960_Controller_T *Controller, - char *UserCommandString, - unsigned char *LogicalDriveNumber) -{ - char *NewUserCommandString = UserCommandString; - unsigned long XLogicalDriveNumber; - while (*UserCommandString == ' ') UserCommandString++; - if (UserCommandString == NewUserCommandString) - return false; - XLogicalDriveNumber = - simple_strtoul(UserCommandString, &NewUserCommandString, 10); - if (NewUserCommandString == UserCommandString || - *NewUserCommandString != '\0' || - XLogicalDriveNumber > DAC960_MaxLogicalDrives - 1) - return false; - *LogicalDriveNumber = XLogicalDriveNumber; - return true; -} - - -/* - DAC960_V1_SetDeviceState sets the Device State for a Physical Device for - DAC960 V1 Firmware Controllers. -*/ - -static void DAC960_V1_SetDeviceState(DAC960_Controller_T *Controller, - DAC960_Command_T *Command, - unsigned char Channel, - unsigned char TargetID, - DAC960_V1_PhysicalDeviceState_T - DeviceState, - const unsigned char *DeviceStateString) -{ - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Type3D.CommandOpcode = DAC960_V1_StartDevice; - CommandMailbox->Type3D.Channel = Channel; - CommandMailbox->Type3D.TargetID = TargetID; - CommandMailbox->Type3D.DeviceState = DeviceState; - CommandMailbox->Type3D.Modifier = 0; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("%s of Physical Device %d:%d Succeeded\n", Controller, - DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_UnableToStartDevice: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Unable to Start Device\n", Controller, - DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_NoDeviceAtAddress: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "No Device at Address\n", Controller, - DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_InvalidChannelOrTargetOrModifier: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Invalid Channel or Target or Modifier\n", - Controller, DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_ChannelBusy: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Channel Busy\n", Controller, - DeviceStateString, Channel, TargetID); - break; - default: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Unexpected Status %04X\n", Controller, - DeviceStateString, Channel, TargetID, - Command->V1.CommandStatus); - break; - } -} - - -/* - DAC960_V1_ExecuteUserCommand executes a User Command for DAC960 V1 Firmware - Controllers. -*/ - -static bool DAC960_V1_ExecuteUserCommand(DAC960_Controller_T *Controller, - unsigned char *UserCommand) -{ - DAC960_Command_T *Command; - DAC960_V1_CommandMailbox_T *CommandMailbox; - unsigned long flags; - unsigned char Channel, TargetID, LogicalDriveNumber; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while ((Command = DAC960_AllocateCommand(Controller)) == NULL) - DAC960_WaitForCommand(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - Controller->UserStatusLength = 0; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox = &Command->V1.CommandMailbox; - if (strcmp(UserCommand, "flush-cache") == 0) - { - CommandMailbox->Type3.CommandOpcode = DAC960_V1_Flush; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Cache Flush Completed\n", Controller); - } - else if (strncmp(UserCommand, "kill", 4) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[4], - &Channel, &TargetID)) - { - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType && - DeviceState->DeviceState != DAC960_V1_Device_Dead) - DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID, - DAC960_V1_Device_Dead, "Kill"); - else DAC960_UserCritical("Kill of Physical Device %d:%d Illegal\n", - Controller, Channel, TargetID); - } - else if (strncmp(UserCommand, "make-online", 11) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[11], - &Channel, &TargetID)) - { - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType && - DeviceState->DeviceState == DAC960_V1_Device_Dead) - DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID, - DAC960_V1_Device_Online, "Make Online"); - else DAC960_UserCritical("Make Online of Physical Device %d:%d Illegal\n", - Controller, Channel, TargetID); - - } - else if (strncmp(UserCommand, "make-standby", 12) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[12], - &Channel, &TargetID)) - { - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType && - DeviceState->DeviceState == DAC960_V1_Device_Dead) - DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID, - DAC960_V1_Device_Standby, "Make Standby"); - else DAC960_UserCritical("Make Standby of Physical " - "Device %d:%d Illegal\n", - Controller, Channel, TargetID); - } - else if (strncmp(UserCommand, "rebuild", 7) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[7], - &Channel, &TargetID)) - { - CommandMailbox->Type3D.CommandOpcode = DAC960_V1_RebuildAsync; - CommandMailbox->Type3D.Channel = Channel; - CommandMailbox->Type3D.TargetID = TargetID; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Initiated\n", - Controller, Channel, TargetID); - break; - case DAC960_V1_AttemptToRebuildOnlineDrive: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Attempt to Rebuild Online or " - "Unresponsive Drive\n", - Controller, Channel, TargetID); - break; - case DAC960_V1_NewDiskFailedDuringRebuild: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "New Disk Failed During Rebuild\n", - Controller, Channel, TargetID); - break; - case DAC960_V1_InvalidDeviceAddress: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Invalid Device Address\n", - Controller, Channel, TargetID); - break; - case DAC960_V1_RebuildOrCheckAlreadyInProgress: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Rebuild or Consistency Check Already " - "in Progress\n", Controller, Channel, TargetID); - break; - default: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Unexpected Status %04X\n", Controller, - Channel, TargetID, Command->V1.CommandStatus); - break; - } - } - else if (strncmp(UserCommand, "check-consistency", 17) == 0 && - DAC960_ParseLogicalDrive(Controller, &UserCommand[17], - &LogicalDriveNumber)) - { - CommandMailbox->Type3C.CommandOpcode = DAC960_V1_CheckConsistencyAsync; - CommandMailbox->Type3C.LogicalDriveNumber = LogicalDriveNumber; - CommandMailbox->Type3C.AutoRestore = true; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Initiated\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - case DAC960_V1_DependentDiskIsDead: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - " - "Dependent Physical Device is DEAD\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - case DAC960_V1_InvalidOrNonredundantLogicalDrive: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - " - "Invalid or Nonredundant Logical Drive\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - case DAC960_V1_RebuildOrCheckAlreadyInProgress: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - Rebuild or " - "Consistency Check Already in Progress\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - default: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - " - "Unexpected Status %04X\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, Command->V1.CommandStatus); - break; - } - } - else if (strcmp(UserCommand, "cancel-rebuild") == 0 || - strcmp(UserCommand, "cancel-consistency-check") == 0) - { - /* - the OldRebuildRateConstant is never actually used - once its value is retrieved from the controller. - */ - unsigned char *OldRebuildRateConstant; - dma_addr_t OldRebuildRateConstantDMA; - - OldRebuildRateConstant = pci_alloc_consistent( Controller->PCIDevice, - sizeof(char), &OldRebuildRateConstantDMA); - if (OldRebuildRateConstant == NULL) { - DAC960_UserCritical("Cancellation of Rebuild or " - "Consistency Check Failed - " - "Out of Memory", - Controller); - goto failure; - } - CommandMailbox->Type3R.CommandOpcode = DAC960_V1_RebuildControl; - CommandMailbox->Type3R.RebuildRateConstant = 0xFF; - CommandMailbox->Type3R.BusAddress = OldRebuildRateConstantDMA; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("Rebuild or Consistency Check Cancelled\n", - Controller); - break; - default: - DAC960_UserCritical("Cancellation of Rebuild or " - "Consistency Check Failed - " - "Unexpected Status %04X\n", - Controller, Command->V1.CommandStatus); - break; - } -failure: - pci_free_consistent(Controller->PCIDevice, sizeof(char), - OldRebuildRateConstant, OldRebuildRateConstantDMA); - } - else DAC960_UserCritical("Illegal User Command: '%s'\n", - Controller, UserCommand); - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_DeallocateCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return true; -} - - -/* - DAC960_V2_TranslatePhysicalDevice translates a Physical Device Channel and - TargetID into a Logical Device. It returns true on success and false - on failure. -*/ - -static bool DAC960_V2_TranslatePhysicalDevice(DAC960_Command_T *Command, - unsigned char Channel, - unsigned char TargetID, - unsigned short - *LogicalDeviceNumber) -{ - DAC960_V2_CommandMailbox_T SavedCommandMailbox, *CommandMailbox; - DAC960_Controller_T *Controller = Command->Controller; - - CommandMailbox = &Command->V2.CommandMailbox; - memcpy(&SavedCommandMailbox, CommandMailbox, - sizeof(DAC960_V2_CommandMailbox_T)); - - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalToLogicalDevice_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_TranslatePhysicalToLogicalDevice; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.PhysicalToLogicalDeviceDMA; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->Common.DataTransferSize; - - DAC960_ExecuteCommand(Command); - *LogicalDeviceNumber = Controller->V2.PhysicalToLogicalDevice->LogicalDeviceNumber; - - memcpy(CommandMailbox, &SavedCommandMailbox, - sizeof(DAC960_V2_CommandMailbox_T)); - return (Command->V2.CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_ExecuteUserCommand executes a User Command for DAC960 V2 Firmware - Controllers. -*/ - -static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller, - unsigned char *UserCommand) -{ - DAC960_Command_T *Command; - DAC960_V2_CommandMailbox_T *CommandMailbox; - unsigned long flags; - unsigned char Channel, TargetID, LogicalDriveNumber; - unsigned short LogicalDeviceNumber; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while ((Command = DAC960_AllocateCommand(Controller)) == NULL) - DAC960_WaitForCommand(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - Controller->UserStatusLength = 0; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox = &Command->V2.CommandMailbox; - CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->Common.CommandControlBits.DataTransferControllerToHost = true; - CommandMailbox->Common.CommandControlBits.NoAutoRequestSense = true; - if (strcmp(UserCommand, "flush-cache") == 0) - { - CommandMailbox->DeviceOperation.IOCTL_Opcode = DAC960_V2_PauseDevice; - CommandMailbox->DeviceOperation.OperationDevice = - DAC960_V2_RAID_Controller; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Cache Flush Completed\n", Controller); - } - else if (strncmp(UserCommand, "kill", 4) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[4], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->SetDeviceState.IOCTL_Opcode = - DAC960_V2_SetDeviceState; - CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState = - DAC960_V2_Device_Dead; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Kill of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Succeeded" : "Failed")); - } - else if (strncmp(UserCommand, "make-online", 11) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[11], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->SetDeviceState.IOCTL_Opcode = - DAC960_V2_SetDeviceState; - CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState = - DAC960_V2_Device_Online; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Make Online of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Succeeded" : "Failed")); - } - else if (strncmp(UserCommand, "make-standby", 12) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[12], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->SetDeviceState.IOCTL_Opcode = - DAC960_V2_SetDeviceState; - CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState = - DAC960_V2_Device_Standby; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Make Standby of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Succeeded" : "Failed")); - } - else if (strncmp(UserCommand, "rebuild", 7) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[7], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_RebuildDeviceStart; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Initiated" : "Not Initiated")); - } - else if (strncmp(UserCommand, "cancel-rebuild", 14) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[14], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_RebuildDeviceStop; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Cancelled" : "Not Cancelled")); - } - else if (strncmp(UserCommand, "check-consistency", 17) == 0 && - DAC960_ParseLogicalDrive(Controller, &UserCommand[17], - &LogicalDriveNumber)) - { - CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber = - LogicalDriveNumber; - CommandMailbox->ConsistencyCheck.IOCTL_Opcode = - DAC960_V2_ConsistencyCheckStart; - CommandMailbox->ConsistencyCheck.RestoreConsistency = true; - CommandMailbox->ConsistencyCheck.InitializedAreaOnly = false; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) %s\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Initiated" : "Not Initiated")); - } - else if (strncmp(UserCommand, "cancel-consistency-check", 24) == 0 && - DAC960_ParseLogicalDrive(Controller, &UserCommand[24], - &LogicalDriveNumber)) - { - CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber = - LogicalDriveNumber; - CommandMailbox->ConsistencyCheck.IOCTL_Opcode = - DAC960_V2_ConsistencyCheckStop; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) %s\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Cancelled" : "Not Cancelled")); - } - else if (strcmp(UserCommand, "perform-discovery") == 0) - { - CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_StartDiscovery; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Discovery %s\n", Controller, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Initiated" : "Not Initiated")); - if (Command->V2.CommandStatus == DAC960_V2_NormalCompletion) - { - CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->ControllerInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->ControllerInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->ControllerInfo.DataTransferSize = - sizeof(DAC960_V2_ControllerInfo_T); - CommandMailbox->ControllerInfo.ControllerNumber = 0; - CommandMailbox->ControllerInfo.IOCTL_Opcode = - DAC960_V2_GetControllerInfo; - /* - * How does this NOT race with the queued Monitoring - * usage of this structure? - */ - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewControllerInformationDMA; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->ControllerInfo.DataTransferSize; - while (1) { - DAC960_ExecuteCommand(Command); - if (!Controller->V2.NewControllerInformation->PhysicalScanActive) - break; - msleep(1000); - } - DAC960_UserCritical("Discovery Completed\n", Controller); - } - } - else if (strcmp(UserCommand, "suppress-enclosure-messages") == 0) - Controller->SuppressEnclosureMessages = true; - else DAC960_UserCritical("Illegal User Command: '%s'\n", - Controller, UserCommand); - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_DeallocateCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return true; -} - -static int __maybe_unused dac960_proc_show(struct seq_file *m, void *v) -{ - unsigned char *StatusMessage = "OK\n"; - int ControllerNumber; - for (ControllerNumber = 0; - ControllerNumber < DAC960_ControllerCount; - ControllerNumber++) - { - DAC960_Controller_T *Controller = DAC960_Controllers[ControllerNumber]; - if (Controller == NULL) continue; - if (Controller->MonitoringAlertMode) - { - StatusMessage = "ALERT\n"; - break; - } - } - seq_puts(m, StatusMessage); - return 0; -} - -static int __maybe_unused dac960_initial_status_proc_show(struct seq_file *m, - void *v) -{ - DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; - seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer); - return 0; -} - -static int __maybe_unused dac960_current_status_proc_show(struct seq_file *m, - void *v) -{ - DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private; - unsigned char *StatusMessage = - "No Rebuild or Consistency Check in Progress\n"; - int ProgressMessageLength = strlen(StatusMessage); - if (jiffies != Controller->LastCurrentStatusTime) - { - Controller->CurrentStatusLength = 0; - DAC960_AnnounceDriver(Controller); - DAC960_ReportControllerConfiguration(Controller); - DAC960_ReportDeviceConfiguration(Controller); - if (Controller->ProgressBufferLength > 0) - ProgressMessageLength = Controller->ProgressBufferLength; - if (DAC960_CheckStatusBuffer(Controller, 2 + ProgressMessageLength)) - { - unsigned char *CurrentStatusBuffer = Controller->CurrentStatusBuffer; - CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' '; - CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' '; - if (Controller->ProgressBufferLength > 0) - strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength], - Controller->ProgressBuffer); - else - strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength], - StatusMessage); - Controller->CurrentStatusLength += ProgressMessageLength; - } - Controller->LastCurrentStatusTime = jiffies; - } - seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer); - return 0; -} - -static int dac960_user_command_proc_show(struct seq_file *m, void *v) -{ - DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; - - seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer); - return 0; -}