[Return to Library] [Contents] [Previous Chapter] [Next Chapter] [Index] [Help]


B    Example Disk Device Driver

This appendix contains the source listing for a sample disk device driver called /dev/xxx_disk.

/******************************************************************
 * xxx_disk.c
 *
 * This module contains a sample disk device driver.
 *
 ******************************************************************/

/********************** Include Files *****************************/

#include <sys/types.h>
#include <io/common/iotypes.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/secdefines.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/user.h>
#include <sys/sysmacros.h>
#include <sys/syslog.h>
#include <io/common/pt.h>
#include <io/common/devdriver.h>
#include <io/common/devio.h>
#include <io/common/devgetinfo.h>


/********************* External References ************************/

int      biodone(struct buf *);
int      physio(int (*)(), struct buf *, dev_t, int,
               uint (*)(), struct uio *);
void     log(/* int, char *, arg0, ..., argX */);
void     printf(/* char *, arg0, ..., argX */);
void     bzero(char *, int);
int      bcopy(caddr_t, caddr_t, int);

char *   readdisklabel(dev_t, int (*)(), struct disklabel *);
int      setdisklabel(struct disklabel *, struct disklabel *, uint);
int      writedisklabel(dev_t, int (*)(), struct disklabel *);

int      get_def_partitionmap(DEVGEOMST *, struct pt_tbl *);

struct buf *    getnewbuf();
void     brelse(struct buf *bp);
uint     minphys(struct buf *bp);

/**************** Initialized and Uninitialized Data **************/

/*
 * NOTE: This example does not illustrate SMP locking.
 */

/*
 * Bit format for minor number -- specific to this driver.
 *
 *  Bits
 *    19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
 *  +-------------------------------------------------------------+
 *  |                   device number          |        |Partition|
 *  +-------------------------------------------------------------+
 *
 */
/* Mask that keeps the low three bits of the minor number (partition 0-7). */
#define PART_MASK                0x7
/* Bit position at which the device number field of the minor number starts. */
#define PART_SHIFT               6
/* Partition index encoded in the minor number of dev. */
#define GET_PARTITION(dev)       (getminor(dev) & PART_MASK)
/*
 * Device number encoded in the minor number of dev.
 * Note: minor-number bits 3 through 5 are used by neither macro.
 */
#define GET_DEVICENUM(dev)       (getminor(dev) >> PART_SHIFT)

/*
 * The device number may be any handle the driver chooses,
 * including a physical location.
 */
#define MAX_XXX_DEVICES                 8

/*
 * This is the maximum transfer size for the xxx devices.
 * If your device has an arbitrary transfer size, it may not need
 * to define a maximum transfer size.  Instead, it can use the
 * system minphys routine to validate maximum transfer sizes.
 */
#define XXX_MAX_XFRLEN           (64 * 1024)   /* 64K bytes */

/*
 * One structure for each xxx device maintains the device state.
 */
/*
 * Per-device state.  One instance is allocated by xxx_slave for each
 * device and stored in xxx_devices, indexed by device number.
 */
typedef struct xxx_device {
    struct buf          *bufhd;           /* Not referenced in this listing;
                                             presumably the head of a queue
                                             of pending bufs -- confirm */
    struct device       *device;          /* Device structure passed to
                                             xxx_slave */
    io_handle_t         dev_handle;       /* Bus I/O handle passed to
                                             xxx_slave */
    DEVGEOMST           geometry;         /* Capacity and geometry, filled
                                             in by xxx_open */
    struct disklabel    label;            /* In-memory disk label, set up
                                             by read_label */
    ulong               flags;            /* xxx_device flags (READ_ONLY) */
    U32                 raw_part_opens;   /* Bit n set: partition n is open
                                             in character mode */
    U32                 blk_part_opens;   /* Bit n set: partition n is open
                                             in block mode */
    U32                 label_writeable;  /* TRUE when writes to the label
                                             area are allowed; reset to
                                             FALSE on last close */
    U32                 media_changes;    /* Not referenced in this listing;
                                             presumably a media-change
                                             counter -- confirm */
    U32                 soft_err_cnt;     /* Not referenced in this listing;
                                             presumably a soft-error
                                             counter -- confirm */
    U32                 hard_err_cnt;     /* Not referenced in this listing;
                                             presumably a hard-error
                                             counter -- confirm */
} xxx_device_t;

/* xxx_device flags. */
#define READ_ONLY       0x00001

/*
 * An array contains the device structures for all xxx devices
 *
 * Note: This sample driver uses a "dumb" device model for locating
 *    and addressing devices. It assumes each device is given the
 *    next sequential instance number.  (The sample does not show
 *    the probe/attach sequence that builds the relationship between
 *    the sequence number and physical device.)  Your driver may be
 *    much more complex and maintain device data structures in a
 *    different way, such as by physical address.
 */
xxx_device_t    *xxx_devices[MAX_XXX_DEVICES];

/*
 * In this example, a "dummy" hardware request packet does the 
 * actual I/O transaction.  A real driver would replace this with
 * its own mechanisms for processing I/O requests.
 */
/*
 * Dummy hardware request packet.  xxx_strategy allocates and fills one
 * per I/O request; xxx_hw_pkt_complete consumes and frees it.
 */
typedef struct hw_req_pkt {
    /*
     * Original I/O context
     */
    xxx_device_t    *devp;          /* Target device */
    struct buf      *bp;            /* Initial I/O request */
    enum {  READ,                   /* I/O request type, derived from */
            WRITE                   /* the B_READ flag of the buf     */
         }          dir;
    U32             phys_blknum;    /* Starting block number (partition
                                       offset already added) */
    U32             phys_blkcnt;    /* Number of blocks */
    caddr_t         buff_addr;      /* Buffer pointer */
    U32             error;          /* Completion status; nonzero is
                                       reported to the caller as EIO */
    int             residual;       /* Number of bytes not 
                                       transferred */
} hw_req_pkt_t;


#define PROBE_SUCCESS       TRUE
#define PROBE_FAIL          FALSE


/******************************************************************
 *
 * Name:
 *   xxx_slave
 *
 * Function:
 *   Determines the presence of a device.
 *
 * Formal Parameters:
 *   device        - Address of a device structure
 *   bus_io_handle - Address of an io_handle_t structure which
 *                   commonly contains a pointer to the device 
 *                   registers
 *
 * Modified Parameters:
 *   None
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return values:
 *   PROBE_SUCCESS - Successful completion
 *   PROBE_FAIL    - Device could not be probed
 *
 * Caller:
 *   Kernel code that configures the system
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

int
xxx_slave(struct device *device, io_handle_t bus_io_handle)
{
    xxx_device_t        *devp;
    int                 device_number;

    /*
     * Validate the device number.  It is used as an index into
     * xxx_devices, so reject negative values as well as values past
     * the end of the table.
     */
    device_number = device->logunit;
    if ((device_number < 0) || (device_number >= MAX_XXX_DEVICES))
        return(PROBE_FAIL);

    /* Has the device been probed? */
    if (xxx_devices[device_number] != (xxx_device_t *)NULL)
        return(PROBE_SUCCESS);

    /* Allocate a zero-filled device structure. */
    MALLOC(devp, xxx_device_t *, sizeof(xxx_device_t), M_DEVBUF,
            (M_WAIT | M_ZERO));

    /*
     * Defensive check: the allocation is requested with M_WAIT, but
     * never dereference or publish a null device structure.
     */
    if (devp == (xxx_device_t *)NULL)
        return(PROBE_FAIL);

    /* Initialize device-specific structure. */
    devp->device = device;
    devp->dev_handle = bus_io_handle;

    /* Publish the structure only after it has been initialized. */
    xxx_devices[device_number] = devp;

    /*
     * Do whatever is needed to ensure the device is present.
     * Return PROBE_FAIL if the device does not exist.
     */

    return(PROBE_SUCCESS);
}


/******************************************************************
 *
 * Name:
 *   xxx_attach
 *
 * Function:
 *   Configures the device into the topology.
 *
 * Formal parameters:
 *   device - Address of a device structure
 *
 * Modified parameters:
 *   None
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return values:
 *   PROBE_SUCCESS - Successful completion
 *   PROBE_FAIL    - Device could not be attached
 *
 * Caller:
 *   Kernel code that configures the system
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

int
xxx_attach(struct device *device)
{
    xxx_device_t        *devp;
    int                 device_number;

    /*
     * Validate the device number.  It is used as an index into
     * xxx_devices, so reject negative values as well as values past
     * the end of the table.
     */
    device_number = device->logunit;
    if ((device_number < 0) || (device_number >= MAX_XXX_DEVICES))
        return(PROBE_FAIL);

    devp = xxx_devices[device_number];

    /* Has the device been probed?  xxx_slave allocates the structure. */
    if (devp == (xxx_device_t *)NULL)
        return(PROBE_FAIL);

    /*
     * Do any other device initialization that is needed.
     */

    return(PROBE_SUCCESS);
}


/******************************************************************
 *
 * Name:
 *   xxx_open
 *
 * Function:
 *   Makes the device ready to accept requests from the user.
 *
 * Formal parameters:
 *   dev  - A dev_t for the target device
 *   flag - Read, write, or read-write flags
 *   fmt  - Constant that indicates block or character mode
 *
 * Modified parameters:
 *   None
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return value:
 *   ESUCCESS - Successful completion
 *   ENODEV   - The minor number does not map to a device
 *
 * Caller:
 *   Kernel code through the switch tables
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

int
xxx_open(dev_t dev, int flag, int fmt)
{
    xxx_device_t    *devp;
    int             partmask;
    int             device_number;
    void            read_label(xxx_device_t *devp, dev_t dev);

    /* Extract the physical device handle from the minor number. */
    device_number = GET_DEVICENUM(dev);

    /* Validate that the minor number maps to a device. */
    if ((device_number >= MAX_XXX_DEVICES) ||
        (xxx_devices[device_number] == (xxx_device_t *)NULL))
        /* No such device. */
        return(ENODEV);

    /* Grab the data structure that maintains the device state. */
    devp = xxx_devices[device_number];

    /*
     * Extract the partition number from the minor number, create
     * a mask, and set the bit corresponding to the partition.
     */
    partmask = 1 << GET_PARTITION(dev);

    /*
     * Bring the device into a state in which it can be accessed.
     *
     * Note: If the following condition is true:
     *            ((flag & (FNDELAY|FNONBLOCK)))
     *       Then failures that would normally exit with an error
     *       status should be ignored as much as possible.
     */

    /*
     * Query the device for capacity and geometry information and
     * save the results in devp->geometry. 
     *
     * Try to fill in the geometry structure as completely as
     * possible, but at the very least, set the dev_size and 
     * sector_size members.  
     */
    devp->geometry.geom_info.attributes = FILLIN( attributes );
    devp->geometry.geom_info.nsectors = FILLIN( sectors per track );
    devp->geometry.geom_info.interleave =  FILLIN( interleave );
    devp->geometry.geom_info.trackskew = FILLIN( track skew );
    devp->geometry.geom_info.cylskew = FILLIN( cylinder skew );
    devp->geometry.geom_info.ncylinders = FILLIN( # cylinders );
    devp->geometry.geom_info.ntracks = FILLIN( # heads );
    devp->geometry.geom_info.rpm = FILLIN( rotational speed );

    devp->geometry.geom_info.sector_size = FILLIN( block size );
    devp->geometry.geom_info.dev_size = FILLIN( total # of blocks );

    /* Pick relevant values for your hardware. */
    devp->geometry.geom_info.min_trans = 
                        devp->geometry.geom_info.sector_size;
    devp->geometry.geom_info.max_trans = XXX_MAX_XFRLEN;
    devp->geometry.geom_info.prefer_trans = (16 * 1024); /* 16K */

    /*
     * Query the device for interesting properties, such as whether
     * the media is write protected.
     */

    /*
     * Read the disk label if this is the first open to the physical
     * device (not simply the first open on this partition).
     */
    if ((devp->raw_part_opens | devp->blk_part_opens) == 0)
        read_label(devp, dev);

    /* Indicate that this partition is opened. */
    switch (fmt) {
    case S_IFCHR:
        devp->raw_part_opens |= partmask;
        break;
    case S_IFBLK:
        devp->blk_part_opens |= partmask;
        break;
    }

    /*
     * If the device has removable media and an option to
     * programatically disable media removal, do so.  If this 
     * fails, however, do not generate an error message.
     */

    return(ESUCCESS);
}


/******************************************************************
 *
 * Name:
 *   read_label
 *
 * Function:
 *   Reads the disk label from the disk into memory.
 *
 * Formal parameters:
 *   devp - Address of a device-specific structure
 *   dev  - A dev_t for the target device
 *
 * Modified parameters:
 *   The routine initializes the device-specific structure with
 *   either the disk label from the disk or a default disk label.
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return values:
 *   None
 *
 * Caller:
 *   xxx_open on the first open to the physical device
 *
 * Side effects:
 *   None
 *
 ******************************************************************/
static void
read_label(xxx_device_t *devp, dev_t dev)
{
    struct disklabel    *label = &devp->label;
    DEVGEOMST           *geom = &devp->geometry;
    struct pt_tbl       defmap;
    char                *errmsg;
    dev_t               part0_dev;
    int                 idx;
    void                xxx_strategy(struct buf *bp);

    /*
     * Build a placeholder label.  readdisklabel issues its I/O through
     * the strategy routine, and strategy validates every request
     * against the in-memory label.  No real partition map exists yet,
     * so describe the disk as a single partition 0 covering the whole
     * device and mark the label as not (yet) valid.
     */
    label->d_magic = 0;
    label->d_secsize = geom->geom_info.sector_size;
    label->d_secperunit = geom->geom_info.dev_size;

    /* readdisklabel uses secpercyl when it sets up the buf. */
    label->d_secpercyl = 1;
    label->d_nsectors = label->d_secperunit;

    label->d_npartitions = 1;
    label->d_partitions[0].p_offset = 0;
    label->d_partitions[0].p_size = label->d_secperunit;

    /* Same device as dev, but addressing partition 0. */
    part0_dev = makedev(getmajor(dev),(getminor(dev) & ~PART_MASK));

    /*
     * Strategy rejects I/O to a partition that is not open, so flag
     * partition 0 as open for the duration of the label read.
     */
    devp->raw_part_opens |= 1;
    devp->blk_part_opens |= 1;

    errmsg = readdisklabel(part0_dev, (int (*)())xxx_strategy, label);
    if (errmsg != NULL) {
        /* No usable label on the disk; optionally report it. */
        log(LOG_ERR, "XXX disk %d: error reading disk label -- %s\n",
                        GET_DEVICENUM(dev), errmsg);

        /*
         * Fall back to a default partition map.  Whatever the source
         * of the map, partition "c" (index 2) should cover the whole
         * disk, and partition "a" (index 0) is recommended to do the
         * same.
         *
         * The label stays marked invalid: only a label that was
         * actually read from the disk may carry the valid magic.
         */
        label->d_magic = 0;

        if (get_def_partitionmap(geom, &defmap) != 0) {
            /*
             * The generic default-map routine failed (this is rare).
             * Clear the map and make "a" and "c" span the disk.
             */
            bzero((caddr_t)label->d_partitions,
                  sizeof(label->d_partitions));
            label->d_npartitions = 8;
            label->d_partitions[2].p_size = geom->geom_info.dev_size;
            label->d_partitions[0].p_size =
                    label->d_partitions[2].p_size;
        } else {
            /* Copy the generated default map into the label. */
            label->d_npartitions = 8;
            for (idx = 0; idx < 8; idx++) {
                label->d_partitions[idx].p_offset =
                        defmap.d_partitions[idx].p_offset;
                label->d_partitions[idx].p_size =
                        defmap.d_partitions[idx].p_size;
            }
        }
    }

    /* Drop the temporary open context for partition 0. */
    devp->raw_part_opens &= ~1;
    devp->blk_part_opens &= ~1;
}


/******************************************************************
 *
 * Name:
 *   xxx_close
 *
 * Formal parameters:
 *   dev   - A dev_t for the target device
 *   flag  - Read, write, or read-write flags
 *   fmt   - Constant that indicates character or block mode
 *
 * Modified parameters:
 *   None
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return values:
 *   ESUCCESS - Successful completion
 *   ENODEV   - Device number does not map to a physical device
 *
 * Caller:
 *   Kernel code through the switch tables
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

int
xxx_close(dev_t dev, int flag, int fmt)
{
    int             unit = GET_DEVICENUM(dev);
    int             closing_bit;
    xxx_device_t    *state;

    /* The minor number must name a device that has been probed. */
    if (unit >= MAX_XXX_DEVICES)
        return(ENODEV);
    state = xxx_devices[unit];
    if (state == (xxx_device_t *)NULL)
        return(ENODEV);

    /* Mask with only the bit of the partition being closed. */
    closing_bit = 1 << GET_PARTITION(dev);

    /* Clear that bit in the mask that matches the open mode. */
    if (fmt == S_IFCHR)
        state->raw_part_opens &= ~closing_bit;
    else if (fmt == S_IFBLK)
        state->blk_part_opens &= ~closing_bit;

    /*
     * Last-close processing happens only when no partition of the
     * physical device remains open in either mode.
     */
    if ((state->raw_part_opens | state->blk_part_opens) == 0) {
        /*
         * Revoke permission to write the label so that the next
         * opener has to enable it again explicitly.
         */
        state->label_writeable = FALSE;

        /*
         * If the device has removable media and removal was
         * programmatically disabled in xxx_open, re-enable media
         * removal here.  The driver may generate an error message
         * if this fails.
         */
    }

    return(ESUCCESS);
}


/******************************************************************
 *
 * Name:
 *   xxx_size
 *
 * Function:
 *   Obtains the size of the partition.
 *
 * Formal parameters:
 *   dev - A dev_t for the target device
 *
 * Modified parameters:
 *   None
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return values:
 *   Partition size for the device on successful completion
 *   -1 - Device number does not map to a physical device or a valid
 *        partition map does not exist for this device
 * Caller:
 *   The kernel through the switch tables.
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

daddr_t
xxx_size(dev_t dev)
{
    int                 unit = GET_DEVICENUM(dev);
    int                 part;
    struct disklabel    *label;

    /* No size can be reported for a device that was never probed. */
    if ((unit >= MAX_XXX_DEVICES) ||
        (xxx_devices[unit] == (xxx_device_t *)NULL))
        return(-1);

    label = &xxx_devices[unit]->label;
    part = GET_PARTITION(dev);

    /* Report the size only if the label describes this partition. */
    if (part < label->d_npartitions)
        return(label->d_partitions[part].p_size);

    return(-1);
}


/******************************************************************
 *
 * Name:
 *   xxx_read
 *
 * Function:
 *   Performs character-mode read operations.
 *
 * Formal parameters:
 *   dev - A dev_t for the target device
 *   uio - Address of a uio structure
 *
 * Modified parameters:
 *   None
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return values:
 *   ENODEV - Minor number does not map to a physical device
 *   EBADF  - The partition is not open
 *   EIO    - The I/O request is not aligned on a block boundary
 *
 * Caller:
 *   Kernel code through the switch tables
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

int
xxx_read(dev_t dev, struct uio *uio)
{
    struct buf      *bp;
    xxx_device_t    *devp;
    int             status;
    int             device_number;
    void            xxx_strategy(struct buf *bp);
    void            xxx_minphys(struct buf *bp);

    /* Extract the physical device handle from the minor number. */
    device_number = GET_DEVICENUM(dev);

    /* Validate that the minor number maps to a device. */
    if ((device_number >= MAX_XXX_DEVICES) ||
        (xxx_devices[device_number] == (xxx_device_t *)NULL))
        /* No such device. */
        return(ENODEV);
    devp = xxx_devices[device_number];

    /*
     * Is the partition open?  The request is refused with EBADF only
     * when the partition's bit is set in neither the character-mode
     * nor the block-mode open mask.
     */
    if (((devp->raw_part_opens | devp->blk_part_opens) &
            (1 << GET_PARTITION(dev))) == 0)
        return(EBADF);

    /* Validate that the request is aligned on a block boundary. */
    if ((uio->uio_offset %
         devp->geometry.geom_info.sector_size) != 0)
        return(EIO);

    /*
     * Allocate a buf structure for the request.
     *
     * Note: The getnewbuf service routine blocks until the request
     *       can succeed.
     *
     * Alternative implementation: The driver may allocate a single
     *       buf structure per device or maintain a pool of buffers.
     *       You may also use generic kernel allocators to allocate
     *       buf memory, but getnewbuf guarantees that the buf
     *       structure is properly initialized. 
     */
    bp = getnewbuf();

    /* Allocation failure: report ENOMEM rather than use a null buf. */
    if (bp == (struct buf *)NULL)
        return(ENOMEM);

    /*
     * Call physio with the driver's strategy routine.  Physio 
     * locks down the buffer pages and sends a buf structure 
     * through strategy for each I/O vector (iov) in the user I/O 
     * (uio) structure.
     *
     * If the device supports a large arbitrary transfer length,
     * pass the address of the system minphys routine instead of
     * supplying an xxx_minphys routine.
     */
    status = physio((int (*)())xxx_strategy, bp, dev, B_READ,
                    (uint (*)())xxx_minphys, uio);

    /*
     * Return the buf structure to the buffer pool.
     */
    brelse(bp);

    return(status);
}


/******************************************************************
 *
 * Name:
 *   xxx_write
 *
 * Function:
 *   Performs character-mode write operations.
 *
 * Formal parameters:
 *   dev - A dev_t for the target device
 *   uio - Address of a uio structure
 *
 * Modified parameters:
 *   None
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return values:
 *   ENODEV - The minor number does not map to a physical device
 *   EBADF  - The partition is not open
 *   EIO    - The write request is not aligned on a block boundary
 *
 * Caller:
 *   Kernel code through the switch tables
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

int
xxx_write(dev_t dev, struct uio *uio)
{
    struct buf      *bp;
    xxx_device_t    *devp;
    int             status;
    int             device_number;
    void            xxx_strategy(struct buf *bp);
    void            xxx_minphys(struct buf *bp);

    /* Extract the physical device handle from the minor number. */
    device_number = GET_DEVICENUM(dev);

    /* Validate that the minor number maps to a device. */
    if ((device_number >= MAX_XXX_DEVICES) ||
        (xxx_devices[device_number] == (xxx_device_t *)NULL))
        /* No such device. */
        return(ENODEV);

    devp = xxx_devices[device_number];

    /*
     * Is this partition open?  The request is refused with EBADF only
     * when the partition's bit is set in neither the character-mode
     * nor the block-mode open mask.
     */
    if (((devp->raw_part_opens | devp->blk_part_opens) &
            (1 << GET_PARTITION(dev))) == 0)
        return(EBADF);

    /* Validate that the request is aligned on a block boundary. */
    if ((uio->uio_offset % 
         devp->geometry.geom_info.sector_size) != 0)
        return(EIO);

    /*
     * Allocate a buf structure for the I/O request.
     *
     * Note: The getnewbuf service routine blocks until the request
     *       can succeed.
     *
     * Alternative implementation: The driver may allocate a single
     *       buf structure per device or maintain a pool of buffers.
     *       You may also use generic kernel allocators to allocate
     *       buf memory, but getnewbuf guarantees that the buf
     *       structure is properly initialized. 
     */
    bp = getnewbuf();

    /* Allocation failure: report ENOMEM rather than use a null buf. */
    if (bp == (struct buf *)NULL)
        return(ENOMEM);

    /*
     * Call physio with the driver's strategy routine.  Physio locks
     * down buffer pages and sends a buf structure through strategy
     * for each I/O vector (iov) in the user I/O (uio) structure.
     *
     * If the device supports a large arbitrary transfer length,
     * pass the address of the system minphys routine instead of
     * supplying an xxx_minphys routine.
     */
    status = physio((int (*)())xxx_strategy, bp, dev, B_WRITE,
                    (uint (*)())xxx_minphys, uio);

    /*
     * Return the buf structure to the buffer pool.
     */
    brelse(bp);

    return(status);
}


/******************************************************************
 *
 * Name:
 *   xxx_minphys
 *
 * Function:
 *   Passed to physio by the read/write routines to make sure the
 *   resulting I/O fits within the maximum transfer length for the
 *   device.  If not, the routine adjusts the byte count so that it 
 *   is within the allowable transfer length.
 *
 * Formal parameters:
 *   bp - Address of a buf structure
 *
 * Modified parameters:
 *   If the length of the data exceeds the maximum transfer length,
 *   the routines sets the b_count member of the buf structure to
 *   the device's maximum transfer length.
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return value:
 *   None
 *
 * Caller:
 *   The physio routine on return from the driver's strategy routine
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

void
xxx_minphys(struct buf *bp)
{
    /* Requests that already fit the device limit are left untouched. */
    if (bp->b_bcount <= XXX_MAX_XFRLEN)
        return;

    /* Clamp the byte count to the device's maximum transfer length. */
    bp->b_bcount = XXX_MAX_XFRLEN;
}


/******************************************************************
 *
 * Name:
 *   xxx_strategy
 *
 * Function:
 *   Performs all block-oriented read and write operations.
 *
 * Formal parameters:
 *   bp - Address of a buf structure
 *
 * Modified parameters:
 *   When an error occurs, the routine initializes members of the 
 *   buf structure as follows:
 *
 *   b_resid - Number of bytes not transferred
 *   b_flags - B_ERROR
 *   b_error - Error status value
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return value:
 *   None
 *
 * Caller:
 *   The physio routine for character I/O operations
 *   The kernel through the switch tables for block I/O operations
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

void
xxx_strategy(struct buf *bp)
{
    xxx_device_t        *devp;
    struct partition    *pp;
    hw_req_pkt_t        *hw_pkt;
    int                 partition;
    int                 device_number;
    U32                 nblocks;
    U32                 start_blk;

    /*
     * Validate that the I/O request is properly block aligned.
     *
     * Alternatively, the driver could use a read-modify-write 
     * sequence to transfer data of any length.  The driver writer
     * must decide if the complexity of this task is worth the 
     * effort.
     */
    if ((bp->b_bcount % 
         devp->geometry.geom_info.sector_size) != 0) {
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = EIO;
        (void)biodone(bp);
        return;
    }

    /* Extract the physical device handle from the minor number. */
    device_number = GET_DEVICENUM(bp->b_dev);

    /* Validate that the minor number maps to a device. */
    if ((device_number >= MAX_XXX_DEVICES) ||
        (xxx_devices[device_number] == (xxx_device_t *)NULL)) {
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = ENODEV;
        (void)biodone(bp);
        return;
    }

    /* Grab the data structure that maintains the device state. */
    devp = xxx_devices[device_number];

    /* Extract the partition number from the minor number. */
    partition = GET_PARTITION(bp->b_dev);

    /* Validate the partition. */
    if (partition >= devp->label.d_npartitions) {
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = ENXIO;
        (void)biodone(bp);
        return;
    }
    pp = &devp->label.d_partitions[partition];

    /* Does the partition have a valid length? */
    if (pp->p_size == 0) {
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = EROFS;
        (void)biodone(bp);
        return;
    }

    /* Is the partition open? */
    if (((devp->raw_part_opens | devp->blk_part_opens) &
            (1 << partition)) != 0) {
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = EBADF;
        (void)biodone(bp);
        return;
    }

    /*
     * Is this is a write request for a read-only device?
     */
    if ((devp->flags & READ_ONLY) && ((bp->b_flags & B_READ) == 0)) {
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = EROFS;
        (void)biodone(bp);
        return;
    }

    /*
     * Perform range checking on the location/length of the I/O
     * request.
     */

    /* How many blocks does the I/O cross? */
    nblocks = (bp->b_bcount + 
               (devp->geometry.geom_info.sector_size - 1)) /
               devp->geometry.geom_info.sector_size;

    /* Calculate the physical block where the I/O starts. */
    start_blk = bp->b_blkno + pp->p_offset;

    /*
     * Will the request write over the disk label?
     *
     * The label can be overwritten if it is a default label (one
     * that was not read from the disk), or if the driver's 
     * label_writeable flag has been set to TRUE by an ioctl
     * command.
     */

    if ((start_blk <= LABELSECTOR) &&     /* it's the label area */
        ((bp->b_flags & B_READ) == 0) &&         /* it's a write */
        (devp->label.d_magic == DISKMAGIC) &&   /* default label */
        (devp->label_writeable == FALSE)) { /* not write-enabled */

        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = EROFS;
        (void)biodone(bp);
        return;
    }

    /*
     * Validate that the physical block number is within range and
     * that the partition is not off the end of the disk.  If so, the
     * disk label is invalid.
     */
    if ((bp->b_blkno < 0) || (bp->b_blkno >= pp->p_size) ||
        (pp->p_offset >= devp->geometry.geom_info.dev_size) ) {
        /*
         * POSIX says do not return an error for reads, but do for
         * writes.
         */
        if ((bp->b_flags & B_READ) == 0) {
            bp->b_flags |= B_ERROR;
            bp->b_error = ENOSPC;
        }
        bp->b_resid = bp->b_bcount;
        (void)biodone(bp);
        return;
    }

    /* Transfer up to the end of the partition. */
    if ((bp->b_blkno + nblocks) > pp->p_size )  {
        /* Save the original count in case of failure. */
        bp->b_resid = bp->b_bcount;
        /* Replace the count with the number of blocks remaining. */
        bp->b_bcount = (pp->p_size - bp->b_blkno) *
                            devp->geometry.geom_info.sector_size;
    } else  {
        /* Initialize the current residual count. */
        bp->b_resid = 0;
    }

    /* Validation is now complete. */

    /*
     * For illustration purposes, the sample driver creates a dummy
     * hardware packet for the I/O request.
     */
    MALLOC(hw_pkt, hw_req_pkt_t *, sizeof(hw_req_pkt_t), M_DEVBUF,
            (M_NOWAIT | M_ZERO));
    if (hw_pkt == (hw_req_pkt_t *)NULL) {
        /*
         * A production driver should deal with this error and queue
         * the I/O until resources are available.  Keep in mind that
         * you cannot block in a strategy routine.
         */
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        bp->b_error = ENOMEM;
        (void)biodone(bp);
        return;
    }


    /* The actual request: */

    hw_pkt->devp = devp;
    hw_pkt->bp = bp;
    hw_pkt->dir = (bp->b_flags & B_READ) ? READ : WRITE;
    hw_pkt->phys_blknum = start_blk;
    hw_pkt->phys_blkcnt = ((U32)bp->b_bcount /
                            devp->geometry.geom_info.sector_size);
    hw_pkt->buff_addr = bp->b_un.b_addr;

    /* Kick off the I/O request on the hardware. */

    return;
}


/******************************************************************
 *
 * Name:
 *   xxx_hw_pkt_complete
 *
 * Function:
 *   This routine handles the completion of a hardware packet.
 *   It would typically be called from the device driver's
 *   interrupt routine.
 *
 *   The routine finishes up all completion for the original I/O
 *   request.  This example will not concern itself with retries
 *   and the like.
 *
 * Formal parameters:
 *   hw_pkt - Address of the data packet to be sent to the device
 *
 * Modified parameters:
 *   The routine sets the b_resid member of the buf structure to the
 *   number of bytes that were not transferred, or 0 if all were
 *   transferred.
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return value:
 *   None
 *
 * Caller:
 *   Driver's interrupt routine
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

static void
xxx_hw_pkt_complete(hw_req_pkt_t *hw_pkt)
{
    xxx_device_t        *devp;
    struct buf          *bp;
    int                 trunc_resid;

    /* Restore the I/O context. */
    devp = hw_pkt->devp;
    bp = hw_pkt->bp;

    /*
     * If the transfer length was greater than the maximum allowed,
     * find out how many bytes were not transferred and restore the
     * original byte count.
     */
    if (bp->b_resid) {
        /* How much was chopped off by the end of the partition? */
        trunc_resid = bp->b_resid - bp->b_bcount;

        /* Restore the original transfer count. */
        bp->b_bcount = bp->b_resid;

    } else {
        trunc_resid = 0;
    }


    /* It's all or nothing for disks. */
    bp->b_resid = trunc_resid + hw_pkt->residual;

    /* Was there an error? */
    if (hw_pkt->error) {
        bp->b_flags |= B_ERROR;
        bp->b_error = EIO;
    }

    biodone(bp);

    /* Return the hardware request packet to the pool. */
    FREE(hw_pkt, M_DEVBUF);

    return;
}


/******************************************************************
 *
 * Name:
 *   xxx_ioctl
 *
 * Function:
 *   Handles all nonread and nonwrite I/O requests.
 *
 * Formal parameters:
 *   dev  - A dev_t for the target device
 *   cmd  - The ioctl command
 *   data - Address of a user data buffer
 *   flag - Flags from the file handle
 *
 * Modified parameters:
 *   Some ioctl commands pass data back to the caller through the
 *   data buffer.
 *
 * Implicit input:
 *   None
 *
 * Implicit output:
 *   None
 *
 * Return value:
 *   ENODEV  - Minor number does not map to a physical device
 *   EBADF   - The partition was not open
 *   EINVAL  - Disk label is invalid
 *   EBADF   - Command needs an open context, but can't find one
 *   EACCES  - User doesn't have the necessary privileges
 *
 * Caller:
 *   Kernel code through the switch tables
 *
 * Side effects:
 *   None
 *
 ******************************************************************/

int
xxx_ioctl(dev_t dev, U32 cmd, caddr_t data, int flag)
{
    xxx_device_t        *devp;
    struct disklabel    *lp;
    int                 retval = ESUCCESS;
    struct partition    *pp;
    int                 partition;
    int                 device_number;
    U32                 current_opens;

    /* Extract the physical device handle from the minor number. */
    device_number = GET_DEVICENUM(dev);

    /* Validate that the minor number maps to a device. */
    if ((device_number >= MAX_XXX_DEVICES) ||
        (xxx_devices[device_number] == (xxx_device_t *)NULL))
        /* No such device. */
        return(ENODEV);

    /* Grab the data structure that maintains the device state. */
    devp = xxx_devices[device_number];
    lp = &devp->label;
    current_opens = (devp->raw_part_opens | devp->blk_part_opens);

    /* Extract the partition number from the minor number. */
    partition = GET_PARTITION(dev);
    pp = &lp->d_partitions[partition];

    /* Is the partition open? */
    if (((devp->raw_part_opens | devp->blk_part_opens) &
            (1 << GET_PARTITION(dev))) != 0) {

        /*
         * There are only 3 ioctls that do not require a valid open
         * context - DEVGETINFO, DEVIOCGET, and DEVROOT.
         */
        if ((cmd != DEVGETINFO) &&
            (cmd != DEVIOCGET))
            return(EBADF);
    }

    switch (cmd) {

    /*
     * Return a pointer to a disklabel structure.
     */
    case DIOCGDINFO:
        {
        /* If the disk label is invalid, don't return success. */
        if (lp->d_magic != DISKMAGIC)
            return(EINVAL);

        /* Copy the disk label. */
        *(struct disklabel *)data = *lp;

        break;
        }

    /*
     * Return information about a partition.
     *
     * Note: This is a kernel-level routine, so passing pointers
     *       is okay.
     */
    case DIOCGPART:
        {
        /* If the disk label is invalid, don't return success. */
        if ((lp->d_magic != DISKMAGIC) ||
              (partition >= lp->d_npartitions))
            return(EINVAL);

        /* Assign the disk label and partition to the data buffer */
        ((struct partinfo *)data)->disklab = lp;
        ((struct partinfo *)data)->part = pp;

        break;
        }

    /*
     * Set a disklabel structure.
     */
    case DIOCSDINFO:
        {
        if ((flag & FWRITE) == 0)
            return(EBADF);

        /*
         * Call a service routine to validate label changes and
         * update the in-memory copy of the disk label.
         */
        if (lp->d_magic == DISKMAGIC) {
            /* Label is valid, so pass a valid partition mask. */
            retval = setdisklabel(lp, (struct disklabel *)data, 
                                  current_opens);
        } else {
            /*
             * When using a default label, don't worry about open
             * partitions.
             */
            retval = setdisklabel(lp, (struct disklabel *)data, 0);
        }

        break;
        }

    /* 
     * Set a write-enable label.
     */
    case DIOCWLABEL:
        {
        if ((flag & FWRITE) == 0)
            return(EBADF);

        /* 
         * Assign the value in the data buffer to the label 
         * writeable flag. 
         */
        devp->label_writeable = *(int *)data;

        break;
        }

    /*
     * Write a disklabel.
     */
    case DIOCWDINFO:
        {
        struct disklabel    *new_lp = (struct disklabel *)data;
        U32                 current_label_wrtbl;

        if ((flag & FWRITE) == 0)
            return(EBADF);

        /*
         * If the partition in the new label that maps to this dev_t 
         * does not start at block 0, the driver may not be able to
         * write the label to disk.
         */
        if ((partition >= lp->d_npartitions) ||
            (new_lp->d_partitions[partition].p_offset != 0))
            return(EINVAL);

        /*
         * Call a service routine to validate label changes and to
         * update the in-memory version of the label.
         */
        if (lp->d_magic == DISKMAGIC) {
            /* Label is valid, so pass an open partition mask. */
            retval = setdisklabel(lp, new_lp, current_opens);
        } else {
            /*
             * Using a default label, so don't worry about open
             * partitions.
             */
            retval = setdisklabel(lp, new_lp, 0);
        }

        if (retval != ESUCCESS)
            /* FAILURE - break out of switch statement. */
            break;

        /*
         * Patch the label writeable flag so the driver can write 
         * the label.
         */
        current_label_wrtbl = devp->label_writeable;
        devp->label_writeable = TRUE;

        /*
         * Use the service routine to write the disk label via our
         * strategy routine.
         *
         * Note: If the partition does not start at block 0, the write
         *       will succeed but will not replace the old disk label.
         *       It will be written to the wrong place on the disk.
         */
        retval = writedisklabel(dev, (int (*)())xxx_strategy, lp);

        /* Restore the label writeable flag. */
        devp->label_writeable = current_label_wrtbl;

        break;
        }

    /*
     * Create a default partition map.
     */
    case DIOCGDEFPT:
        {
        struct pt_tbl   *ptable = (struct pt_tbl *)data;

        /* 
         * Call the service routine to create a default partition 
         * map.
         */
        if (get_def_partitionmap(&devp->geometry, ptable) != 0) {
            /* 
             * If get_def_partitionmap fails, create "a" and "c"
             * partitions that span the entire disk.
             */ 
            bzero((caddr_t)ptable, sizeof(struct pt_tbl));
            ptable->d_partitions[0].p_size =
                ptable->d_partitions[2].p_size =
                    devp->geometry.geom_info.dev_size;
        }
        break;
        }

    /* 
     * Return the current partition map.
     */
    case DIOCGCURPT:
        {
        /* Assign the current partition map to the data buffer. */
        *(struct pt_tbl *)data = *(struct pt_tbl *)lp->d_partitions;
        break;
        }

    /*
     * Return device geometry.
     */
    case DEVGETGEOM:
        {
        /* Check that the disk geometry is known. */
        if ((devp->geometry.geom_info.ntracks == 0) ||
            (devp->geometry.geom_info.nsectors == 0) ||
            (devp->geometry.geom_info.ncylinders == 0))
            return(EIO);

        /*
         * If the device can dynamically change its geometry, query
         * the device for the latest status.  This example copies 
         * the geometry values that were set up on open.
         */
        *(DEVGEOMST *)data = *(&devp->geometry);

        break;
        }


    /*
     * Return device status
     *
     * Note: This ioctl must be able to operate without having a
     *       previous open on the device as it is used by the 
     *       kernel at startup.
     */
    case DEVGETINFO:
        {
        struct device           *device;
        v1_device_info_t        *devi_p;
        v1_bustype_info_t       *busp;
        v1_disk_dev_info_t      *diskp;

        device = devp->device;

        devi_p = (v1_device_info_t *)data;
        bzero((caddr_t)devi_p,sizeof(*devi_p));

        /****************************************************
         * Fill in generic information.
         ****************************************************/

        devi_p->version    = VERSION_1;
        devi_p->category   = DEV_DISK;
        devi_p->bus        = FILLIN( whatever is appropriate );
        bcopy("XXX", devi_p->interface, 3);
        bcopy("xxxdev", devi_p->device, 6);
        bcopy("xx", devi_p->dev_name, 2);
        devi_p->soft_count = devp->soft_err_cnt;
        devi_p->hard_count = devp->hard_err_cnt;

        /****************************************************
         * Fill in (topology) bus-generic information.
         ****************************************************/

        busp = &devi_p->businfo;
        if (device != (struct device *)NULL) {
            /*
             * This is the bus slot (e.g., pci slot) of the adaptor.
             */
            busp->nexus_num = device->ctlr_hd->slot;
            /*
             * This is the instance of the device relative to the 
             * adaptor.  For example, an adaptor may have two 
             * controller chips.  Chip A would be instance 0, and 
             * chip B instance 1.  This example assumes only one 
             * instance on the adaptor.
             */
            busp->adpt_num = 0;
            /*
             * This is the logical bus number that the adaptor is
             * plugged into.
             */
            busp->bus_num = device->ctlr_hd->bus_hd->bus_num;

            /*
             * This is the system-wide instance number of the adaptor.
             * For example, 2 adaptors with 2 controller chips would
             * result in instance 0, instance 1, instance 2, instance 
             * 3, which correspond to adaptor 0 chip A, adaptor 0 
             * chip B, adaptor 1 chip A, adaptor 1 chip B.
             */
            busp->ctlr_num = device->ctlr_num;
            /*
             * This is the remote id for the adaptor.  Except for
             * specialized hardware, this will almost always be 0.
             */
            busp->rctlr_num = device->ctlr_hd->rctlr;
            /*
             * This is the device instance relative to the controlling
             * hardware.  For example, if there were 2 disk devices 
             * off of chip A, one would be slave 0, the other slave 1.
             * This example assumes only 1 slave.
             */
            busp->slave_num = 0;
            /*
             * This is the system-wide device instance number.
             */
            busp->unit_num = device_number;
        } else {
            busp->nexus_num =  -1;
            busp->adpt_num =  -1;
            busp->bus_num =  -1;
            busp->ctlr_num = -1;
            busp->rctlr_num = 0;
            busp->slave_num = 0;
            busp->unit_num = device_number;
        }

        /****************************************************
         * Fill in bus-specific information.
         ****************************************************/

        /* Fill in whatever is relevant for your hardware. */

        /****************************************************
         * Fill in category-specific information.
         ****************************************************/

        diskp = (v1_disk_dev_info_t *)&devi_p->devinfo;
        diskp->class        = DKDEV_CLS_HARDDISK;
        diskp->part_num     = partition;
        diskp->blocksz      = devp->geometry.geom_info.sector_size;
        diskp->capacity     = devp->geometry.geom_info.dev_size;

        /*
         * Fill in fields appropriate for your device.  Be aware
         * of attributes for online/offline, write protect,
         * removable media, media changes, and so on.  Also set
         * subclasses, such as cdrom or floppy, if applicable.
         */

        /*****************************************************
         * Fill in category-specific architecture information.
         *****************************************************/

        /* Fill in whatever is relevant for your hardware. */

        break;
        }


    /*
     * Return device status.
     *
     * This ioctl has been replaced by the DEVGETINFO ioctl, but
     * you may want to implement it for backward compatibility.
     */
    case DEVIOCGET:
        {
        struct device           *device;
        struct devget           *devget;

        device = devp->device;

        devget = (struct devget *)data;
        bzero((caddr_t)devget,sizeof(struct devget));

        devget->category = DEV_DISK;
        devget->bus = FILLIN( whatever is appropriate );
        bcopy("XXX", devget->interface, 3);
        bcopy("xxxdev", devget->device, 6);
        bcopy("xx", devget->dev_name, 2);
        devget->soft_count = devp->soft_err_cnt;
        devget->hard_count = devp->hard_err_cnt;

        if (device != (struct device *)NULL) {
            /*
             * This is the bus slot (e.g., pci slot) for the adaptor.
             */
            devget->nexus_num = device->ctlr_hd->slot;
            /*
             * This is the instance of the device relative to the 
             * adaptor.  For example, an adaptor may have two 
             * controller chips.  Chip A would be instance 0, and 
             * chip B instance 1.  This example assumes only one 
             * instance on this adaptor.
             */
            devget->adpt_num = 0;
            /*
             * This is the logical bus number that the adaptor is
             * plugged into.
             */
            devget->bus_num = device->ctlr_hd->bus_hd->bus_num;

            /*
             * This is the system-wide instance number of the adaptor.
             * For example, 2 adaptors with 2 controller chips would
             * result in instance 0, instance 1, instance 2, instance 
             * 3, which correspond to adaptor 0 chip A, adaptor 0 
             * chip B, adaptor 1 chip A, adaptor 1 chip B.
             */
            devget->ctlr_num = device->ctlr_num;
            /*
             * This is the remote id for the adaptor.  Except for
             * specialized hardware, this will almost always be 0.
             */
            devget->rctlr_num = device->ctlr_hd->rctlr;
            /*
             * This is the device instance relative to the controlling
             * hardware.  For example, if there were 2 disk devices 
             * off of chip A, one would be slave 0, the other slave 1.
             * This example assumes only 1 slave.
             */
            devget->slave_num = 0;
            /*
             * This is the system-wide device instance number.
             */
            devget->unit_num = device_number;
        } else {
            devget->nexus_num =  -1;
            devget->adpt_num =  -1;
            devget->bus_num =  -1;
            devget->ctlr_num = -1;
            devget->rctlr_num = 0;
            devget->slave_num = 0;
            devget->unit_num = device_number;
        }

        devget->category_stat = partition;

        /*
         * Fill in fields appropriate for your device.  Be aware
         * of attributes for online/offline, write protect,
         * removable media, media changes, and so on.  Also set
         * subclasses, such as cdrom or floppy, if applicable.
         *
         * If reporting media change count, do so as follows:
         *      devget->category_stat |= DEV_MC_COUNT;
         *      devget->category_stat |=
         *              (devp->media_changes << 16);
         *
         */

        break;
        }

    default:
        retval = EINVAL;
        break;

    }   /* switch(cmd) */

    return(retval);