/*
 * Copyright 2002 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_MDI_IMPLDEFS_H
#define	_SYS_MDI_IMPLDEFS_H

#pragma ident	"@(#)mdi_impldefs.h	1.4	02/01/15 SMI"

#include <sys/note.h>
#include <sys/sunmdi.h>

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef _KERNEL

/*
 * Multipath Driver Interfaces
 *
 * This project delivers two new modules into Solaris: a misc module
 * (mpxio), which provides the core multipath framework, and a driver module
 * (scsi_vhci), which provides the SCSI-III command set driver functionality
 * for managing Fibre-Channel storage devices.
 *
 * The mpxio framework consists of 4 major components:
 *
 * 1) vHCI (Virtual Host Controller Interconnect 'scsi_vhci')
 * 2) pHCI (Physical Host Controller Interconnect 'fcp')
 * 3) Client (Multipath Target Device - 'ssd')
 * 4) Multipath information nodes (mdi_pathinfo nodes - Connects the
 *    Clients and pHCI device nodes in the Solaris Device Tree).
 *
 * Solaris Device Tree (with mpxio enabled)
 *
 *	+-----------+   +-----------+
 *      | vhci      |   |  pci@1f,0 |
 *      +-----------+   +-----------+
 *         /     \               \
 * +----------+ +-----------+    +-------------+
 * |Client 1  | | Client 2  |    | qlc@0,0     |
 * +----------+ +-----------+    +-------------+
 *   |          |                  /        \
 *   |          |        +-------------+   +------------+
 *   |          |        | pHCI 1      |   |  pHCI 2    |
 *   |          |        +-------------+   +------------+
 *   |          |          /        |      /          |
 *   |          |    +------+       |    +------+     |
 *   |          |    |  ssd |       |    |  ssd |     |
 *   |          |    | (OBP)|       |    | (OBP)|     |
 *   |          |    +------+       |    +------+     |
 *   |          |                   |                 |
 *   |          |               +-------+           +--------+
 *   |          |-------------->| Path  |---------->| Path   |
 *   |                          | Info  |           | Info   |
 *   |                          | node 1|           | node 3 |
 *   |                          +-------+           +--------+
 *   |                              |                 |
 *   |                          +-------+           +--------+
 *   |------------------------->| Path  |---------->| Path   |
 *                              | Info  |           | Info   |
 *                              | node 2|           | node 4 |
 *                              +-------+           +--------+
 *
 * This project introduces multipath information nodes (mdi_pathinfo
 * nodes) which establish the relationship between the pseudo Client
 * driver instance nodes and the physical host controller interconnect (pHCI
 * drivers), forming a matrix structure.
 *
 * The mpxio module implements locking at a multi-level granularity to
 * support the needs of various consumers and requirements.  The multipath
 * matrix can be locked globally, by column or by row, depending on the
 * consumer.  The intention is to keep the locking simple without
 * compromising on performance.
 *
 * As in any multipathing solution there are three major problems to solve:
 * identification and enumeration of multipath client devices, optimal
 * selection of a path to route I/O requests, and providing interfaces to
 * snapshot the multipath matrix on user request.
 *
 * Locking:
 *
 * The current implementation basically revolves around five different locks:
 *
 * the system global read/write lock devinfo_tree_lock, mdi_mutex (the
 * framework global mutex), the per-pHCI structure mutex, the per-Client
 * structure mutex and the per-mdi_pathinfo node mutex.
 *
 * mdi_mutex, protects the vHCI list, per-vHCI structure and the
 * list of pHCIs and Client devices registered against them (protection
 * against multi-threaded add/remove).
 *
 * devinfo_tree_lock, protects system wide creation/removal of mdi_pathinfo
 * nodes into the multipath matrix.  Multipath walking drivers (devinfo) can
 * freeze the multipathing matrix by acquiring a READER on devinfo_tree_lock.
 *
 * per-pHCI lock (mutex) protects the column (pHCI-mdi_pathinfo node list)
 * and per-pHCI structure fields.  mdi_pathinfo node creation, deletion and
 * child mdi_pathinfo node state changes are serialized on per pHCI basis
 * (Protection against DR).
 *
 * per-Client (mutex) lock protects the row (Client-mdi_pathinfo node list)
 * and per-Client structure fields.  Client-mdi_pathinfo node list is
 * walked typically to select an optimal path to route I/O requests
 * (load-balancing and path selection).
 *
 * per-mdi_pathinfo lock (mutex) protects the mdi_pathinfo node structure
 * fields.
 *
 * Note:
 *
 * per-Client structure and per-pHCI fields are freely readable when
 * corresponding mdi_pathinfo locks are held, as holding an mdi_pathinfo node
 * guarantees that its corresponding client and pHCI devices will not be
 * freed.
 *
 */

/*
 * Status codes returned by most MDI functions: zero means success and every
 * other outcome is reported as a distinct negative value.
 */
#define	MDI_SUCCESS		0	/* Call Success			*/
#define	MDI_FAILURE		(-1)	/* Unspecified Error		*/
#define	MDI_NOMEM		(-2)	/* No resources available	*/
#define	MDI_ACCEPT		(-3)	/* Request accepted		*/
#define	MDI_BUSY		(-4)	/* Busy				*/
#define	MDI_NOPATH		(-5)	/* No more paths are available	*/
#define	MDI_EINVAL		(-6)	/* Invalid parameter		*/
#define	MDI_VHCI_DISABLED	(-7)	/* vHCI disabled		*/
#define	MDI_NOT_SUPPORTED	(-8)	/* Device not supported		*/
#define	MDI_DEVI_ONLINING	(-9)	/* Devi is onlining		*/

/*
 * MDI managed devi property definitions
 *
 * The property name/value strings are defined elsewhere and only declared
 * here.  The MDI_COMPONENT_PROP* macros yield the same strings with the
 * const qualifier cast away (presumably for interfaces that take a plain
 * 'char *' - confirm against the callers).
 *
 * Each expansion is fully parenthesised: a cast binds less tightly than a
 * postfix operator, so without the outer parentheses an expression such as
 * MDI_COMPONENT_PROP[0] would index first and then cast the character to a
 * pointer.
 */
extern const char			*mdi_component_prop;
extern const char			*mdi_component_prop_vhci;
extern const char			*mdi_component_prop_phci;
extern const char			*mdi_component_prop_client;

#define	MDI_COMPONENT_PROP		((char *)mdi_component_prop)
#define	MDI_COMPONENT_PROP_VHCI		((char *)mdi_component_prop_vhci)
#define	MDI_COMPONENT_PROP_PHCI		((char *)mdi_component_prop_phci)
#define	MDI_COMPONENT_PROP_CLIENT	((char *)mdi_component_prop_client)

/*
 * MDI Client global unique identifier property name string definition
 *
 * The string is defined elsewhere; MDI_CLIENT_GUID_PROP yields it with the
 * const qualifier cast away.  The expansion is fully parenthesised so that
 * postfix operators applied to the macro act on the cast result rather
 * than on the uncast variable.
 */
extern const char			*mdi_client_guid_prop;
#define	MDI_CLIENT_GUID_PROP		((char *)mdi_client_guid_prop)

/*
 * MDI Client load balancing policy definitions
 *
 * Load balancing is enabled by default on a per-vHCI-driver basis and is
 * configurable through the vHCI driver's driver.conf.
 */
typedef enum {
	LOAD_BALANCE_NONE = 0,		/* Alternate pathing		*/
	LOAD_BALANCE_RR = 1		/* Round Robin			*/
} client_lb_t;

/*
 * MDI client load balancing property name/value string definitions
 *
 * The strings are defined elsewhere; the LOAD_BALANCE_PROP* macros yield
 * them with the const qualifier cast away.  Each expansion is fully
 * parenthesised so that postfix operators applied to a macro act on the
 * cast result rather than on the uncast variable.
 */
extern const char			*mdi_load_balance;
extern const char			*mdi_load_balance_none;
extern const char			*mdi_load_balance_ap;
extern const char			*mdi_load_balance_rr;

#define	LOAD_BALANCE_PROP		((char *)mdi_load_balance)
#define	LOAD_BALANCE_PROP_NONE		((char *)mdi_load_balance_none)
#define	LOAD_BALANCE_PROP_AP		((char *)mdi_load_balance_ap)
#define	LOAD_BALANCE_PROP_RR		((char *)mdi_load_balance_rr)

/*
 * MDI vHCI class definitions
 *
 * The class name string is defined elsewhere; MDI_HCI_CLASS_SCSI yields it
 * with the const qualifier cast away.  The expansion is fully parenthesised
 * so that postfix operators applied to the macro act on the cast result
 * rather than on the uncast variable.
 */
extern const char			*mdi_vhci_class_scsi;
#define	MDI_HCI_CLASS_SCSI		((char *)mdi_vhci_class_scsi)

/*
 * vHCI drivers:
 *
 * vHCI drivers are pseudo nexus drivers which implement multipath services
 * for a specific command set or bus architecture.  There is a single instance
 * of vHCI driver for each command set which supports multipath devices.
 *
 * Currently there exists only one vHCI driver to support Fibre Channel Storage
 * devices.
 *
 * vHCI drivers register a set of callback operation vectors, typically from
 * their attach(9e) entry point.
 *
 * vHCI driver operation vectors
 */
#define	MDI_VHCI_OPS_REV_1		1

/*
 * Current ops vector revision.  MDI_VHCI_OPS_REV_NAME is the string form of
 * MDI_VHCI_OPS_REV and must be changed whenever MDI_VHCI_OPS_REV changes.
 */
#define	MDI_VHCI_OPS_REV		MDI_VHCI_OPS_REV_1
#define	MDI_VHCI_OPS_REV_NAME		"1"

/*
 * Version macros of the form 4.h-1-1.45
 *
 * MDI_VHCI_VERSION supplies the leading "<major>.<sub>-<ops rev>-" part as
 * a string literal, built by adjacent string literal concatenation.
 */
#define	MDI_VHCI_MAJOR_VERSION		"5"
#define	MDI_VHCI_SUB_VERSION		"a"

#define	MDI_VHCI_VERSION \
	MDI_VHCI_MAJOR_VERSION "." MDI_VHCI_SUB_VERSION "-" \
	MDI_VHCI_OPS_REV_NAME "-"

/*
 * Callback vector supplied by a vHCI driver.  Each callback returns an int
 * status; the vdip/pip/cdip/flags parameter names are those given in the
 * member declarations below.
 */
typedef struct mdi_vhci_ops {
	/*
	 * revision management
	 * (presumably set to MDI_VHCI_OPS_REV by the vHCI driver - confirm
	 * against mdi_vhci_register())
	 */
	int	vo_revision;

	/* mdi_pathinfo node init callback */
	int	(*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip,
		    int flags);

	/* mdi_pathinfo node uninit callback */
	int	(*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip,
		    int flags);

	/* mdi_pathinfo node state change callback */
	int	(*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip,
		    mdi_pathinfo_state_t state, int flags);

	/* Client path failover callback */
	int	(*vo_failover)(dev_info_t *vdip, dev_info_t *cdip,
		    int flags);
} mdi_vhci_ops_t;

/*
 * This framework creates a struct mdi_vhci structure for every registered
 * class of vHCI driver.  This structure is created and bound to a vHCI driver
 * instance's dev_info handle when the vHCI driver registers itself from its
 * attach(9e) entry point.  This structure is unbound and freed from the vHCI
 * driver instance when the vHCI driver unregisters itself from its
 * detach(9e) entry point.
 *
 * Each vHCI driver is associated with a vHCI class name.  Currently this
 * implementation defines only one class, for SCSI ("scsi_vhci").
 *
 * The global mdi_mutex protects the structure members.
 *
 * Depending on the context some of the fields can be freely read without
 * holding any locks (for example, holding a child's lock also guarantees
 * that the vHCI (parent) can not be unexpectedly freed).
 *
 * per-vHCI structure definitions
 */
struct mdi_vhci {
	struct mdi_vhci		*vh_next;	/* next link		*/
	struct mdi_vhci		*vh_prev;	/* prev link		*/
	int			vh_flags;	/* Operation flags	*/
	dev_info_t		*vh_dip;	/* devi handle		*/
	char			*vh_class;	/* Class name		*/
	struct mdi_vhci_ops	*vh_ops;	/* Callback vectors	*/
	client_lb_t		vh_lb;		/* Global cache		*/
	int			vh_phci_count;	/* pHCI device count	*/
	struct mdi_phci		*vh_phci_head;	/* pHCI list head	*/
	struct mdi_phci		*vh_phci_tail;	/* pHCI list tail	*/
	int			vh_client_count;	/* Client count	*/
	struct client_hash	*vh_client_table;	/* Client hash	*/
};
typedef struct mdi_vhci mdi_vhci_t;

/*
 * GUID Hash definitions
 *
 * Since all the mpxio managed devices are enumerated under a single
 * pseudo vHCI instance, we do not want to sequentially walk through the
 * client device link.
 */

#define	CLIENT_HASH_TABLE_SIZE	(32)	/* GUID hash */

/*
 * Client hash table structure
 *
 * Holds the head of a list of clients together with a count.  The per-vHCI
 * structure refers to these through its vh_client_table pointer (presumably
 * an array of CLIENT_HASH_TABLE_SIZE entries - confirm against the
 * allocation site).
 */
struct client_hash {
	struct mdi_client	*ct_hash_head;	/* Client hash head	*/
	int			ct_hash_count;	/* Client hash count	*/
};


/*
 * pHCI Drivers:
 *
 * The physical HBA drivers provide the necessary transport services.
 * Every pHCI driver instance registers itself with the mpxio framework by
 * using mdi_phci_register() from its attach(9e) entry point against a vHCI
 * class.  This framework creates a struct mdi_phci and binds it to the pHCI
 * device's dev_info handle.  This structure is unbound from the pHCI device's
 * dev_info node when the pHCI driver unregisters itself by calling
 * mdi_phci_unregister() from its detach(9e) driver entry point.
 *
 * As pHCIs register themselves against a vHCI class, the mpxio framework
 * maintains a list of registered pHCI device instances.  This list is
 * identified through vHCI->vh_phci_count, vHCI->vh_phci_head,
 * vHCI->vh_phci_tail and pHCI->ph_next and pHCI->ph_prev and is exclusively
 * protected by the global mdi_mutex.
 *
 * Locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock))
 *
 * per-pHCI structure definitions
 *
 */
struct mdi_phci {
	kmutex_t		ph_mutex;	/* per-pHCI mutex	*/
	struct mdi_phci		*ph_next;	/* next link		*/
	struct mdi_phci		*ph_prev;	/* prev link		*/
	dev_info_t		*ph_dip;	/* devi handle		*/
	struct mdi_vhci 	*ph_vhci;	/* back ref. to vHCI	*/
	int			ph_flags;	/* pHCI operation flags	*/
	int			ph_path_count;	/* child pi count	*/
	mdi_pathinfo_t		*ph_path_head;	/* pi list head		*/
	mdi_pathinfo_t		*ph_path_tail;	/* pi list tail		*/
	int			ph_unstable;	/* Paths in transient state */
	kcondvar_t		ph_unstable_cv;	/* Paths in transient state */
};
typedef struct mdi_phci mdi_phci_t;

/*
 * A pHCI device is unstable while one or more paths are in a transitional
 * state.  We do not allow Hotplug to take place while paths are in transient
 * state.
 *
 * MDI_PHCI_UNSTABLE() counts one more path in transition.  MDI_PHCI_STABLE()
 * counts one less and, when the count reaches zero, broadcasts on
 * ph_unstable_cv.
 *
 * Both macros behave as a single statement and must be followed by a
 * semicolon at the call site, so they can be used safely as the body of an
 * unbraced if/else.  Neither macro takes any lock itself.
 */
#define	MDI_PHCI_UNSTABLE(ph)		((ph)->ph_unstable++)
#define	MDI_PHCI_STABLE(ph) do { \
	(ph)->ph_unstable--; \
	if ((ph)->ph_unstable == 0) { \
		cv_broadcast(&(ph)->ph_unstable_cv); \
	} \
} while (0)

/*
 * per-pHCI lock macros
 *
 * Enter, try to enter, and exit the ph_mutex embedded in the pHCI structure
 * that 'ph' points to.
 */
#define	MDI_PHCI_LOCK(ph)		mutex_enter(&(ph)->ph_mutex)
#define	MDI_PHCI_TRYLOCK(ph)		mutex_tryenter(&(ph)->ph_mutex)
#define	MDI_PHCI_UNLOCK(ph)		mutex_exit(&(ph)->ph_mutex)

/*
 * pHCI state definitions and macros to track the pHCI driver instance state
 *
 * Each MDI_PHCI_FLAGS_* value is a single bit in mdi_phci::ph_flags.  Every
 * MDI_PHCI_SET_* macro sets or clears exactly one of those bits, and a pHCI
 * is "ready" only while no bit at all is set.
 */
#define	MDI_PHCI_FLAGS_OFFLINE		0x00000001	/* pHCI is offline */
#define	MDI_PHCI_FLAGS_SUSPEND		0x00000002	/* pHCI is suspended */
#define	MDI_PHCI_FLAGS_POWER_DOWN	0x00000004	/* pHCI is power down */
#define	MDI_PHCI_FLAGS_DETACH		0x00000008	/* pHCI is detached */

#define	MDI_PHCI_IS_READY(ph) \
	    ((ph)->ph_flags == 0)

#define	MDI_PHCI_SET_OFFLINE(ph) \
	    ((ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE)

#define	MDI_PHCI_SET_ONLINE(ph) \
	    ((ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE)

#define	MDI_PHCI_SET_SUSPEND(ph) \
	    ((ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND)

#define	MDI_PHCI_SET_RESUME(ph) \
	    ((ph)->ph_flags &= ~MDI_PHCI_FLAGS_SUSPEND)

#define	MDI_PHCI_IS_SUSPENDED(ph) \
	    ((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND)

#define	MDI_PHCI_SET_DETACH(ph) \
	    ((ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH)

#define	MDI_PHCI_SET_ATTACH(ph) \
	    ((ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH)

#define	MDI_PHCI_SET_POWER_DOWN(ph) \
	    ((ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN)

/*
 * Powering up clears the POWER_DOWN bit; there is no separate "power up"
 * flag.
 */
#define	MDI_PHCI_SET_POWER_UP(ph) \
	    ((ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN)

/*
 * Client:
 *
 * This framework creates a struct mdi_client for every client device created
 * by the framework as a result of self-enumeration of target devices by the
 * registered pHCI devices.  This structure is bound to the client device's
 * dev_info node at the time of client device allocation
 * (ndi_devi_alloc(9e)).  This structure is unbound from the dev_info node
 * when the mpxio framework removes a client device node from the system.
 *
 * This structure is created when the first path is enumerated and removed
 * when the last path is de-enumerated from the system.
 *
 * Multipath client devices are instantiated as children of the corresponding
 * vHCI driver instance.  Each client device is uniquely identified by a GUID
 * provided by the target device itself.  The parent vHCI device also
 * maintains a hashed list of client devices, protected by the global
 * mdi_mutex.
 *
 * Typically pHCI devices self-enumerate their child devices using taskq,
 * resulting in multiple paths to the same client device being enumerated by
 * competing threads.  mdi_mutex is also used to serialize the client device
 * creation.
 *
 * Currently this framework supports two kinds of load-balancing policy,
 * configurable through the vHCI driver configuration files.
 *
 * NONE		- Legacy AP mode
 * Round Robin	- Balance the pHCI load in a Round Robin fashion.
 *
 * This framework identifies the client device in three distinct states:
 *
 * OPTIMAL	- Client device has at least one redundant path.
 * DEGRADED	- No redundant paths (critical).  Failure in the current active
 *                path would result in data access failures.
 * FAILED 	- No paths are available to access this device.
 *
 * Locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock))
 *
 * per-Client structure
 *
 */
struct mdi_client {
	kmutex_t		ct_mutex;	/* per-client mutex	*/
	struct mdi_client	*ct_hnext;	/* next client		*/
	struct mdi_client	*ct_hprev;	/* prev client		*/
	dev_info_t		*ct_dip;	/* client devi handle	*/
	struct mdi_vhci		*ct_vhci;	/* vHCI back ref	*/
	char			*ct_drvname;	/* client driver name	*/
	char			*ct_guid;	/* client guid		*/
	void			*ct_cprivate;	/* vHCI driver private	*/
	client_lb_t		ct_lb;		/* load balancing scheme */
	int			ct_flags;	/* Driver op. flags	*/
	int			ct_state;	/* state information	*/
	int			ct_failover_flags;	/* Failover args */
	int			ct_failover_status;	/* last fo status */
	kcondvar_t		ct_failover_cv;	/* Failover status cv	*/
	int			ct_path_count;	/* multi path count	*/
	mdi_pathinfo_t		*ct_path_head;	/* multi path list head	*/
	mdi_pathinfo_t		*ct_path_tail;	/* multi path list tail	*/
	mdi_pathinfo_t		*ct_path_last;	/* last path used for i/o */
	int			ct_unstable;	/* Paths in transient state */
	kcondvar_t		ct_unstable_cv;	/* Paths in transient state */
};
typedef struct mdi_client mdi_client_t;

/*
 * per-Client device locking definitions
 *
 * Enter, try to enter, and exit the ct_mutex embedded in the client
 * structure that 'ct' points to.
 */
#define	MDI_CLIENT_LOCK(ct)		mutex_enter(&(ct)->ct_mutex)
#define	MDI_CLIENT_TRYLOCK(ct)		mutex_tryenter(&(ct)->ct_mutex)
#define	MDI_CLIENT_UNLOCK(ct)		mutex_exit(&(ct)->ct_mutex)

/*
 * A Client device is unstable while one or more paths are in a transitional
 * state.  We do not allow failover to take place while paths are in transient
 * state.  Similarly we do not allow state transition while client device
 * failover is in progress.
 *
 * MDI_CLIENT_UNSTABLE() counts one more path in transition.
 * MDI_CLIENT_STABLE() counts one less and, when the count reaches zero,
 * broadcasts on ct_unstable_cv.
 *
 * Both macros behave as a single statement and must be followed by a
 * semicolon at the call site, so they can be used safely as the body of an
 * unbraced if/else.  Neither macro takes any lock itself.
 */
#define	MDI_CLIENT_UNSTABLE(ct)		((ct)->ct_unstable++)
#define	MDI_CLIENT_STABLE(ct) do { \
	(ct)->ct_unstable--; \
	if ((ct)->ct_unstable == 0) { \
		cv_broadcast(&(ct)->ct_unstable_cv); \
	} \
} while (0)

/*
 * Client driver instance state definitions:
 *
 * Each MDI_CLIENT_FLAGS_* value is a single bit in mdi_client::ct_flags.
 * The SET and CLEAR macros below set or clear exactly one bit, and the IS
 * macros evaluate to non-zero when the corresponding bit is set.
 */
#define	MDI_CLIENT_FLAGS_OFFLINE	0x00000001
#define	MDI_CLIENT_FLAGS_SUSPEND	0x00000002
#define	MDI_CLIENT_FLAGS_POWER_DOWN	0x00000004
#define	MDI_CLIENT_FLAGS_DETACH		0x00000008
#define	MDI_CLIENT_FLAGS_FAILOVER	0x00000010
#define	MDI_CLIENT_FLAGS_REPORT_DEV	0x00000020

#define	MDI_CLIENT_SET_OFFLINE(ct) \
	    ((ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE)

#define	MDI_CLIENT_SET_ONLINE(ct) \
	    ((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE)

#define	MDI_CLIENT_IS_OFFLINE(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE)

#define	MDI_CLIENT_SET_SUSPEND(ct) \
	    ((ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND)

#define	MDI_CLIENT_SET_RESUME(ct) \
	    ((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND)

#define	MDI_CLIENT_IS_SUSPENDED(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND)

#define	MDI_CLIENT_SET_POWER_DOWN(ct) \
	    ((ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN)

/*
 * Powering up clears the POWER_DOWN bit; there is no separate "power up"
 * flag.
 */
#define	MDI_CLIENT_SET_POWER_UP(ct) \
	    ((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN)

#define	MDI_CLIENT_SET_DETACH(ct) \
	    ((ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH)

#define	MDI_CLIENT_SET_ATTACH(ct) \
	    ((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH)

#define	MDI_CLIENT_IS_DETACHED(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH)

#define	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct) \
	    ((ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER)

#define	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct) \
	    ((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER)

#define	MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER)

#define	MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct) \
	    ((ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV)

#define	MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct) \
	    ((ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV)

#define	MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \
	    ((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV)

/*
 * Client operating states, stored in mdi_client::ct_state.
 */
#define	MDI_CLIENT_STATE_OPTIMAL	1
#define	MDI_CLIENT_STATE_DEGRADED	2
#define	MDI_CLIENT_STATE_FAILED		3

/* Read, write and test the operating state of a client. */
#define	MDI_CLIENT_STATE(ct)		((ct)->ct_state)
#define	MDI_CLIENT_SET_STATE(ct, state)	((ct)->ct_state = (state))
#define	MDI_CLIENT_IS_FAILED(ct) \
	    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED)

/*
 * mdi_pathinfo nodes:
 *
 * A 'path' as defined by this project is a tuple consisting of a client or
 * end device, a host controller which provides device identification and
 * transport services (pHCI) and bus specific unit addressing information.
 * A path may be decorated with properties which describe the capabilities
 * of the path; such properties are analogous to device node and minor node
 * properties.
 *
 * The framework maintains a linked list of mdi_pathinfo nodes created by
 * every pHCI driver instance.  This framework guarantees that all the nodes
 * created by a pHCI device instance are freed before the pHCI
 * unregistration is performed.
 *
 * Locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex))
 *
 * mdi_pathinfo node structure definition
 */
struct mdi_pathinfo {
	kmutex_t		pi_mutex;	/* per path mutex	*/
	mdi_pathinfo_state_t	pi_state;	/* path state		*/
	mdi_pathinfo_state_t	pi_old_state;	/* previous path state	*/
	kcondvar_t		pi_state_cv;	/* path state condvar	*/
	mdi_client_t		*pi_client;	/* client		*/
	mdi_phci_t		*pi_phci;	/* pHCI (mdi_phci_t)	*/
	char			*pi_addr;	/* path unit address	*/
	nvlist_t		*pi_prop;	/* Properties		*/
	void			*pi_cprivate;	/* client private info	*/
	void			*pi_pprivate;	/* phci private info	*/
	struct mdi_pathinfo	*pi_client_link; /* next path in client list */
	struct mdi_pathinfo	*pi_phci_link;	 /* next path in phci list */
	int			pi_ref_cnt;	/* pi reference count	*/
	kcondvar_t		pi_ref_cv;	/* condition variable	*/
	struct mdi_pi_kstats	*pi_kstats;	/* aggregate kstats */
};

/*
 * pathinfo statistics
 * The mpxio architecture allows for multiple pathinfo nodes for
 * each client-pHCI combination.  For statistics purposes, these
 * statistics are aggregated into a single client-pHCI set of kstats.
 */

struct mdi_pi_kstats {
	/* reference count (presumably pathinfo nodes sharing this set) */
	int	pi_kstat_ref;
	/* I/O statistics kstat */
	kstat_t	*pi_kstat_iostats;
	/* error statistics kstat; its ks_data is read as a struct pi_errs */
	kstat_t *pi_kstat_errstats;
};

/*
 * pathinfo error kstat
 *
 * One named counter per error category.  MDI_PI_ERRSTAT() increments the
 * value.ui32 field of the selected member; the MDI_PI_* error codes name
 * the members.
 */
struct pi_errs {
	struct kstat_named pi_softerrs;		/* MDI_PI_SOFTERR */
	struct kstat_named pi_harderrs;		/* MDI_PI_HARDERR */
	struct kstat_named pi_transerrs;	/* MDI_PI_TRANSERR */
	struct kstat_named pi_icnt_busy;	/* MDI_PI_ICNTBUSY */
	struct kstat_named pi_icnt_errors;	/* MDI_PI_ICNTERR */
	struct kstat_named pi_phci_rsrc;	/* MDI_PI_PHCIRSRC */
	struct kstat_named pi_phci_localerr;	/* MDI_PI_PHCILOCL */
	struct kstat_named pi_phci_invstate;	/* MDI_PI_PHCIINVS */
	struct kstat_named pi_failedfrom;	/* MDI_PI_FAILFROM */
	struct kstat_named pi_failedto;		/* MDI_PI_FAILTO */
};

/*
 * increment an error counter
 *
 * 'pip' is a pathinfo node handle and 'x' is one of the MDI_PI_* error
 * codes defined below, which name a member of struct pi_errs.  Nothing is
 * counted when the node has no kstats attached (pi_kstats is NULL).
 *
 * The macro behaves as a single statement and must be followed by a
 * semicolon at the call site, so it can be used safely as the body of an
 * unbraced if/else.  'pip' is evaluated more than once, so it must not have
 * side effects.
 */
#define	MDI_PI_ERRSTAT(pip, x) do { \
	if (MDI_PI(pip)->pi_kstats != NULL) { \
		struct pi_errs *pep; \
		pep = MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \
		pep->x.value.ui32++; \
	} \
} while (0)

/*
 * error codes which can be passed to MDI_PI_ERRSTAT
 */
#define	MDI_PI_SOFTERR	pi_softerrs
#define	MDI_PI_HARDERR	pi_harderrs
#define	MDI_PI_TRANSERR	pi_transerrs
#define	MDI_PI_ICNTBUSY	pi_icnt_busy
#define	MDI_PI_ICNTERR	pi_icnt_errors
#define	MDI_PI_PHCIRSRC	pi_phci_rsrc
#define	MDI_PI_PHCILOCL	pi_phci_localerr
#define	MDI_PI_PHCIINVS	pi_phci_invstate
#define	MDI_PI_FAILFROM	pi_failedfrom
#define	MDI_PI_FAILTO	pi_failedto

/*
 * Convert a pathinfo handle of any pointer type to the implementation
 * structure pointer.
 */
#define	MDI_PI(type)			((struct mdi_pathinfo *)(type))

/*
 * Per-path locking and reference counting.  HOLD and RELE adjust pi_ref_cnt
 * and evaluate to the updated count; they take no lock themselves.
 */
#define	MDI_PI_LOCK(pip)		mutex_enter(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_UNLOCK(pip)		mutex_exit(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_HOLD(pip)		(++MDI_PI(pip)->pi_ref_cnt)
#define	MDI_PI_RELE(pip)		(--MDI_PI(pip)->pi_ref_cnt)

/*
 * mdi_pathinfo node state utility definitions
 *
 * pi_state carries the path state proper in its low 16 bits
 * (MDI_PATHINFO_STATE_MASK) and a "transition in progress" marker in the
 * MDI_PATHINFO_STATE_TRANSIENT bit.
 */
#define	MDI_PATHINFO_STATE_MASK		0x0000FFFF
#define	MDI_PATHINFO_STATE_TRANSIENT	0x00010000

/* Current and previous state with the transient marker masked off. */
#define	MDI_PI_STATE(pip) \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
#define	MDI_PI_OLD_STATE(pip) \
	    (MDI_PI(pip)->pi_old_state & MDI_PATHINFO_STATE_MASK)

/* Set, clear and test the transient marker on the current state. */
#define	MDI_PI_SET_TRANSIENT(pip) \
	    (MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT)
#define	MDI_PI_CLEAR_TRANSIENT(pip) \
	    (MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT)
#define	MDI_PI_IS_TRANSIENT(pip) \
	    (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT)

/*
 * INIT state.  The IS tests compare the whole of pi_state, so IS_INIT is
 * false while the transient marker is set and IS_INITING is true only then.
 */
#define	MDI_PI_SET_INIT(pip) \
	    (MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT)
#define	MDI_PI_IS_INIT(pip) \
	    (MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT)
#define	MDI_PI_IS_INITING(pip) \
	    (MDI_PI(pip)->pi_state == \
	    (MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT))

/*
 * ONLINE state.
 *
 * MDI_PI_SET_ONLINING() saves the current state (without the transient
 * marker) in pi_old_state and marks the path as in transition to ONLINE.
 * It behaves as a single statement and must be followed by a semicolon at
 * the call site, so it can be used safely as the body of an unbraced
 * if/else.
 */
#define	MDI_PI_SET_ONLINING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT); \
} while (0)

#define	MDI_PI_IS_ONLINING(pip) \
	    (MDI_PI(pip)->pi_state == \
	    (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_ONLINE(pip) \
	    (MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE)

#define	MDI_PI_IS_ONLINE(pip) \
	    (MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_ONLINE)

/*
 * OFFLINE state.
 *
 * MDI_PI_SET_OFFLINING() saves the current state (without the transient
 * marker) in pi_old_state and marks the path as in transition to OFFLINE.
 * It behaves as a single statement and must be followed by a semicolon at
 * the call site, so it can be used safely as the body of an unbraced
 * if/else.
 */
#define	MDI_PI_SET_OFFLINING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT); \
} while (0)

#define	MDI_PI_IS_OFFLINING(pip) \
	    (MDI_PI(pip)->pi_state == \
	    (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_OFFLINE(pip) \
	    (MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE)

#define	MDI_PI_IS_OFFLINE(pip) \
	    (MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_OFFLINE)

/*
 * STANDBY state.
 *
 * MDI_PI_SET_STANDBYING() saves the current state (without the transient
 * marker) in pi_old_state and marks the path as in transition to STANDBY.
 * It behaves as a single statement and must be followed by a semicolon at
 * the call site, so it can be used safely as the body of an unbraced
 * if/else.
 */
#define	MDI_PI_SET_STANDBYING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT); \
} while (0)

#define	MDI_PI_SET_STANDBY(pip) \
	    (MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY)

#define	MDI_PI_IS_STANDBY(pip) \
	    (MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_STANDBY)

/*
 * FAULT state.
 *
 * MDI_PI_SET_FAULTING() saves the current state (without the transient
 * marker) in pi_old_state and marks the path as in transition to FAULT.
 * It behaves as a single statement and must be followed by a semicolon at
 * the call site, so it can be used safely as the body of an unbraced
 * if/else.
 */
#define	MDI_PI_SET_FAULTING(pip) do { \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	    (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT); \
} while (0)

#define	MDI_PI_SET_FAULT(pip) \
	    (MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT)

#define	MDI_PI_IS_FAULT(pip) \
	    (MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_FAULT)

/*
 * A path counts as suspended when the pHCI it hangs off has its SUSPEND
 * flag set; the path itself carries no suspend state.
 */
#define	MDI_PI_IS_SUSPENDED(pip) \
	    (MDI_PI(pip)->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND)

/*
 * Wrapper function to get the mdi framework component type of a devinfo
 * node.
 */
int		mdi_get_component_type(dev_info_t *);

/*
 * vHCI driver instance registration/unregistration
 *
 * mdi_vhci_register() is called by vHCI drivers to register the
 * 'class_driver' vHCI driver and its callback entry points with the mpxio
 * framework.  A vHCI driver must call this interface as part of its
 * attach(9e) handler.
 *
 * mdi_vhci_unregister() is called by vHCI drivers from their detach(9E)
 * handler to unregister their instances from the framework.
 */
int		mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int);
int		mdi_vhci_unregister(dev_info_t *, int);

/*
 * pHCI driver instance registration/unregistration
 *
 * mdi_phci_register() is called by pHCI drivers to register with the mpxio
 * framework and a specific 'class_driver' vHCI.  The pHCI driver must call
 * this interface as part of its attach(9e) handler.
 *
 * mdi_phci_unregister() is called by pHCI drivers from their detach(9E)
 * handler to unregister their instances from the framework.
 */
int		mdi_phci_register(char *, dev_info_t *, int);
int		mdi_phci_unregister(dev_info_t *, int);

/*
 * pHCI utility functions
 *
 * NOTE(review): judging by the names, these return the number of paths
 * under a pHCI and map a path address to a devinfo node - confirm against
 * the implementation.
 */
int		mdi_phci_get_path_count(dev_info_t *);
dev_info_t	*mdi_phci_path2devinfo(dev_info_t *, caddr_t);


/*
 * path selection functions
 *
 * The mdi_select_path() function is called by the vHCI drivers to select a
 * path to route the I/O request to.  The caller passes the block I/O data
 * transfer structure ("buf") as one of the parameters.  The mpxio framework
 * uses the buf structure contents to maintain per path statistics (total
 * I/O size / count pending).  If more than one online path is available to
 * select, the framework automatically selects a suitable path for routing
 * the I/O request.  If a failover operation is active for this client device
 * the call fails with the MDI_BUSY error code.
 *
 * By default this function returns a suitable path in online state based on
 * the current load balancing policy.  Currently we support LOAD_BALANCE_NONE
 * (the previously selected online path continues to be used for as long as
 * the path is usable) and LOAD_BALANCE_RR (online paths are selected in a
 * round robin fashion).  The load balancing scheme can be selected through
 * the vHCI driver's configuration file (driver.conf).
 *
 * vHCI drivers may override this default behaviour by specifying appropriate
 * flags.  If start_pip is specified (non NULL) it is used as the start point
 * to walk and find the next appropriate path.  The following flag values are
 * currently defined:  MDI_SELECT_ONLINE_PATH (to select an ONLINE path)
 * and/or MDI_SELECT_STANDBY_PATH (to select a STANDBY path).
 *
 * The selected paths are returned in a held state (ref_cnt) and the caller
 * should release the hold by calling mdi_rele_path() at the end of the
 * operation.
 */

int		mdi_select_path(dev_info_t *, struct buf *, int,
		    mdi_pathinfo_t *, mdi_pathinfo_t **);
void		mdi_hold_path(mdi_pathinfo_t *);
void		mdi_rele_path(mdi_pathinfo_t *);
int		mdi_set_lb_policy(dev_info_t *, client_lb_t);
client_lb_t	mdi_get_lb_policy(dev_info_t *);

/*
 * flags for mdi_select_path() routine
 */
#define	MDI_SELECT_ONLINE_PATH		0x0001
#define	MDI_SELECT_STANDBY_PATH		0x0002

/*
 * MDI client device utility functions
 *
 * NOTE(review): judging by the names, these are the client-side
 * counterparts of mdi_phci_get_path_count() and mdi_phci_path2devinfo() -
 * confirm against the implementation.
 */
int		mdi_client_get_path_count(dev_info_t *);
dev_info_t	*mdi_client_path2devinfo(dev_info_t *, caddr_t);

/*
 * failover
 *
 * The vHCI driver calls mdi_failover() to initiate a failover operation.
 * mdi_failover() calls back into the vHCI driver's vo_failover()
 * entry point to perform the actual failover operation.  The reason
 * for requiring the vHCI driver to initiate failover by calling
 * mdi_failover(), instead of directly executing vo_failover() itself,
 * is to ensure that the mdi framework can keep track of the client
 * state properly.  Additionally, mdi_failover() provides as a
 * convenience the option of performing the failover operation
 * synchronously or asynchronously.
 *
 * Upon successful completion of the failover operation, the paths that were
 * previously ONLINE will be in the STANDBY state, and the newly activated
 * paths will be in the ONLINE state.
 *
 * The flags modifier determines whether the activation is done synchronously
 * (MDI_FAILOVER_SYNC) or asynchronously (MDI_FAILOVER_ASYNC).
 */
int mdi_failover(dev_info_t *, dev_info_t *, int);

/*
 * Client device failover mode of operation
 */
#define	MDI_FAILOVER_SYNC	1	/* Synchronous Failover		*/
#define	MDI_FAILOVER_ASYNC	2	/* Asynchronous Failover	*/

/*
 * mdi_pathinfo management functions.
 *
 * Find, allocate and free functions.  mdi_pi_find() returns the node
 * directly; mdi_pi_alloc() returns an int status and hands the node back
 * through its final mdi_pathinfo_t ** argument.
 */
mdi_pathinfo_t *mdi_pi_find(dev_info_t *, char *, char *);
int mdi_pi_alloc(dev_info_t *, char *, char *, char *, int, mdi_pathinfo_t **);
int mdi_pi_free(mdi_pathinfo_t *, int);

/*
 * mdi_pathinfo node state change functions.
 *
 * One entry point per target state (online, standby, fault, offline); each
 * takes the node and an int argument (presumably flags - confirm) and
 * returns an int status.
 *
 * mdi_pi_kstat_iosupdate() takes a node and a buf and returns nothing
 * (presumably it updates the per-path I/O kstats - confirm).
 */
int mdi_pi_online(mdi_pathinfo_t *, int);
int mdi_pi_standby(mdi_pathinfo_t *, int);
int mdi_pi_fault(mdi_pathinfo_t *, int);
int mdi_pi_offline(mdi_pathinfo_t *, int);
void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *);

/*
 * mdi_pathinfo node member functions
 *
 * Accessors for the pHCI private data, the client private data and the
 * state of a node (presumably backed by pi_pprivate, pi_cprivate and
 * pi_state - confirm against the implementation).
 */
caddr_t mdi_pi_get_phci_private(mdi_pathinfo_t *);
caddr_t mdi_pi_get_client_private(mdi_pathinfo_t *);
void mdi_pi_set_phci_private(mdi_pathinfo_t *, caddr_t);
void mdi_pi_set_client_private(mdi_pathinfo_t *, caddr_t);
void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t);

/*
 * mdi_pathinfo Property handling functions
 *
 * Remove, size, pack, update and lookup operations on the properties of an
 * mdi_pathinfo node, with typed variants for byte arrays, ints, 64-bit
 * ints, int arrays, strings and string arrays.  All return an int status.
 *
 * NOTE(review): mdi_prop_free() presumably releases data returned by the
 * mdi_prop_lookup_*() functions - confirm against the implementation.
 */
int mdi_prop_remove(mdi_pathinfo_t *, char *);
int mdi_prop_size(mdi_pathinfo_t *, size_t *);
int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t);
int mdi_prop_update_byte_array(mdi_pathinfo_t *, char *, uchar_t *, uint_t);
int mdi_prop_update_int(mdi_pathinfo_t *, char *, int);
int mdi_prop_update_int64(mdi_pathinfo_t *, char *, int64_t);
int mdi_prop_update_int_array(mdi_pathinfo_t *, char *, int *, uint_t);
int mdi_prop_update_string(mdi_pathinfo_t *, char *, char *);
int mdi_prop_update_string_array(mdi_pathinfo_t *, char *, char **, uint_t);
int mdi_prop_lookup_byte_array(mdi_pathinfo_t *, char *, uchar_t **, uint_t *);
int mdi_prop_lookup_int(mdi_pathinfo_t *, char *, int *);
int mdi_prop_lookup_int64(mdi_pathinfo_t *, char *, int64_t *);
int mdi_prop_lookup_int_array(mdi_pathinfo_t *, char *, int **, uint_t *);
int mdi_prop_lookup_string(mdi_pathinfo_t *, char *, char **);
int mdi_prop_lookup_string_array(mdi_pathinfo_t *, char *, char ***, uint_t *);
int mdi_prop_free(void *);

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_MDI_IMPLDEFS_H */
