--- zzzz-none-000/linux-2.6.19.2/drivers/scsi/scsi_transport_iscsi.c 2007-01-10 19:10:37.000000000 +0000 +++ davinci-8020-5505/linux-2.6.19.2/drivers/scsi/scsi_transport_iscsi.c 2007-01-11 07:38:19.000000000 +0000 @@ -21,6 +21,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include +#include #include #include #include @@ -33,7 +34,7 @@ #define ISCSI_SESSION_ATTRS 11 #define ISCSI_CONN_ATTRS 11 #define ISCSI_HOST_ATTRS 0 -#define ISCSI_TRANSPORT_VERSION "2.0-724" +#define ISCSI_TRANSPORT_VERSION "2.0-685" struct iscsi_internal { int daemon_pid; @@ -148,6 +149,30 @@ static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex); +struct mempool_zone { + mempool_t *pool; + atomic_t allocated; + int size; + int hiwat; + struct list_head freequeue; + spinlock_t freelock; +}; + +static struct mempool_zone *z_reply; + +/* + * Z_MAX_* - actual mempool size allocated at the mempool_zone_init() time + * Z_HIWAT_* - zone's high watermark when if_error bit will be set to -ENOMEM + * so daemon will notice OOM on NETLINK tranposrt level and will + * be able to predict or change operational behavior + */ +#define Z_MAX_REPLY 8 +#define Z_HIWAT_REPLY 6 +#define Z_MAX_PDU 8 +#define Z_HIWAT_PDU 6 +#define Z_MAX_ERROR 16 +#define Z_HIWAT_ERROR 12 + static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); @@ -389,11 +414,59 @@ } EXPORT_SYMBOL_GPL(iscsi_destroy_session); +static void mempool_zone_destroy(struct mempool_zone *zp) +{ + mempool_destroy(zp->pool); + kfree(zp); +} + +static void* +mempool_zone_alloc_skb(gfp_t gfp_mask, void *pool_data) +{ + struct mempool_zone *zone = pool_data; + + return alloc_skb(zone->size, gfp_mask); +} + +static void +mempool_zone_free_skb(void *element, void *pool_data) +{ + kfree_skb(element); +} + +static struct mempool_zone * +mempool_zone_init(unsigned max, unsigned size, unsigned hiwat) +{ + struct mempool_zone *zp; + + zp = kzalloc(sizeof(*zp), GFP_KERNEL); + if (!zp) + return NULL; + + zp->size = size; + zp->hiwat = hiwat; + INIT_LIST_HEAD(&zp->freequeue); + spin_lock_init(&zp->freelock); + atomic_set(&zp->allocated, 0); + + zp->pool = mempool_create(max, mempool_zone_alloc_skb, + mempool_zone_free_skb, zp); + if (!zp->pool) { + kfree(zp); + return NULL; + } + + return zp; +} + static void iscsi_conn_release(struct device *dev) { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev); struct device *parent = conn->dev.parent; + mempool_zone_destroy(conn->z_pdu); + mempool_zone_destroy(conn->z_error); + kfree(conn); put_device(parent); } @@ -403,6 +476,31 @@ return dev->release == iscsi_conn_release; } +static int iscsi_create_event_pools(struct iscsi_cls_conn *conn) +{ + conn->z_pdu = mempool_zone_init(Z_MAX_PDU, + NLMSG_SPACE(sizeof(struct iscsi_uevent) + + sizeof(struct iscsi_hdr) + + DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH), + Z_HIWAT_PDU); + if (!conn->z_pdu) { + dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate " + "pdu zone for new conn\n"); + return -ENOMEM; + } + + conn->z_error = mempool_zone_init(Z_MAX_ERROR, + NLMSG_SPACE(sizeof(struct iscsi_uevent)), + Z_HIWAT_ERROR); + if (!conn->z_error) { + dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate " + "error zone for new conn\n"); + mempool_zone_destroy(conn->z_pdu); + return -ENOMEM; + } + return 0; +} + /** * iscsi_create_conn - create iscsi class connection * @session: iscsi cls session @@ -435,9 +533,12 @@ conn->transport = transport; conn->cid = cid; + if (iscsi_create_event_pools(conn)) + goto free_conn; + /* this is released in the dev's release function */ if (!get_device(&session->dev)) - goto free_conn; + goto free_conn_pools; snprintf(conn->dev.bus_id, BUS_ID_SIZE, "connection%d:%u", session->sid, cid); @@ -454,6 +555,8 @@ release_parent_ref: put_device(&session->dev); +free_conn_pools: + free_conn: kfree(conn); return NULL; @@ -496,31 +599,81 @@ return NULL; } +static inline struct list_head *skb_to_lh(struct sk_buff *skb) +{ + return (struct list_head *)&skb->cb; +} + +static void +mempool_zone_complete(struct mempool_zone *zone) +{ + unsigned long flags; + struct list_head *lh, *n; + + spin_lock_irqsave(&zone->freelock, flags); + list_for_each_safe(lh, n, &zone->freequeue) { + struct sk_buff *skb = (struct sk_buff *)((char *)lh - + offsetof(struct sk_buff, cb)); + if (!skb_shared(skb)) { + list_del(skb_to_lh(skb)); + mempool_free(skb, zone->pool); + atomic_dec(&zone->allocated); + } + } + spin_unlock_irqrestore(&zone->freelock, flags); +} + +static struct sk_buff* +mempool_zone_get_skb(struct mempool_zone *zone) +{ + struct sk_buff *skb; + + skb = mempool_alloc(zone->pool, GFP_ATOMIC); + if (skb) + atomic_inc(&zone->allocated); + return skb; +} + static int -iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp) +iscsi_broadcast_skb(struct mempool_zone *zone, struct sk_buff *skb, gfp_t gfp) { + unsigned long flags; int rc; + skb_get(skb); rc = netlink_broadcast(nls, skb, 0, 1, gfp); if (rc < 0) { + mempool_free(skb, zone->pool); printk(KERN_ERR "iscsi: can not broadcast skb (%d)\n", rc); return rc; } + spin_lock_irqsave(&zone->freelock, flags); + INIT_LIST_HEAD(skb_to_lh(skb)); + list_add(skb_to_lh(skb), &zone->freequeue); + spin_unlock_irqrestore(&zone->freelock, flags); return 0; } static int -iscsi_unicast_skb(struct sk_buff *skb, int pid) +iscsi_unicast_skb(struct mempool_zone *zone, struct sk_buff *skb, int pid) { + unsigned long flags; int rc; + skb_get(skb); rc = netlink_unicast(nls, skb, pid, MSG_DONTWAIT); if (rc < 0) { + mempool_free(skb, zone->pool); printk(KERN_ERR "iscsi: can not unicast skb (%d)\n", rc); return rc; } + spin_lock_irqsave(&zone->freelock, flags); + INIT_LIST_HEAD(skb_to_lh(skb)); + list_add(skb_to_lh(skb), &zone->freequeue); + spin_unlock_irqrestore(&zone->freelock, flags); + return 0; } @@ -539,7 +692,9 @@ if (!priv) return -EINVAL; - skb = alloc_skb(len, GFP_ATOMIC); + mempool_zone_complete(conn->z_pdu); + + skb = mempool_zone_get_skb(conn->z_pdu); if (!skb) { iscsi_conn_error(conn, ISCSI_ERR_CONN_FAILED); dev_printk(KERN_ERR, &conn->dev, "iscsi: can not deliver " @@ -552,13 +707,15 @@ memset(ev, 0, sizeof(*ev)); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_RECV_PDU; + if (atomic_read(&conn->z_pdu->allocated) >= conn->z_pdu->hiwat) + ev->iferror = -ENOMEM; ev->r.recv_req.cid = conn->cid; ev->r.recv_req.sid = iscsi_conn_get_sid(conn); pdu = (char*)ev + sizeof(*ev); memcpy(pdu, hdr, sizeof(struct iscsi_hdr)); memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size); - return iscsi_unicast_skb(skb, priv->daemon_pid); + return iscsi_unicast_skb(conn->z_pdu, skb, priv->daemon_pid); } EXPORT_SYMBOL_GPL(iscsi_recv_pdu); @@ -574,7 +731,9 @@ if (!priv) return; - skb = alloc_skb(len, GFP_ATOMIC); + mempool_zone_complete(conn->z_error); + + skb = mempool_zone_get_skb(conn->z_error); if (!skb) { dev_printk(KERN_ERR, &conn->dev, "iscsi: gracefully ignored " "conn error (%d)\n", error); @@ -585,11 +744,13 @@ ev = NLMSG_DATA(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_ERROR; + if (atomic_read(&conn->z_error->allocated) >= conn->z_error->hiwat) + ev->iferror = -ENOMEM; ev->r.connerror.error = error; ev->r.connerror.cid = conn->cid; ev->r.connerror.sid = iscsi_conn_get_sid(conn); - iscsi_broadcast_skb(skb, GFP_ATOMIC); + iscsi_broadcast_skb(conn->z_error, skb, GFP_ATOMIC); dev_printk(KERN_INFO, &conn->dev, "iscsi: detected conn error (%d)\n", error); @@ -606,7 +767,9 @@ int flags = multi ? NLM_F_MULTI : 0; int t = done ? NLMSG_DONE : type; - skb = alloc_skb(len, GFP_ATOMIC); + mempool_zone_complete(z_reply); + + skb = mempool_zone_get_skb(z_reply); /* * FIXME: * user is supposed to react on iferror == -ENOMEM; @@ -617,7 +780,7 @@ nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0); nlh->nlmsg_flags = flags; memcpy(NLMSG_DATA(nlh), payload, size); - return iscsi_unicast_skb(skb, pid); + return iscsi_unicast_skb(z_reply, skb, pid); } static int @@ -647,7 +810,9 @@ do { int actual_size; - skbstat = alloc_skb(len, GFP_ATOMIC); + mempool_zone_complete(conn->z_pdu); + + skbstat = mempool_zone_get_skb(conn->z_pdu); if (!skbstat) { dev_printk(KERN_ERR, &conn->dev, "iscsi: can not " "deliver stats: OOM\n"); @@ -660,6 +825,8 @@ memset(evstat, 0, sizeof(*evstat)); evstat->transport_handle = iscsi_handle(conn->transport); evstat->type = nlh->nlmsg_type; + if (atomic_read(&conn->z_pdu->allocated) >= conn->z_pdu->hiwat) + evstat->iferror = -ENOMEM; evstat->u.get_stats.cid = ev->u.get_stats.cid; evstat->u.get_stats.sid = @@ -678,7 +845,7 @@ skb_trim(skbstat, NLMSG_ALIGN(actual_size)); nlhstat->nlmsg_len = actual_size; - err = iscsi_unicast_skb(skbstat, priv->daemon_pid); + err = iscsi_unicast_skb(conn->z_pdu, skbstat, priv->daemon_pid); } while (err < 0 && err != -ECONNREFUSED); return err; @@ -709,7 +876,9 @@ session = iscsi_dev_to_session(conn->dev.parent); shost = iscsi_session_to_shost(session); - skb = alloc_skb(len, GFP_KERNEL); + mempool_zone_complete(conn->z_pdu); + + skb = mempool_zone_get_skb(conn->z_pdu); if (!skb) { dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session creation event\n"); @@ -727,7 +896,7 @@ * this will occur if the daemon is not up, so we just warn * the user and when the daemon is restarted it will handle it */ - rc = iscsi_broadcast_skb(skb, GFP_KERNEL); + rc = iscsi_broadcast_skb(conn->z_pdu, skb, GFP_KERNEL); if (rc < 0) dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session destruction event. Check iscsi daemon\n"); @@ -770,7 +939,9 @@ session = iscsi_dev_to_session(conn->dev.parent); shost = iscsi_session_to_shost(session); - skb = alloc_skb(len, GFP_KERNEL); + mempool_zone_complete(conn->z_pdu); + + skb = mempool_zone_get_skb(conn->z_pdu); if (!skb) { dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session creation event\n"); @@ -788,7 +959,7 @@ * this will occur if the daemon is not up, so we just warn * the user and when the daemon is restarted it will handle it */ - rc = iscsi_broadcast_skb(skb, GFP_KERNEL); + rc = iscsi_broadcast_skb(conn->z_pdu, skb, GFP_KERNEL); if (rc < 0) dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session creation event. Check iscsi daemon\n"); @@ -1107,6 +1278,9 @@ err = iscsi_if_send_reply( NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq, nlh->nlmsg_type, 0, 0, ev, sizeof(*ev)); + if (atomic_read(&z_reply->allocated) >= + z_reply->hiwat) + ev->iferror = -ENOMEM; } while (err < 0 && err != -ECONNREFUSED); skb_pull(skb, rlen); } @@ -1410,6 +1584,32 @@ } EXPORT_SYMBOL_GPL(iscsi_unregister_transport); +static int +iscsi_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_ISCSI && n->pid) { + struct iscsi_cls_conn *conn; + unsigned long flags; + + mempool_zone_complete(z_reply); + spin_lock_irqsave(&connlock, flags); + list_for_each_entry(conn, &connlist, conn_list) { + mempool_zone_complete(conn->z_error); + mempool_zone_complete(conn->z_pdu); + } + spin_unlock_irqrestore(&connlock, flags); + } + + return NOTIFY_DONE; +} + +static struct notifier_block iscsi_nl_notifier = { + .notifier_call = iscsi_rcv_nl_event, +}; + static __init int iscsi_transport_init(void) { int err; @@ -1433,15 +1633,25 @@ if (err) goto unregister_conn_class; + err = netlink_register_notifier(&iscsi_nl_notifier); + if (err) + goto unregister_session_class; + nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, THIS_MODULE); if (!nls) { err = -ENOBUFS; - goto unregister_session_class; + goto unregister_notifier; } - return 0; + z_reply = mempool_zone_init(Z_MAX_REPLY, + NLMSG_SPACE(sizeof(struct iscsi_uevent)), Z_HIWAT_REPLY); + if (z_reply) + return 0; + sock_release(nls->sk_socket); +unregister_notifier: + netlink_unregister_notifier(&iscsi_nl_notifier); unregister_session_class: transport_class_unregister(&iscsi_session_class); unregister_conn_class: @@ -1455,7 +1665,9 @@ static void __exit iscsi_transport_exit(void) { + mempool_zone_destroy(z_reply); sock_release(nls->sk_socket); + netlink_unregister_notifier(&iscsi_nl_notifier); transport_class_unregister(&iscsi_connection_class); transport_class_unregister(&iscsi_session_class); transport_class_unregister(&iscsi_host_class);