--- zzzz-none-000/linux-3.10.107/drivers/block/mtip32xx/mtip32xx.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/block/mtip32xx/mtip32xx.c 2021-02-04 17:41:59.000000000 +0000 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -38,13 +39,35 @@ #include <../drivers/ata/ahci.h> #include #include +#include #include "mtip32xx.h" #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) -#define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16)) -#define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS) -#define HW_PORT_PRIV_DMA_SZ \ - (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ) + +/* DMA region containing RX Fis, Identify, RLE10, and SMART buffers */ +#define AHCI_RX_FIS_SZ 0x100 +#define AHCI_RX_FIS_OFFSET 0x0 +#define AHCI_IDFY_SZ ATA_SECT_SIZE +#define AHCI_IDFY_OFFSET 0x400 +#define AHCI_SECTBUF_SZ ATA_SECT_SIZE +#define AHCI_SECTBUF_OFFSET 0x800 +#define AHCI_SMARTBUF_SZ ATA_SECT_SIZE +#define AHCI_SMARTBUF_OFFSET 0xC00 +/* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */ +#define BLOCK_DMA_ALLOC_SZ 4096 + +/* DMA region containing command table (should be 8192 bytes) */ +#define AHCI_CMD_SLOT_SZ sizeof(struct mtip_cmd_hdr) +#define AHCI_CMD_TBL_SZ (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ) +#define AHCI_CMD_TBL_OFFSET 0x0 + +/* DMA region per command (contains header and SGL) */ +#define AHCI_CMD_TBL_HDR_SZ 0x80 +#define AHCI_CMD_TBL_HDR_OFFSET 0x0 +#define AHCI_CMD_TBL_SGL_SZ (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg)) +#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ +#define CMD_DMA_ALLOC_SZ (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ) + #define HOST_CAP_NZDMA (1 << 19) #define HOST_HSORG 0xFC @@ -126,123 +149,99 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) { u16 vendor_id = 0; + struct driver_data *dd = pci_get_drvdata(pdev); + + if (dd->sr) + return true; /* Read the vendorID from the configuration space */ pci_read_config_word(pdev, 0x00, &vendor_id); - if (vendor_id == 0xFFFF) + if (vendor_id == 0xFFFF) { + dd->sr = true; + if (dd->queue) + set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags); + else + dev_warn(&dd->pdev->dev, + "%s: dd->queue is NULL\n", __func__); return true; /* device removed */ + } return false; /* device present */ } -/* - * This function is called for clean the pending command in the - * command slot during the surprise removal of device and return - * error to the upper layer. - * - * @dd Pointer to the DRIVER_DATA structure. - * - * return value - * None - */ -static void mtip_command_cleanup(struct driver_data *dd) +static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { - int group = 0, commandslot = 0, commandindex = 0; - struct mtip_cmd *command; - struct mtip_port *port = dd->port; - static int in_progress; + struct request *rq; - if (in_progress) - return; + if (mtip_check_surprise_removal(dd->pdev)) + return NULL; - in_progress = 1; + rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true); + if (IS_ERR(rq)) + return NULL; - for (group = 0; group < 4; group++) { - for (commandslot = 0; commandslot < 32; commandslot++) { - if (!(port->allocated[group] & (1 << commandslot))) - continue; + return blk_mq_rq_to_pdu(rq); +} - commandindex = group << 5 | commandslot; - command = &port->commands[commandindex]; +static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd) +{ + blk_put_request(blk_mq_rq_from_pdu(cmd)); +} - if (atomic_read(&command->active) - && (command->async_callback)) { - command->async_callback(command->async_data, - -ENODEV); - command->async_callback = NULL; - command->async_data = NULL; - } +/* + * Once we add support for one hctx per mtip group, this will change a bit + */ +static struct request *mtip_rq_from_tag(struct driver_data *dd, + unsigned int tag) +{ + struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; - dma_unmap_sg(&port->dd->pdev->dev, - command->sg, - command->scatter_ents, - command->direction); - } - } + return blk_mq_tag_to_rq(hctx->tags, tag); +} - up(&port->cmd_slot); +static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd, + unsigned int tag) +{ + struct request *rq = mtip_rq_from_tag(dd, tag); - set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag); - in_progress = 0; + return blk_mq_rq_to_pdu(rq); } /* - * Obtain an empty command slot. + * IO completion function. * - * This function needs to be reentrant since it could be called - * at the same time on multiple CPUs. The allocation of the - * command slot must be atomic. + * This completion function is called by the driver ISR when a + * command that was issued by the kernel completes. It first calls the + * asynchronous completion function which normally calls back into the block + * layer passing the asynchronous callback data, then unmaps the + * scatter list associated with the completed command, and finally + * clears the allocated bit associated with the completed command. * - * @port Pointer to the port data structure. + * @port Pointer to the port data structure. + * @tag Tag of the command. + * @data Pointer to driver_data. + * @status Completion status. * * return value - * >= 0 Index of command slot obtained. - * -1 No command slots available. + * None */ -static int get_slot(struct mtip_port *port) +static void mtip_async_complete(struct mtip_port *port, + int tag, struct mtip_cmd *cmd, int status) { - int slot, i; - unsigned int num_command_slots = port->dd->slot_groups * 32; + struct driver_data *dd = port->dd; + struct request *rq; - /* - * Try 10 times, because there is a small race here. - * that's ok, because it's still cheaper than a lock. - * - * Race: Since this section is not protected by lock, same bit - * could be chosen by different process contexts running in - * different processor. So instead of costly lock, we are going - * with loop. - */ - for (i = 0; i < 10; i++) { - slot = find_next_zero_bit(port->allocated, - num_command_slots, 1); - if ((slot < num_command_slots) && - (!test_and_set_bit(slot, port->allocated))) - return slot; - } - dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n"); + if (unlikely(!dd) || unlikely(!port)) + return; - if (mtip_check_surprise_removal(port->dd->pdev)) { - /* Device not present, clean outstanding commands */ - mtip_command_cleanup(port->dd); + if (unlikely(status == PORT_IRQ_TF_ERR)) { + dev_warn(&port->dd->pdev->dev, + "Command tag %d failed due to TFE\n", tag); } - return -1; -} -/* - * Release a command slot. - * - * @port Pointer to the port data structure. - * @tag Tag of command to release - * - * return value - * None - */ -static inline void release_slot(struct mtip_port *port, int tag) -{ - smp_mb__before_clear_bit(); - clear_bit(tag, port->allocated); - smp_mb__after_clear_bit(); + rq = mtip_rq_from_tag(dd, tag); + + blk_mq_complete_request(rq, status); } /* @@ -264,8 +263,11 @@ /* Flush */ readl(dd->mmio + HOST_CTL); - /* Spin for up to 2 seconds, waiting for reset acknowledgement */ - timeout = jiffies + msecs_to_jiffies(2000); + /* + * Spin for up to 10 seconds waiting for reset acknowledgement. Spec + * is 1 sec but in LUN failure conditions, up to 10 secs are required + */ + timeout = jiffies + msecs_to_jiffies(10000); do { mdelay(10); if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) @@ -296,8 +298,6 @@ { int group = tag >> 5; - atomic_set(&port->commands[tag].active, 1); - /* guard SACT and CI registers */ spin_lock(&port->cmd_issue_lock[group]); writel((1 << MTIP_TAG_BIT(tag)), @@ -305,10 +305,6 @@ writel((1 << MTIP_TAG_BIT(tag)), port->cmd_issue[MTIP_TAG_INDEX(tag)]); spin_unlock(&port->cmd_issue_lock[group]); - - /* Set the command's timeout value.*/ - port->commands[tag].comp_time = jiffies + msecs_to_jiffies( - MTIP_NCQ_COMMAND_TIMEOUT_MS); } /* @@ -556,185 +552,13 @@ memset(tagmap, 0, sizeof(tagmap)); for (group = SLOTBITS_IN_LONGS; group > 0; group--) - tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ", + tagmap_len += sprintf(tagmap + tagmap_len, "%016lX ", tagbits[group-1]); dev_warn(&dd->pdev->dev, "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap); } /* - * Called periodically to see if any read/write commands are - * taking too long to complete. - * - * @data Pointer to the PORT data structure. - * - * return value - * None - */ -static void mtip_timeout_function(unsigned long int data) -{ - struct mtip_port *port = (struct mtip_port *) data; - struct host_to_dev_fis *fis; - struct mtip_cmd *command; - int tag, cmdto_cnt = 0; - unsigned int bit, group; - unsigned int num_command_slots; - unsigned long to, tagaccum[SLOTBITS_IN_LONGS]; - - if (unlikely(!port)) - return; - - if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) { - mod_timer(&port->cmd_timer, - jiffies + msecs_to_jiffies(30000)); - return; - } - /* clear the tag accumulator */ - memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); - num_command_slots = port->dd->slot_groups * 32; - - for (tag = 0; tag < num_command_slots; tag++) { - /* - * Skip internal command slot as it has - * its own timeout mechanism - */ - if (tag == MTIP_TAG_INTERNAL) - continue; - - if (atomic_read(&port->commands[tag].active) && - (time_after(jiffies, port->commands[tag].comp_time))) { - group = tag >> 5; - bit = tag & 0x1F; - - command = &port->commands[tag]; - fis = (struct host_to_dev_fis *) command->command; - - set_bit(tag, tagaccum); - cmdto_cnt++; - if (cmdto_cnt == 1) - set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - - /* - * Clear the completed bit. This should prevent - * any interrupt handlers from trying to retire - * the command. - */ - writel(1 << bit, port->completed[group]); - - /* Call the async completion callback. */ - if (likely(command->async_callback)) - command->async_callback(command->async_data, - -EIO); - command->async_callback = NULL; - command->comp_func = NULL; - - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&port->dd->pdev->dev, - command->sg, - command->scatter_ents, - command->direction); - - /* - * Clear the allocated bit and active tag for the - * command. - */ - atomic_set(&port->commands[tag].active, 0); - release_slot(port, tag); - - up(&port->cmd_slot); - } - } - - if (cmdto_cnt) { - print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); - if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { - mtip_device_reset(port->dd); - wake_up_interruptible(&port->svc_wait); - } - clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - } - - if (port->ic_pause_timer) { - to = port->ic_pause_timer + msecs_to_jiffies(1000); - if (time_after(jiffies, to)) { - if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { - port->ic_pause_timer = 0; - clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); - clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); - clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); - } - - - } - } - - /* Restart the timer */ - mod_timer(&port->cmd_timer, - jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); -} - -/* - * IO completion function. - * - * This completion function is called by the driver ISR when a - * command that was issued by the kernel completes. It first calls the - * asynchronous completion function which normally calls back into the block - * layer passing the asynchronous callback data, then unmaps the - * scatter list associated with the completed command, and finally - * clears the allocated bit associated with the completed command. - * - * @port Pointer to the port data structure. - * @tag Tag of the command. - * @data Pointer to driver_data. - * @status Completion status. - * - * return value - * None - */ -static void mtip_async_complete(struct mtip_port *port, - int tag, - void *data, - int status) -{ - struct mtip_cmd *command; - struct driver_data *dd = data; - int cb_status = status ? -EIO : 0; - - if (unlikely(!dd) || unlikely(!port)) - return; - - command = &port->commands[tag]; - - if (unlikely(status == PORT_IRQ_TF_ERR)) { - dev_warn(&port->dd->pdev->dev, - "Command tag %d failed due to TFE\n", tag); - } - - /* Upper layer callback */ - if (likely(command->async_callback)) - command->async_callback(command->async_data, cb_status); - - command->async_callback = NULL; - command->comp_func = NULL; - - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&dd->pdev->dev, - command->sg, - command->scatter_ents, - command->direction); - - /* Clear the allocated and active bits for the command */ - atomic_set(&port->commands[tag].active, 0); - release_slot(port, tag); - - if (unlikely(command->unaligned)) - up(&port->cmd_slot_unal); - else - up(&port->cmd_slot); -} - -/* * Internal command completion callback function. * * This function is normally called by the driver ISR when an internal @@ -750,28 +574,21 @@ * None */ static void mtip_completion(struct mtip_port *port, - int tag, - void *data, - int status) + int tag, struct mtip_cmd *command, int status) { - struct mtip_cmd *command = &port->commands[tag]; - struct completion *waiting = data; + struct completion *waiting = command->comp_data; if (unlikely(status == PORT_IRQ_TF_ERR)) dev_warn(&port->dd->pdev->dev, "Internal command %d completed with TFE\n", tag); - command->async_callback = NULL; command->comp_func = NULL; - + command->comp_data = NULL; complete(waiting); } static void mtip_null_completion(struct mtip_port *port, - int tag, - void *data, - int status) + int tag, struct mtip_cmd *command, int status) { - return; } static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, @@ -803,21 +620,15 @@ port = dd->port; - /* Stop the timer to prevent command timeouts. */ - del_timer(&port->cmd_timer); - set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_TAG_INTERNAL, port->allocated)) { - cmd = &port->commands[MTIP_TAG_INTERNAL]; + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { + cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); - atomic_inc(&cmd->active); /* active > 1 indicates error */ if (cmd->comp_data && cmd->comp_func) { cmd->comp_func(port, MTIP_TAG_INTERNAL, - cmd->comp_data, PORT_IRQ_TF_ERR); + cmd, PORT_IRQ_TF_ERR); } - goto handle_tfe_exit; + return; } /* clear the tag accumulator */ @@ -827,6 +638,8 @@ for (group = 0; group < dd->slot_groups; group++) { completed = readl(port->completed[group]); + dev_warn(&dd->pdev->dev, "g=%u, comp=%x\n", group, completed); + /* clear completed status register in the hardware.*/ writel(completed, port->completed[group]); @@ -840,21 +653,16 @@ if (tag == MTIP_TAG_INTERNAL) continue; - cmd = &port->commands[tag]; + cmd = mtip_cmd_from_tag(dd, tag); if (likely(cmd->comp_func)) { set_bit(tag, tagaccum); cmd_cnt++; - atomic_set(&cmd->active, 0); - cmd->comp_func(port, - tag, - cmd->comp_data, - 0); + cmd->comp_func(port, tag, cmd, 0); } else { dev_err(&port->dd->pdev->dev, "Missing completion func for tag %d", tag); if (mtip_check_surprise_removal(dd->pdev)) { - mtip_command_cleanup(dd); /* don't proceed further */ return; } @@ -893,8 +701,9 @@ fail_reason = "thermal shutdown"; } if (buf[288] == 0xBF) { + set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag); dev_info(&dd->pdev->dev, - "Drive indicates rebuild has failed.\n"); + "Drive indicates rebuild has failed. Secure erase required.\n"); fail_all_ncq_cmds = 1; fail_reason = "rebuild failed"; } @@ -908,11 +717,7 @@ for (bit = 0; bit < 32; bit++) { reissue = 1; tag = (group << 5) + bit; - cmd = &port->commands[tag]; - - /* If the active bit is set re-issue the command */ - if (atomic_read(&cmd->active) == 0) - continue; + cmd = mtip_cmd_from_tag(dd, tag); fis = (struct host_to_dev_fis *)cmd->command; @@ -931,11 +736,9 @@ tag, fail_reason != NULL ? fail_reason : "unknown"); - atomic_set(&cmd->active, 0); if (cmd->comp_func) { cmd->comp_func(port, tag, - cmd->comp_data, - -ENODATA); + cmd, -ENODATA); } continue; } @@ -958,14 +761,9 @@ /* Retire a command that will not be reissued */ dev_warn(&port->dd->pdev->dev, "retiring tag %d\n", tag); - atomic_set(&cmd->active, 0); if (cmd->comp_func) - cmd->comp_func( - port, - tag, - cmd->comp_data, - PORT_IRQ_TF_ERR); + cmd->comp_func(port, tag, cmd, PORT_IRQ_TF_ERR); else dev_warn(&port->dd->pdev->dev, "Bad completion for tag %d\n", @@ -973,14 +771,6 @@ } } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); - -handle_tfe_exit: - /* clear eh_active */ - clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); - - mod_timer(&port->cmd_timer, - jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); } /* @@ -1009,23 +799,16 @@ if (unlikely(tag == MTIP_TAG_INTERNAL)) continue; - command = &port->commands[tag]; - /* make internal callback */ - if (likely(command->comp_func)) { - command->comp_func( - port, - tag, - command->comp_data, - 0); - } else { - dev_warn(&dd->pdev->dev, - "Null completion " - "for tag %d", + command = mtip_cmd_from_tag(dd, tag); + if (likely(command->comp_func)) + command->comp_func(port, tag, command, 0); + else { + dev_dbg(&dd->pdev->dev, + "Null completion for tag %d", tag); if (mtip_check_surprise_removal( dd->pdev)) { - mtip_command_cleanup(dd); return; } } @@ -1044,16 +827,13 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) { struct mtip_port *port = dd->port; - struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL]; + struct mtip_cmd *cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL))) { if (cmd->comp_func) { - cmd->comp_func(port, - MTIP_TAG_INTERNAL, - cmd->comp_data, - 0); + cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); return; } } @@ -1066,8 +846,6 @@ */ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) { - if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) - mtip_handle_tfe(dd); if (unlikely(port_stat & PORT_IRQ_CONNECT)) { dev_warn(&dd->pdev->dev, @@ -1085,6 +863,12 @@ dev_warn(&dd->pdev->dev, "Port stat errors %x unhandled\n", (port_stat & ~PORT_IRQ_HANDLED)); + if (mtip_check_surprise_removal(dd->pdev)) + return; + } + if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) { + set_bit(MTIP_PF_EH_ACTIVE_BIT, &dd->port->flags); + wake_up_interruptible(&dd->port->svc_wait); } } @@ -1103,6 +887,10 @@ /* Acknowledge the interrupt status on the port.*/ port_stat = readl(port->mmio + PORT_IRQ_STAT); + if (unlikely(port_stat == 0xFFFFFFFF)) { + mtip_check_surprise_removal(dd->pdev); + return IRQ_HANDLED; + } writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ @@ -1145,7 +933,6 @@ if (unlikely(port_stat & PORT_IRQ_ERR)) { if (unlikely(mtip_check_surprise_removal(dd->pdev))) { - mtip_command_cleanup(dd); /* don't proceed further */ return IRQ_HANDLED; } @@ -1186,7 +973,6 @@ static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) { - atomic_set(&port->commands[tag].active, 1); writel(1 << MTIP_TAG_BIT(tag), port->cmd_issue[MTIP_TAG_INDEX(tag)]); } @@ -1200,15 +986,10 @@ reply = port->rxfis + RX_FIS_D2H_REG; task_file_data = readl(port->mmio+PORT_TFDATA); - if (fis->command == ATA_CMD_SEC_ERASE_UNIT) - clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); - if ((task_file_data & 1)) return false; if (fis->command == ATA_CMD_SEC_ERASE_PREP) { - set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); - set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); port->ic_pause_timer = jiffies; return true; } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && @@ -1220,8 +1001,11 @@ ((fis->command == 0xFC) && (fis->features == 0x27 || fis->features == 0x72 || fis->features == 0x62 || fis->features == 0x26))) { + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); + clear_bit(MTIP_DDF_REBUILD_FAILED_BIT, &port->dd->dd_flag); /* Com reset after secure erase or lowlevel format */ mtip_restart_port(port); + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); return false; } @@ -1233,26 +1017,40 @@ * * @port Pointer to port data structure * @timeout Max duration to wait (ms) + * @atomic gfp_t flag to indicate blockable context or not * * return value * 0 Success * -EBUSY Commands still active */ -static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) +static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, + gfp_t atomic) { unsigned long to; unsigned int n; unsigned int active = 1; + blk_mq_stop_hw_queues(port->dd->queue); + to = jiffies + msecs_to_jiffies(timeout); do { if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { + test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) && + atomic == GFP_KERNEL) { msleep(20); continue; /* svc thd is actively issuing commands */ } - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) - return -EFAULT; + + if (atomic == GFP_KERNEL) + msleep(100); + else { + cpu_relax(); + udelay(100); + } + + if (mtip_check_surprise_removal(port->dd->pdev)) + goto err_fault; + /* * Ignore s_active bit 0 of array element 0. * This bit will always be set @@ -1263,11 +1061,13 @@ if (!active) break; - - msleep(20); } while (time_before(jiffies, to)); + blk_mq_start_stopped_hw_queues(port->dd->queue, true); return active ? -EBUSY : 0; +err_fault: + blk_mq_start_stopped_hw_queues(port->dd->queue, true); + return -EFAULT; } /* @@ -1299,10 +1099,10 @@ { struct mtip_cmd_sg *command_sg; DECLARE_COMPLETION_ONSTACK(wait); - int rv = 0, ready2go = 1; - struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; - unsigned long to; + struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; + int rv = 0; + unsigned long start; /* Make sure the buffer is 8 byte aligned. This is asic specific. */ if (buffer & 0x00000007) { @@ -1310,32 +1110,27 @@ return -EFAULT; } - to = jiffies + msecs_to_jiffies(timeout); - do { - ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL, - port->allocated); - if (ready2go) - break; - mdelay(100); - } while (time_before(jiffies, to)); - if (!ready2go) { - dev_warn(&dd->pdev->dev, - "Internal cmd active. new cmd [%02X]\n", fis->command); - return -EBUSY; + int_cmd = mtip_get_int_command(dd); + if (!int_cmd) { + dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); + return -EFAULT; } + set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); - port->ic_pause_timer = 0; - clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + if (fis->command == ATA_CMD_SEC_ERASE_PREP) + set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); if (atomic == GFP_KERNEL) { if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ - if (mtip_quiesce_io(port, 5000) < 0) { + if (mtip_quiesce_io(port, + MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) { dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); - release_slot(port, MTIP_TAG_INTERNAL); + mtip_put_int_command(dd, int_cmd); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); return -EBUSY; @@ -1375,18 +1170,22 @@ /* Populate the command header */ int_cmd->command_header->byte_count = 0; + start = jiffies; + /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); if (atomic == GFP_KERNEL) { /* Wait for the command to complete or timeout. */ - if (wait_for_completion_interruptible_timeout( + if ((rv = wait_for_completion_interruptible_timeout( &wait, - msecs_to_jiffies(timeout)) <= 0) { + msecs_to_jiffies(timeout))) <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, - "Internal command [%02X] was interrupted after %lu ms\n", - fis->command, timeout); + "Internal command [%02X] was interrupted after %u ms\n", + fis->command, + jiffies_to_msecs(jiffies - start)); rv = -EINTR; goto exec_ic_exit; } else if (rv == 0) /* timeout */ @@ -1461,13 +1260,12 @@ } exec_ic_exit: /* Clear the allocated and active bits for the internal command. */ - atomic_set(&int_cmd->active, 0); - release_slot(port, MTIP_TAG_INTERNAL); + mtip_put_int_command(dd, int_cmd); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); if (rv >= 0 && mtip_pause_ncq(port, fis)) { /* NCQ paused */ return rv; } - clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); return rv; @@ -1519,7 +1317,7 @@ *timeout = 15000; /* 15 seconds */ break; default: - *timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; + *timeout = MTIP_IOCTL_CMD_TIMEOUT_MS; break; } } @@ -1571,7 +1369,7 @@ sizeof(u16) * ATA_ID_WORDS, 0, GFP_KERNEL, - MTIP_INTERNAL_COMMAND_TIMEOUT_MS) + MTIP_INT_CMD_TIMEOUT_MS) < 0) { rv = -1; goto out; @@ -1594,6 +1392,12 @@ } #endif + /* Check security locked state */ + if (port->identify[128] & 0x4) + set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); + else + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); + #ifdef MTIP_TRIM /* Disabling TRIM support temporarily */ /* Demux ID.DRAT & ID.RZAT to determine trim support */ if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) @@ -1697,7 +1501,7 @@ sectors * ATA_SECT_SIZE, 0, GFP_ATOMIC, - MTIP_INTERNAL_COMMAND_TIMEOUT_MS); + MTIP_INT_CMD_TIMEOUT_MS); } /* @@ -1918,6 +1722,10 @@ strlcpy(cbuf, (char *)(port->identify+27), 41); dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf); + dev_info(&port->dd->pdev->dev, "Security: %04x %s\n", + port->identify[128], + port->identify[128] & 0x4 ? "(LOCKED)" : ""); + if (mtip_hw_get_capacity(port->dd, §ors)) dev_info(&port->dd->pdev->dev, "Capacity: %llu sectors (%llu MB)\n", @@ -1986,6 +1794,7 @@ { struct host_to_dev_fis fis; struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); + unsigned int to; /* Build the FIS. */ memset(&fis, 0, sizeof(struct host_to_dev_fis)); @@ -1999,6 +1808,8 @@ fis.cyl_hi = command[5]; fis.device = command[6] & ~0x10; /* Clear the dev bit*/ + mtip_set_timeout(port->dd, &fis, &to, 0); + dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n", __func__, command[0], @@ -2017,7 +1828,7 @@ 0, 0, GFP_KERNEL, - MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) { + to) < 0) { return -1; } @@ -2057,6 +1868,7 @@ u8 *buf = NULL; dma_addr_t dma_addr = 0; int rv = 0, xfer_sz = command[3]; + unsigned int to; if (xfer_sz) { if (!user_buffer) @@ -2088,6 +1900,8 @@ fis.cyl_hi = 0xC2; } + mtip_set_timeout(port->dd, &fis, &to, 0); + if (xfer_sz) reply = (port->rxfis + RX_FIS_PIO_SETUP); else @@ -2110,7 +1924,7 @@ (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0), 0, GFP_KERNEL, - MTIP_IOCTL_COMMAND_TIMEOUT_MS) + to) < 0) { rv = -EFAULT; goto exit_drive_command; @@ -2564,22 +2378,23 @@ * return value * None */ -static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, - int nsect, int nents, int tag, void *callback, - void *data, int dir, int unaligned) +static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, + struct mtip_cmd *command, int nents, + struct blk_mq_hw_ctx *hctx) { struct host_to_dev_fis *fis; struct mtip_port *port = dd->port; - struct mtip_cmd *command = &port->commands[tag]; - int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - u64 start = sector; + int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + u64 start = blk_rq_pos(rq); + unsigned int nsect = blk_rq_sectors(rq); /* Map the scatter list for DMA access */ nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); + prefetch(&port->flags); + command->scatter_ents = nents; - command->unaligned = unaligned; /* * The number of retries for this command before it is * reported as a failure to the upper layers. @@ -2590,8 +2405,10 @@ fis = command->command; fis->type = 0x27; fis->opts = 1 << 7; - fis->command = - (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE); + if (dma_dir == DMA_FROM_DEVICE) + fis->command = ATA_CMD_FPDMA_READ; + else + fis->command = ATA_CMD_FPDMA_WRITE; fis->lba_low = start & 0xFF; fis->lba_mid = (start >> 8) & 0xFF; fis->lba_hi = (start >> 16) & 0xFF; @@ -2601,14 +2418,14 @@ fis->device = 1 << 6; fis->features = nsect & 0xFF; fis->features_ex = (nsect >> 8) & 0xFF; - fis->sect_count = ((tag << 3) | (tag >> 5)); + fis->sect_count = ((rq->tag << 3) | (rq->tag >> 5)); fis->sect_cnt_ex = 0; fis->control = 0; fis->res2 = 0; fis->res3 = 0; fill_command_sg(dd, command, nents); - if (unaligned) + if (unlikely(command->unaligned)) fis->device |= 1 << 7; /* Populate the command header */ @@ -2626,81 +2443,17 @@ command->direction = dma_dir; /* - * Set the completion function and data for the command passed - * from the upper layer. - */ - command->async_data = data; - command->async_callback = callback; - - /* * To prevent this command from being issued * if an internal command is in progress or error handling is active. */ - if (port->flags & MTIP_PF_PAUSE_IO) { - set_bit(tag, port->cmds_to_issue); + if (unlikely(port->flags & MTIP_PF_PAUSE_IO)) { + set_bit(rq->tag, port->cmds_to_issue); set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); return; } /* Issue the command to the hardware */ - mtip_issue_ncq_command(port, tag); - - return; -} - -/* - * Release a command slot. - * - * @dd Pointer to the driver data structure. - * @tag Slot tag - * - * return value - * None - */ -static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag, - int unaligned) -{ - struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal : - &dd->port->cmd_slot; - release_slot(dd->port, tag); - up(sem); -} - -/* - * Obtain a command slot and return its associated scatter list. - * - * @dd Pointer to the driver data structure. - * @tag Pointer to an int that will receive the allocated command - * slot tag. - * - * return value - * Pointer to the scatter list for the allocated command slot - * or NULL if no command slots are available. - */ -static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, - int *tag, int unaligned) -{ - struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal : - &dd->port->cmd_slot; - - /* - * It is possible that, even with this semaphore, a thread - * may think that no command slots are available. Therefore, we - * need to make an attempt to get_slot(). - */ - down(sem); - *tag = get_slot(dd->port); - - if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) { - up(sem); - return NULL; - } - if (unlikely(*tag < 0)) { - up(sem); - return NULL; - } - - return dd->port->commands[*tag].sg; + mtip_issue_ncq_command(port, rq->tag); } /* @@ -2810,34 +2563,51 @@ static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { + struct driver_data *dd = (struct driver_data *)f->private_data; int size = *offset; - char buf[MTIP_DFS_MAX_BUF_SIZE]; + char *buf; + int rv = 0; if (!len || *offset) return 0; + buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); + if (!buf) { + dev_err(&dd->pdev->dev, + "Memory allocation: status buffer\n"); + return -ENOMEM; + } + size += show_device_status(NULL, buf); *offset = size <= len ? size : len; size = copy_to_user(ubuf, buf, *offset); if (size) - return -EFAULT; + rv = -EFAULT; - return *offset; + kfree(buf); + return rv ? rv : *offset; } static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { struct driver_data *dd = (struct driver_data *)f->private_data; - char buf[MTIP_DFS_MAX_BUF_SIZE]; + char *buf; u32 group_allocated; int size = *offset; - int n; + int n, rv = 0; if (!len || size) return 0; + buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); + if (!buf) { + dev_err(&dd->pdev->dev, + "Memory allocation: register buffer\n"); + return -ENOMEM; + } + size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) @@ -2865,18 +2635,6 @@ readl(dd->mmio + HOST_IRQ_STAT)); size += sprintf(&buf[size], "\n"); - size += sprintf(&buf[size], "L/ Allocated : [ 0x"); - - for (n = dd->slot_groups-1; n >= 0; n--) { - if (sizeof(long) > sizeof(u32)) - group_allocated = - dd->port->allocated[n/2] >> (32*(n&1)); - else - group_allocated = dd->port->allocated[n]; - size += sprintf(&buf[size], "%08X ", group_allocated); - } - size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "L/ Commands in Q : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) { @@ -2892,21 +2650,30 @@ *offset = size <= len ? size : len; size = copy_to_user(ubuf, buf, *offset); if (size) - return -EFAULT; + rv = -EFAULT; - return *offset; + kfree(buf); + return rv ? rv : *offset; } static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { struct driver_data *dd = (struct driver_data *)f->private_data; - char buf[MTIP_DFS_MAX_BUF_SIZE]; + char *buf; int size = *offset; + int rv = 0; if (!len || size) return 0; + buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); + if (!buf) { + dev_err(&dd->pdev->dev, + "Memory allocation: flag buffer\n"); + return -ENOMEM; + } + size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", dd->port->flags); size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", @@ -2915,9 +2682,10 @@ *offset = size <= len ? size : len; size = copy_to_user(ubuf, buf, *offset); if (size) - return -EFAULT; + rv = -EFAULT; - return *offset; + kfree(buf); + return rv ? rv : *offset; } static const struct file_operations mtip_device_status_fops = { @@ -3010,7 +2778,6 @@ debugfs_remove_recursive(dd->dfs_node); } - /* * Perform any init/resume time hardware setup * @@ -3133,7 +2900,6 @@ mtip_block_initialize(dd); return 0; } - ssleep(10); } while (time_before(jiffies, timeout)); /* Check for timeout */ @@ -3143,6 +2909,42 @@ return -EFAULT; } +static void mtip_softirq_done_fn(struct request *rq) +{ + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct driver_data *dd = rq->q->queuedata; + + /* Unmap the DMA scatter list entries */ + dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, + cmd->direction); + + if (unlikely(cmd->unaligned)) + up(&dd->port->cmd_slot_unal); + + blk_mq_end_request(rq, rq->errors); +} + +static void mtip_abort_cmd(struct request *req, void *data, + bool reserved) +{ + struct driver_data *dd = data; + + dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); + + clear_bit(req->tag, dd->port->cmds_to_issue); + req->errors = -EIO; + mtip_softirq_done_fn(req); +} + +static void mtip_queue_cmd(struct request *req, void *data, + bool reserved) +{ + struct driver_data *dd = data; + + set_bit(req->tag, dd->port->cmds_to_issue); + blk_abort_request(req); +} + /* * service thread to issue queued commands * @@ -3155,26 +2957,69 @@ static int mtip_service_thread(void *data) { struct driver_data *dd = (struct driver_data *)data; - unsigned long slot, slot_start, slot_wrap; + unsigned long slot, slot_start, slot_wrap, to; unsigned int num_cmd_slots = dd->slot_groups * 32; struct mtip_port *port = dd->port; while (1) { + if (kthread_should_stop() || + test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) + goto st_out; + clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); + /* * the condition is to check neither an internal command is * is in progress nor error handling is active */ wait_event_interruptible(port->svc_wait, (port->flags) && - !(port->flags & MTIP_PF_PAUSE_IO)); + (port->flags & MTIP_PF_SVC_THD_WORK)); - if (kthread_should_stop()) - break; + if (kthread_should_stop() || + test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) + goto st_out; if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) - break; + goto st_out; set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); + +restart_eh: + /* Demux bits: start with error handling */ + if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) { + mtip_handle_tfe(dd); + clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); + } + + if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) + goto restart_eh; + + if (test_bit(MTIP_PF_TO_ACTIVE_BIT, &port->flags)) { + to = jiffies + msecs_to_jiffies(5000); + + do { + mdelay(100); + } while (atomic_read(&dd->irq_workers_active) != 0 && + time_before(jiffies, to)); + + if (atomic_read(&dd->irq_workers_active) != 0) + dev_warn(&dd->pdev->dev, + "Completion workers still active!"); + + spin_lock(dd->queue->queue_lock); + blk_mq_all_tag_busy_iter(*dd->tags.tags, + mtip_queue_cmd, dd); + spin_unlock(dd->queue->queue_lock); + + set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags); + + if (mtip_device_reset(dd)) + blk_mq_all_tag_busy_iter(*dd->tags.tags, + mtip_abort_cmd, dd); + + clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags); + } + if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { slot = 1; /* used to restrict the loop to one iteration */ @@ -3204,21 +3049,143 @@ } clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); - } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { - if (!mtip_ftl_rebuild_poll(dd)) - set_bit(MTIP_DDF_REBUILD_FAILED_BIT, - &dd->dd_flag); - clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); } - clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); - if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) - break; + if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { + if (mtip_ftl_rebuild_poll(dd) == 0) + clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); + } } + +st_out: return 0; } /* + * DMA region teardown + * + * @dd Pointer to driver_data structure + * + * return value + * None + */ +static void mtip_dma_free(struct driver_data *dd) +{ + struct mtip_port *port = dd->port; + + if (port->block1) + dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ, + port->block1, port->block1_dma); + + if (port->command_list) { + dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ, + port->command_list, port->command_list_dma); + } +} + +/* + * DMA region setup + * + * @dd Pointer to driver_data structure + * + * return value + * -ENOMEM Not enough free DMA region space to initialize driver + */ +static int mtip_dma_alloc(struct driver_data *dd) +{ + struct mtip_port *port = dd->port; + + /* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */ + port->block1 = + dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ, + &port->block1_dma, GFP_KERNEL); + if (!port->block1) + return -ENOMEM; + memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ); + + /* Allocate dma memory for command list */ + port->command_list = + dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ, + &port->command_list_dma, GFP_KERNEL); + if (!port->command_list) { + dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ, + port->block1, port->block1_dma); + port->block1 = NULL; + port->block1_dma = 0; + return -ENOMEM; + } + memset(port->command_list, 0, AHCI_CMD_TBL_SZ); + + /* Setup all pointers into first DMA region */ + port->rxfis = port->block1 + AHCI_RX_FIS_OFFSET; + port->rxfis_dma = port->block1_dma + AHCI_RX_FIS_OFFSET; + port->identify = port->block1 + AHCI_IDFY_OFFSET; + port->identify_dma = port->block1_dma + AHCI_IDFY_OFFSET; + port->log_buf = port->block1 + AHCI_SECTBUF_OFFSET; + port->log_buf_dma = port->block1_dma + AHCI_SECTBUF_OFFSET; + port->smart_buf = port->block1 + AHCI_SMARTBUF_OFFSET; + port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET; + + return 0; +} + +static int mtip_hw_get_identify(struct driver_data *dd) +{ + struct smart_attr attr242; + unsigned char *buf; + int rv; + + if (mtip_get_identify(dd->port, NULL) < 0) + return -EFAULT; + + if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == + MTIP_FTL_REBUILD_MAGIC) { + set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags); + return MTIP_FTL_REBUILD_MAGIC; + } + mtip_dump_identify(dd->port); + + /* check write protect, over temp and rebuild statuses */ + rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ, + dd->port->log_buf, + dd->port->log_buf_dma, 1); + if (rv) { + dev_warn(&dd->pdev->dev, + "Error in READ LOG EXT (10h) command\n"); + /* non-critical error, don't fail the load */ + } else { + buf = (unsigned char *)dd->port->log_buf; + if (buf[259] & 0x1) { + dev_info(&dd->pdev->dev, + "Write protect bit is set.\n"); + set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag); + } + if (buf[288] == 0xF7) { + dev_info(&dd->pdev->dev, + "Exceeded Tmax, drive in thermal shutdown.\n"); + set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag); + } + if (buf[288] == 0xBF) { + dev_info(&dd->pdev->dev, + "Drive indicates rebuild has failed.\n"); + set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag); + } + } + + /* get write protect progess */ + memset(&attr242, 0, sizeof(struct smart_attr)); + if (mtip_get_smart_attr(dd->port, 242, &attr242)) + dev_warn(&dd->pdev->dev, + "Unable to check write protect progress\n"); + else + dev_info(&dd->pdev->dev, + "Write protect progress: %u%% (%u blocks)\n", + attr242.cur, le32_to_cpu(attr242.data)); + + return rv; +} + +/* * Called once for each card. * * @dd Pointer to the driver data structure. @@ -3232,8 +3199,6 @@ int rv; unsigned int num_command_slots; unsigned long timeout, timetaken; - unsigned char *buf; - struct smart_attr attr242; dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR]; @@ -3264,8 +3229,6 @@ else dd->unal_qdepth = 0; - /* Counting semaphore to track command slot usage */ - sema_init(&dd->port->cmd_slot, num_command_slots - 1 - dd->unal_qdepth); sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth); /* Spinlock to prevent concurrent issue */ @@ -3276,83 +3239,10 @@ dd->port->mmio = dd->mmio + PORT_OFFSET; dd->port->dd = dd; - /* Allocate memory for the command list. */ - dd->port->command_list = - dmam_alloc_coherent(&dd->pdev->dev, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), - &dd->port->command_list_dma, - GFP_KERNEL); - if (!dd->port->command_list) { - dev_err(&dd->pdev->dev, - "Memory allocation: command list\n"); - rv = -ENOMEM; + /* DMA allocations */ + rv = mtip_dma_alloc(dd); + if (rv < 0) goto out1; - } - - /* Clear the memory we have allocated. */ - memset(dd->port->command_list, - 0, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4)); - - /* Setup the addresse of the RX FIS. */ - dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ; - dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ; - - /* Setup the address of the command tables. */ - dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ; - dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ; - - /* Setup the address of the identify data. */ - dd->port->identify = dd->port->command_table + - HW_CMD_TBL_AR_SZ; - dd->port->identify_dma = dd->port->command_tbl_dma + - HW_CMD_TBL_AR_SZ; - - /* Setup the address of the sector buffer - for some non-ncq cmds */ - dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE; - dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE; - - /* Setup the address of the log buf - for read log command */ - dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE; - dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE; - - /* Setup the address of the smart buf - for smart read data command */ - dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE; - dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE; - - - /* Point the command headers at the command tables. */ - for (i = 0; i < num_command_slots; i++) { - dd->port->commands[i].command_header = - dd->port->command_list + - (sizeof(struct mtip_cmd_hdr) * i); - dd->port->commands[i].command_header_dma = - dd->port->command_list_dma + - (sizeof(struct mtip_cmd_hdr) * i); - - dd->port->commands[i].command = - dd->port->command_table + (HW_CMD_TBL_SZ * i); - dd->port->commands[i].command_dma = - dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i); - - if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64) - dd->port->commands[i].command_header->ctbau = - __force_bit2int cpu_to_le32( - (dd->port->commands[i].command_dma >> 16) >> 16); - dd->port->commands[i].command_header->ctba = - __force_bit2int cpu_to_le32( - dd->port->commands[i].command_dma & 0xFFFFFFFF); - - /* - * If this is not done, a bug is reported by the stock - * FC11 i386. Due to the fact that it has lots of kernel - * debugging enabled. - */ - sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG); - - /* Mark all commands as currently inactive.*/ - atomic_set(&dd->port->commands[i].active, 0); - } /* Setup the pointers to the extended s_active and CI registers. */ for (i = 0; i < dd->slot_groups; i++) { @@ -3423,73 +3313,16 @@ writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, dd->mmio + HOST_CTL); - init_timer(&dd->port->cmd_timer); init_waitqueue_head(&dd->port->svc_wait); - dd->port->cmd_timer.data = (unsigned long int) dd->port; - dd->port->cmd_timer.function = mtip_timeout_function; - mod_timer(&dd->port->cmd_timer, - jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); - - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { rv = -EFAULT; goto out3; } - if (mtip_get_identify(dd->port, NULL) < 0) { - rv = -EFAULT; - goto out3; - } - - if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == - MTIP_FTL_REBUILD_MAGIC) { - set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags); - return MTIP_FTL_REBUILD_MAGIC; - } - mtip_dump_identify(dd->port); - - /* check write protect, over temp and rebuild statuses */ - rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ, - dd->port->log_buf, - dd->port->log_buf_dma, 1); - if (rv) { - dev_warn(&dd->pdev->dev, - "Error in READ LOG EXT (10h) command\n"); - /* non-critical error, don't fail the load */ - } else { - buf = (unsigned char *)dd->port->log_buf; - if (buf[259] & 0x1) { - dev_info(&dd->pdev->dev, - "Write protect bit is set.\n"); - set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag); - } - if (buf[288] == 0xF7) { - dev_info(&dd->pdev->dev, - "Exceeded Tmax, drive in thermal shutdown.\n"); - set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag); - } - if (buf[288] == 0xBF) { - dev_info(&dd->pdev->dev, - "Drive indicates rebuild has failed.\n"); - /* TODO */ - } - } - - /* get write protect progess */ - memset(&attr242, 0, sizeof(struct smart_attr)); - if (mtip_get_smart_attr(dd->port, 242, &attr242)) - dev_warn(&dd->pdev->dev, - "Unable to check write protect progress\n"); - else - dev_info(&dd->pdev->dev, - "Write protect progress: %u%% (%u blocks)\n", - attr242.cur, le32_to_cpu(attr242.data)); return rv; out3: - del_timer_sync(&dd->port->cmd_timer); - /* Disable interrupts on the HBA. */ writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, dd->mmio + HOST_CTL); @@ -3500,12 +3333,8 @@ out2: mtip_deinit_port(dd->port); + mtip_dma_free(dd); - /* Free the command/command header memory. */ - dmam_free_coherent(&dd->pdev->dev, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), - dd->port->command_list, - dd->port->command_list_dma); out1: /* Free the memory allocated for the for structure. */ kfree(dd->port); @@ -3513,6 +3342,27 @@ return rv; } +static int mtip_standby_drive(struct driver_data *dd) +{ + int rv = 0; + + if (dd->sr || !dd->port) + return -ENODEV; + /* + * Send standby immediate (E0h) to the drive so that it + * saves its state. + */ + if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) && + !test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag) && + !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) { + rv = mtip_standby_immediate(dd->port); + if (rv) + dev_warn(&dd->pdev->dev, + "STANDBY IMMEDIATE failed\n"); + } + return rv; +} + /* * Called to deinitialize an interface. * @@ -3523,17 +3373,7 @@ */ static int mtip_hw_exit(struct driver_data *dd) { - /* - * Send standby immediate (E0h) to the drive so that it - * saves its state. - */ - if (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { - - if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) - if (mtip_standby_immediate(dd->port)) - dev_warn(&dd->pdev->dev, - "STANDBY IMMEDIATE failed\n"); - + if (!dd->sr) { /* de-initialize the port. */ mtip_deinit_port(dd->port); @@ -3542,19 +3382,17 @@ dd->mmio + HOST_CTL); } - del_timer_sync(&dd->port->cmd_timer); - /* Release the IRQ. */ irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); + msleep(1000); + + /* Free dma regions */ + mtip_dma_free(dd); - /* Free the command/command header memory. */ - dmam_free_coherent(&dd->pdev->dev, - HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), - dd->port->command_list, - dd->port->command_list_dma); /* Free the memory allocated for the for structure. */ kfree(dd->port); + dd->port = NULL; return 0; } @@ -3576,7 +3414,7 @@ * Send standby immediate (E0h) to the drive so that it * saves its state. */ - mtip_standby_immediate(dd->port); + mtip_standby_drive(dd); return 0; } @@ -3599,7 +3437,7 @@ * Send standby immediate (E0h) to the drive * so that it saves its state. */ - if (mtip_standby_immediate(dd->port) != 0) { + if (mtip_standby_drive(dd) != 0) { dev_err(&dd->pdev->dev, "Failed standby-immediate command\n"); return -EFAULT; @@ -3837,6 +3675,28 @@ return 0; } +static int mtip_block_open(struct block_device *dev, fmode_t mode) +{ + struct driver_data *dd; + + if (dev && dev->bd_disk) { + dd = (struct driver_data *) dev->bd_disk->private_data; + + if (dd) { + if (test_bit(MTIP_DDF_REMOVAL_BIT, + &dd->dd_flag)) { + return -ENODEV; + } + return 0; + } + } + return -ENODEV; +} + +void mtip_block_release(struct gendisk *disk, fmode_t mode) +{ +} + /* * Block device operation function. * @@ -3844,6 +3704,8 @@ * layer. */ static const struct block_device_operations mtip_block_ops = { + .open = mtip_block_open, + .release = mtip_block_release, .ioctl = mtip_block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mtip_block_compat_ioctl, @@ -3852,6 +3714,26 @@ .owner = THIS_MODULE }; +static inline bool is_se_active(struct driver_data *dd) +{ + if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) { + if (dd->port->ic_pause_timer) { + unsigned long to = dd->port->ic_pause_timer + + msecs_to_jiffies(1000); + if (time_after(jiffies, to)) { + clear_bit(MTIP_PF_SE_ACTIVE_BIT, + &dd->port->flags); + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag); + dd->port->ic_pause_timer = 0; + wake_up_interruptible(&dd->port->svc_wait); + return false; + } + } + return true; + } + return false; +} + /* * Block layer make request function. * @@ -3860,94 +3742,174 @@ * * @queue Pointer to the request queue. Unused other than to obtain * the driver data structure. - * @bio Pointer to the BIO. + * @rq Pointer to the request. * */ -static void mtip_make_request(struct request_queue *queue, struct bio *bio) +static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) { - struct driver_data *dd = queue->queuedata; - struct scatterlist *sg; - struct bio_vec *bvec; - int i, nents = 0; - int tag = 0, unaligned = 0; + struct driver_data *dd = hctx->queue->queuedata; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + unsigned int nents; + + if (is_se_active(dd)) + return -ENODATA; if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) { - bio_endio(bio, -ENXIO); - return; + return -ENXIO; } if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) { - bio_endio(bio, -ENODATA); - return; + return -ENODATA; } if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag) && - bio_data_dir(bio))) { - bio_endio(bio, -ENODATA); - return; - } - if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) { - bio_endio(bio, -ENODATA); - return; + rq_data_dir(rq))) { + return -ENODATA; } + if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) || + test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))) + return -ENODATA; } - if (unlikely(bio->bi_rw & REQ_DISCARD)) { - bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, - bio_sectors(bio))); - return; - } + if (rq->cmd_flags & REQ_DISCARD) { + int err; - if (unlikely(!bio_has_data(bio))) { - blk_queue_flush(queue, 0); - bio_endio(bio, 0); - return; + err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); + blk_mq_end_request(rq, err); + return 0; } - if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 && - dd->unal_qdepth) { - if (bio->bi_sector % 8 != 0) /* Unaligned on 4k boundaries */ - unaligned = 1; - else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */ - unaligned = 1; + /* Create the scatter list for this request. */ + nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg); + + /* Issue the read/write. */ + mtip_hw_submit_io(dd, rq, cmd, nents, hctx); + return 0; +} + +static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + struct driver_data *dd = hctx->queue->queuedata; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + + if (rq_data_dir(rq) == READ || !dd->unal_qdepth) + return false; + + /* + * If unaligned depth must be limited on this controller, mark it + * as unaligned if the IO isn't on a 4k boundary (start of length). + */ + if (blk_rq_sectors(rq) <= 64) { + if ((blk_rq_pos(rq) & 7) || (blk_rq_sectors(rq) & 7)) + cmd->unaligned = 1; } - sg = mtip_hw_get_scatterlist(dd, &tag, unaligned); - if (likely(sg != NULL)) { - blk_queue_bounce(queue, &bio); + if (cmd->unaligned && down_trylock(&dd->port->cmd_slot_unal)) + return true; - if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) { - dev_warn(&dd->pdev->dev, - "Maximum number of SGL entries exceeded\n"); - bio_io_error(bio); - mtip_hw_release_scatterlist(dd, tag, unaligned); - return; - } + return false; +} - /* Create the scatter list for this bio. */ - bio_for_each_segment(bvec, bio, i) { - sg_set_page(&sg[nents], - bvec->bv_page, - bvec->bv_len, - bvec->bv_offset); - nents++; - } - - /* Issue the read/write. */ - mtip_hw_submit_io(dd, - bio->bi_sector, - bio_sectors(bio), - nents, - tag, - bio_endio, - bio, - bio_data_dir(bio), - unaligned); - } else - bio_io_error(bio); +static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq = bd->rq; + int ret; + + if (unlikely(mtip_check_unal_depth(hctx, rq))) + return BLK_MQ_RQ_QUEUE_BUSY; + + blk_mq_start_request(rq); + + ret = mtip_submit_request(hctx, rq); + if (likely(!ret)) + return BLK_MQ_RQ_QUEUE_OK; + + rq->errors = ret; + return BLK_MQ_RQ_QUEUE_ERROR; +} + +static void mtip_free_cmd(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int request_idx) +{ + struct driver_data *dd = data; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + + if (!cmd->command) + return; + + dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, + cmd->command, cmd->command_dma); +} + +static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, + unsigned int request_idx, unsigned int numa_node) +{ + struct driver_data *dd = data; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; + + /* + * For flush requests, request_idx starts at the end of the + * tag space. Since we don't support FLUSH/FUA, simply return + * 0 as there's nothing to be done. + */ + if (request_idx >= MTIP_MAX_COMMAND_SLOTS) + return 0; + + cmd->command = dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, + &cmd->command_dma, GFP_KERNEL); + if (!cmd->command) + return -ENOMEM; + + memset(cmd->command, 0, CMD_DMA_ALLOC_SZ); + + /* Point the command headers at the command tables. */ + cmd->command_header = dd->port->command_list + + (sizeof(struct mtip_cmd_hdr) * request_idx); + cmd->command_header_dma = dd->port->command_list_dma + + (sizeof(struct mtip_cmd_hdr) * request_idx); + + if (host_cap_64) + cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); + + cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); + + sg_init_table(cmd->sg, MTIP_MAX_SG); + return 0; } +static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, + bool reserved) +{ + struct driver_data *dd = req->q->queuedata; + int ret = BLK_EH_RESET_TIMER; + + if (reserved) + goto exit_handler; + + if (test_bit(req->tag, dd->port->cmds_to_issue)) + goto exit_handler; + + if (test_and_set_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags)) + goto exit_handler; + + wake_up_interruptible(&dd->port->svc_wait); +exit_handler: + return ret; +} + +static struct blk_mq_ops mtip_mq_ops = { + .queue_rq = mtip_queue_rq, + .map_queue = blk_mq_map_queue, + .init_request = mtip_init_cmd, + .exit_request = mtip_free_cmd, + .complete = mtip_softirq_done_fn, + .timeout = mtip_cmd_timeout, +}; + /* * Block layer initialization function. * @@ -3965,16 +3927,11 @@ sector_t capacity; unsigned int index = 0; struct kobject *kobj; - unsigned char thd_name[16]; if (dd->disk) goto skip_create_disk; /* hw init done, before rebuild */ - /* Initialize the protocol layer. */ - wait_for_rebuild = mtip_hw_init(dd); - if (wait_for_rebuild < 0) { - dev_err(&dd->pdev->dev, - "Protocol layer initialization failed\n"); + if (mtip_hw_init(dd)) { rv = -EINVAL; goto protocol_init_error; } @@ -4009,36 +3966,64 @@ dd->disk->driverfs_dev = &dd->pdev->dev; dd->disk->major = dd->major; - dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS; + dd->disk->first_minor = index * MTIP_MAX_MINORS; + dd->disk->minors = MTIP_MAX_MINORS; dd->disk->fops = &mtip_block_ops; dd->disk->private_data = dd; dd->index = index; - /* - * if rebuild pending, start the service thread, and delay the block - * queue creation and add_disk() - */ - if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) - goto start_service_thread; + mtip_hw_debugfs_init(dd); + + memset(&dd->tags, 0, sizeof(dd->tags)); + dd->tags.ops = &mtip_mq_ops; + dd->tags.nr_hw_queues = 1; + dd->tags.queue_depth = MTIP_MAX_COMMAND_SLOTS; + dd->tags.reserved_tags = 1; + dd->tags.cmd_size = sizeof(struct mtip_cmd); + dd->tags.numa_node = dd->numa_node; + dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; + dd->tags.driver_data = dd; + dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; + + rv = blk_mq_alloc_tag_set(&dd->tags); + if (rv) { + dev_err(&dd->pdev->dev, + "Unable to allocate request queue\n"); + goto block_queue_alloc_tag_error; + } -skip_create_disk: /* Allocate the request queue. */ - dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node); - if (dd->queue == NULL) { + dd->queue = blk_mq_init_queue(&dd->tags); + if (IS_ERR(dd->queue)) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); rv = -ENOMEM; goto block_queue_alloc_init_error; } - /* Attach our request function to the request queue. */ - blk_queue_make_request(dd->queue, mtip_make_request); - dd->disk->queue = dd->queue; dd->queue->queuedata = dd; +skip_create_disk: + /* Initialize the protocol layer. */ + wait_for_rebuild = mtip_hw_get_identify(dd); + if (wait_for_rebuild < 0) { + dev_err(&dd->pdev->dev, + "Protocol layer initialization failed\n"); + rv = -EINVAL; + goto init_hw_cmds_error; + } + + /* + * if rebuild pending, start the service thread, and delay the block + * queue creation and add_disk() + */ + if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) + goto start_service_thread; + /* Set device limits. */ set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); + clear_bit(QUEUE_FLAG_ADD_RANDOM, &dd->queue->queue_flags); blk_queue_max_segments(dd->queue, MTIP_MAX_SG); blk_queue_physical_block_size(dd->queue, 4096); blk_queue_max_hw_sectors(dd->queue, 0xffff); @@ -4073,6 +4058,7 @@ /* Enable the block device and add it to /dev */ add_disk(dd->disk); + dd->bdev = bdget_disk(dd->disk, 0); /* * Now that the disk is active, initialize any sysfs attributes * managed by the protocol layer. @@ -4082,7 +4068,6 @@ mtip_hw_sysfs_init(dd, kobj); kobject_put(kobj); } - mtip_hw_debugfs_init(dd); if (dd->mtip_svc_handler) { set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); @@ -4090,9 +4075,9 @@ } start_service_thread: - sprintf(thd_name, "mtip_svc_thd_%02d", index); dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, - dd, dd->numa_node, thd_name); + dd, dd->numa_node, + "mtip_svc_thd_%02d", index); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); @@ -4107,15 +4092,19 @@ return rv; kthread_run_error: - mtip_hw_debugfs_exit(dd); + bdput(dd->bdev); + dd->bdev = NULL; /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); read_capacity_error: +init_hw_cmds_error: blk_cleanup_queue(dd->queue); - block_queue_alloc_init_error: + blk_mq_free_tag_set(&dd->tags); +block_queue_alloc_tag_error: + mtip_hw_debugfs_exit(dd); disk_index_error: spin_lock(&rssd_index_lock); ida_remove(&rssd_index_ida, index); @@ -4131,6 +4120,22 @@ return rv; } +static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) +{ + struct driver_data *dd = (struct driver_data *)data; + struct mtip_cmd *cmd; + + if (likely(!reserv)) + blk_mq_complete_request(rq, -ENODEV); + else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { + + cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); + if (cmd->comp_func) + cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, + cmd, -ENODEV); + } +} + /* * Block layer deinitialization function. * @@ -4145,6 +4150,8 @@ { struct kobject *kobj; + mtip_hw_debugfs_exit(dd); + if (dd->mtip_svc_handler) { set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); wake_up_interruptible(&dd->port->svc_wait); @@ -4159,27 +4166,48 @@ kobject_put(kobj); } } - mtip_hw_debugfs_exit(dd); + + if (!dd->sr) { + /* + * Explicitly wait here for IOs to quiesce, + * as mtip_standby_drive usually won't wait for IOs. + */ + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS, + GFP_KERNEL)) + mtip_standby_drive(dd); + } + else + dev_info(&dd->pdev->dev, "device %s surprise removal\n", + dd->disk->disk_name); + + blk_mq_freeze_queue_start(dd->queue); + blk_mq_stop_hw_queues(dd->queue); + blk_mq_all_tag_busy_iter(dd->tags.tags[0], mtip_no_dev_cleanup, dd); /* * Delete our gendisk structure. This also removes the device * from /dev */ + if (dd->bdev) { + bdput(dd->bdev); + dd->bdev = NULL; + } if (dd->disk) { - if (dd->disk->queue) + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) del_gendisk(dd->disk); - else - put_disk(dd->disk); + if (dd->disk->queue) { + blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + dd->queue = NULL; + } + put_disk(dd->disk); } + dd->disk = NULL; spin_lock(&rssd_index_lock); ida_remove(&rssd_index_ida, dd->index); spin_unlock(&rssd_index_lock); - blk_cleanup_queue(dd->queue); - dd->disk = NULL; - dd->queue = NULL; - /* De-initialize the protocol layer. */ mtip_hw_exit(dd); @@ -4200,16 +4228,20 @@ */ static int mtip_block_shutdown(struct driver_data *dd) { + mtip_hw_shutdown(dd); + /* Delete our gendisk structure, and cleanup the blk queue. */ if (dd->disk) { dev_info(&dd->pdev->dev, "Shutting down %s ...\n", dd->disk->disk_name); - if (dd->disk->queue) { + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) del_gendisk(dd->disk); + if (dd->disk->queue) { blk_cleanup_queue(dd->queue); - } else - put_disk(dd->disk); + blk_mq_free_tag_set(&dd->tags); + } + put_disk(dd->disk); dd->disk = NULL; dd->queue = NULL; } @@ -4217,8 +4249,6 @@ spin_lock(&rssd_index_lock); ida_remove(&rssd_index_ida, dd->index); spin_unlock(&rssd_index_lock); - - mtip_hw_shutdown(dd); return 0; } @@ -4370,7 +4400,7 @@ } dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), - cpu_to_node(smp_processor_id()), smp_processor_id()); + cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id()); dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { @@ -4487,7 +4517,7 @@ if (rv) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); - goto block_initialize_err; + goto msi_initialize_err; } mtip_fix_ero_nosnoop(dd, pdev); @@ -4519,6 +4549,8 @@ block_initialize_err: pci_disable_msi(pdev); + +msi_initialize_err: if (dd->isr_workq) { flush_workqueue(dd->isr_workq); destroy_workqueue(dd->isr_workq); @@ -4547,28 +4579,36 @@ static void mtip_pci_remove(struct pci_dev *pdev) { struct driver_data *dd = pci_get_drvdata(pdev); - int counter = 0; - unsigned long flags; + unsigned long flags, to; - set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag); spin_lock_irqsave(&dev_lock, flags); list_del_init(&dd->online_list); list_add(&dd->remove_list, &removing_list); spin_unlock_irqrestore(&dev_lock, flags); - if (mtip_check_surprise_removal(pdev)) { - while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { - counter++; - msleep(20); - if (counter == 10) { - /* Cleanup the outstanding commands */ - mtip_command_cleanup(dd); - break; - } - } + mtip_check_surprise_removal(pdev); + synchronize_irq(dd->pdev->irq); + + /* Spin until workers are done */ + to = jiffies + msecs_to_jiffies(4000); + do { + msleep(20); + } while (atomic_read(&dd->irq_workers_active) != 0 && + time_before(jiffies, to)); + + if (!dd->sr) + fsync_bdev(dd->bdev); + + if (atomic_read(&dd->irq_workers_active) != 0) { + dev_warn(&dd->pdev->dev, + "Completion workers still active!\n"); } + blk_set_queue_dying(dd->queue); + set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + /* Clean up the block layer. */ mtip_block_remove(dd); @@ -4587,7 +4627,9 @@ spin_unlock_irqrestore(&dev_lock, flags); kfree(dd); + pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); + pci_set_drvdata(pdev, NULL); } /* @@ -4693,7 +4735,7 @@ } /* Table of device ids supported by this driver. */ -static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = { +static const struct pci_device_id mtip_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) }, { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) }, { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) },