--- zzzz-none-000/linux-3.10.107/arch/powerpc/platforms/pseries/iommu.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/arch/powerpc/platforms/pseries/iommu.c 2021-02-04 17:41:59.000000000 +0000 @@ -30,13 +30,14 @@ #include #include #include -#include /* for show_stack */ #include #include #include #include #include #include +#include +#include #include #include #include @@ -48,12 +49,79 @@ #include #include #include +#include -#include "plpar_wrappers.h" +#include "pseries.h" +static struct iommu_table_group *iommu_pseries_alloc_group(int node) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl = NULL; + struct iommu_table_group_link *tgl = NULL; + + table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, + node); + if (!table_group) + goto fail_exit; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); + if (!tbl) + goto fail_exit; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + goto fail_exit; + + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[0] = tbl; + + return table_group; + +fail_exit: + kfree(tgl); + kfree(table_group); + kfree(tbl); + + return NULL; +} + +static void iommu_pseries_free_group(struct iommu_table_group *table_group, + const char *node_name) +{ + struct iommu_table *tbl; +#ifdef CONFIG_IOMMU_API + struct iommu_table_group_link *tgl; +#endif + + if (!table_group) + return; + + tbl = table_group->tables[0]; +#ifdef CONFIG_IOMMU_API + tgl = list_first_entry_or_null(&tbl->it_group_list, + struct iommu_table_group_link, next); + + WARN_ON_ONCE(!tgl); + if (tgl) { + list_del_rcu(&tgl->next); + kfree(tgl); + } + if (table_group->group) { + iommu_group_put(table_group->group); + BUG_ON(table_group->group); + } +#endif + iommu_free_table(tbl, node_name); + + kfree(table_group); +} static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, - u64 *startp, u64 *endp) + __be64 *startp, __be64 *endp) { u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index; unsigned long start, end, inc; @@ -87,7 +155,7 @@ struct dma_attrs *attrs) { u64 proto_tce; - u64 *tcep, *tces; + __be64 *tcep, *tces; u64 rpn; proto_tce = TCE_PCI_READ; // Read allowed @@ -95,12 +163,12 @@ if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; - tces = tcep = ((u64 *)tbl->it_base) + index; + tces = tcep = ((__be64 *)tbl->it_base) + index; while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ rpn = __pa(uaddr) >> TCE_SHIFT; - *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); uaddr += TCE_PAGE_SIZE; tcep++; @@ -114,9 +182,9 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) { - u64 *tcep, *tces; + __be64 *tcep, *tces; - tces = tcep = ((u64 *)tbl->it_base) + index; + tces = tcep = ((__be64 *)tbl->it_base) + index; while (npages--) *(tcep++) = 0; @@ -127,11 +195,11 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) { - u64 *tcep; + __be64 *tcep; - tcep = ((u64 *)tbl->it_base) + index; + tcep = ((__be64 *)tbl->it_base) + index; - return *tcep; + return be64_to_cpu(*tcep); } static void tce_free_pSeriesLP(struct iommu_table*, long, long); @@ -169,7 +237,7 @@ printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } tcenum++; @@ -178,7 +246,7 @@ return ret; } -static DEFINE_PER_CPU(u64 *, tce_page); +static DEFINE_PER_CPU(__be64 *, tce_page); static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, @@ -187,34 +255,34 @@ { u64 rc = 0; u64 proto_tce; - u64 *tcep; + __be64 *tcep; u64 rpn; long l, limit; long tcenum_start = tcenum, npages_start = npages; int ret = 0; unsigned long flags; - if (npages == 1) { + if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() */ if (!tcep) { - tcep = (u64 *)__get_free_page(GFP_ATOMIC); + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -231,7 +299,7 @@ limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { - tcep[l] = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); rpn++; } @@ -258,7 +326,7 @@ printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\tnpages = 0x%llx\n", (u64)npages); printk("\ttce[0] val = 0x%llx\n", tcep[0]); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } return ret; } @@ -274,7 +342,7 @@ printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%llx\n", (u64)tcenum); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } tcenum++; @@ -286,6 +354,9 @@ { u64 rc; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) + return tce_free_pSeriesLP(tbl, tcenum, npages); + rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); if (rc && printk_ratelimit()) { @@ -293,7 +364,7 @@ printk("\trc = %lld\n", rc); printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\tnpages = 0x%llx\n", (u64)npages); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } } @@ -308,7 +379,7 @@ printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%llx\n", (u64)tcenum); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } return tce_ret; @@ -393,21 +464,22 @@ unsigned long num_pfn, const void *arg) { const struct dynamic_dma_window_prop *maprange = arg; - u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn; + u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn; + __be64 *tcep; u32 tce_shift; u64 rc = 0; long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); if (!tcep) { - tcep = (u64 *)__get_free_page(GFP_ATOMIC); + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); if (!tcep) { local_irq_enable(); return -ENOMEM; } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; @@ -436,7 +508,7 @@ dma_offset = next + be64_to_cpu(maprange->dma_base); for (l = 0; l < limit; l++) { - tcep[l] = proto_tce | next; + tcep[l] = cpu_to_be64(proto_tce | next); next += tce_size; } @@ -460,8 +532,6 @@ return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); } - -#ifdef CONFIG_PCI static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl) @@ -486,9 +556,10 @@ memset((void *)tbl->it_base, 0, *sizep); tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT; + tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; /* Test if we are going over 2GB of DMA space */ if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { @@ -499,7 +570,7 @@ phb->dma_window_base_cur += phb->dma_window_size; /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT; + tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_blocksize = 16; @@ -530,20 +601,27 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl, - const void *dma_window) + const __be32 *dma_window) { unsigned long offset, size; of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; tbl->it_base = 0; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; - tbl->it_size = size >> IOMMU_PAGE_SHIFT; + tbl->it_offset = offset >> tbl->it_page_shift; + tbl->it_size = size >> tbl->it_page_shift; } +struct iommu_table_ops iommu_table_pseries_ops = { + .set = tce_build_pSeries, + .clear = tce_free_pSeries, + .get = tce_get_pseries +}; + static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) { struct device_node *dn; @@ -573,8 +651,7 @@ while (isa_dn && isa_dn != dn) isa_dn = isa_dn->parent; - if (isa_dn_orig) - of_node_put(isa_dn_orig); + of_node_put(isa_dn_orig); /* Count number of direct PCI children of the PHB. */ for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) @@ -609,11 +686,13 @@ pci->phb->dma_window_size = 0x8000000ul; pci->phb->dma_window_base_cur = 0x8000000ul; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + tbl->it_ops = &iommu_table_pseries_ops; + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, pci_domain_nr(bus), 0); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -623,13 +702,18 @@ pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); } +struct iommu_table_ops iommu_table_lpar_multi_ops = { + .set = tce_buildmulti_pSeriesLP, + .clear = tce_freemulti_pSeriesLP, + .get = tce_get_pSeriesLP +}; static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const void *dma_window = NULL; + const __be32 *dma_window = NULL; dn = pci_bus_to_OF_node(bus); @@ -651,14 +735,17 @@ ppci = PCI_DN(pdn); pr_debug(" parent is %s, iommu_table: 0x%p\n", - pdn->full_name, ppci->iommu_table); + pdn->full_name, ppci->table_group); - if (!ppci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - ppci->phb->node); + if (!ppci->table_group) { + ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); + tbl = ppci->table_group->tables[0]; iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); - ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); - pr_debug(" created table: %p\n", ppci->iommu_table); + tbl->it_ops = &iommu_table_lpar_multi_ops; + iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(ppci->table_group, + pci_domain_nr(bus), 0); + pr_debug(" created table: %p\n", ppci->table_group); } } @@ -680,11 +767,15 @@ struct pci_controller *phb = PCI_DN(dn)->phb; pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->node); + PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); + tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); - PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + tbl->it_ops = &iommu_table_pseries_ops; + iommu_init_table(tbl, phb->node); + iommu_register_group(PCI_DN(dn)->table_group, + pci_domain_nr(phb->bus), 0); + set_iommu_table_base(&dev->dev, tbl); + iommu_add_device(&dev->dev); return; } @@ -692,12 +783,14 @@ * an already allocated iommu table is found and use that. */ - while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL) + while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) dn = dn->parent; - if (dn && PCI_DN(dn)) - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); - else + if (dn && PCI_DN(dn)) { + set_iommu_table_base(&dev->dev, + PCI_DN(dn)->table_group->tables[0]); + iommu_add_device(&dev->dev); + } else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); } @@ -714,35 +807,22 @@ early_param("disable_ddw", disable_ddw_setup); -static inline void __remove_ddw(struct device_node *np, const u32 *ddw_avail, u64 liobn) -{ - int ret; - - ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); - if (ret) - pr_warning("%s: failed to remove DMA window: rtas returned " - "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddw_avail[2], liobn); - else - pr_debug("%s: successfully removed DMA window: rtas returned " - "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddw_avail[2], liobn); -} - -static void remove_ddw(struct device_node *np) +static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddw_avail; + u32 ddw_avail[3]; u64 liobn; - int len, ret; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], 3); - ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) return; - if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + if (ret || win64->length < sizeof(*dwp)) goto delprop; dwp = win64->value; @@ -758,10 +838,19 @@ pr_debug("%s successfully cleared tces in window.\n", np->full_name); - __remove_ddw(np, ddw_avail, liobn); + ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); + if (ret) + pr_warning("%s: failed to remove direct window: rtas returned " + "%d to ibm,remove-pe-dma-window(%x) %llx\n", + np->full_name, ret, ddw_avail[2], liobn); + else + pr_debug("%s: successfully removed direct window: rtas returned " + "%d to ibm,remove-pe-dma-window(%x) %llx\n", + np->full_name, ret, ddw_avail[2], liobn); delprop: - ret = of_remove_property(np, win64); + if (remove_prop) + ret = of_remove_property(np, win64); if (ret) pr_warning("%s: failed to remove direct window property: %d\n", np->full_name, ret); @@ -778,7 +867,7 @@ list_for_each_entry(window, &direct_window_list, list) { if (window->device == pdn) { direct64 = window->prop; - dma_addr = direct64->dma_base; + dma_addr = be64_to_cpu(direct64->dma_base); break; } } @@ -787,68 +876,33 @@ return dma_addr; } -static void __restore_default_window(struct eeh_dev *edev, - u32 ddw_restore_token) -{ - u32 cfg_addr; - u64 buid; - int ret; - - /* - * Get the config address and phb buid of the PE window. - * Rely on eeh to retrieve this for us. - * Retrieve them from the pci device, not the node with the - * dma-window property - */ - cfg_addr = edev->config_addr; - if (edev->pe_config_addr) - cfg_addr = edev->pe_config_addr; - buid = edev->phb->buid; - - do { - ret = rtas_call(ddw_restore_token, 3, 1, NULL, cfg_addr, - BUID_HI(buid), BUID_LO(buid)); - } while (rtas_busy_delay(ret)); - pr_info("ibm,reset-pe-dma-windows(%x) %x %x %x returned %d\n", - ddw_restore_token, cfg_addr, BUID_HI(buid), BUID_LO(buid), ret); -} - static int find_existing_ddw_windows(void) { + int len; struct device_node *pdn; + struct direct_window *window; const struct dynamic_dma_window_prop *direct64; - const u32 *ddw_extensions; if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; for_each_node_with_property(pdn, DIRECT64_PROPNAME) { - direct64 = of_get_property(pdn, DIRECT64_PROPNAME, NULL); + direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); if (!direct64) continue; - /* - * We need to ensure the IOMMU table is active when we - * return from the IOMMU setup so that the common code - * can clear the table or find the holes. To that end, - * first, remove any existing DDW configuration. - */ - remove_ddw(pdn); + window = kzalloc(sizeof(*window), GFP_KERNEL); + if (!window || len < sizeof(struct dynamic_dma_window_prop)) { + kfree(window); + remove_ddw(pdn, true); + continue; + } - /* - * Second, if we are running on a new enough level of - * firmware where the restore API is present, use it to - * restore the 32-bit window, which was removed in - * create_ddw. - * If the API is not present, then create_ddw couldn't - * have removed the 32-bit window in the first place, so - * removing the DDW configuration should be sufficient. - */ - ddw_extensions = of_get_property(pdn, "ibm,ddw-extensions", - NULL); - if (ddw_extensions && ddw_extensions[0] > 0) - __restore_default_window(of_node_to_eeh_dev(pdn), - ddw_extensions[1]); + window->device = pdn; + window->prop = direct64; + spin_lock(&direct_window_list_lock); + list_add(&window->list, &direct_window_list); + spin_unlock(&direct_window_list_lock); } return 0; @@ -906,8 +960,9 @@ do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, - BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, + cfg_addr, BUID_HI(buid), BUID_LO(buid), + page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " @@ -918,12 +973,6 @@ return ret; } -static void restore_default_window(struct pci_dev *dev, - u32 ddw_restore_token) -{ - __restore_default_window(pci_dev_to_eeh_dev(dev), ddw_restore_token); -} - struct failed_ddw_pdn { struct device_node *pdn; struct list_head list; @@ -950,14 +999,10 @@ int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddw_avail); - const u32 *uninitialized_var(ddw_extensions); - u32 ddw_restore_token = 0; + u32 ddw_avail[3]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; - const void *dma_window = NULL; - unsigned long liobn, offset, size; struct failed_ddw_pdn *fpdn; mutex_lock(&direct_window_init_mutex); @@ -986,44 +1031,12 @@ * for the given node in that order. * the property is actually in the parent, not the PE */ - ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddw_avail || len < 3 * sizeof(u32)) - goto out_unlock; - - /* - * the extensions property is only required to exist in certain - * levels of firmware and later - * the ibm,ddw-extensions property is a list with the first - * element containing the number of extensions and each - * subsequent entry is a value corresponding to that extension - */ - ddw_extensions = of_get_property(pdn, "ibm,ddw-extensions", &len); - if (ddw_extensions) { - /* - * each new defined extension length should be added to - * the top of the switch so the "earlier" entries also - * get picked up - */ - switch (ddw_extensions[0]) { - /* ibm,reset-pe-dma-windows */ - case 1: - ddw_restore_token = ddw_extensions[1]; - break; - } - } - - /* - * Only remove the existing DMA window if we can restore back to - * the default state. Removing the existing window maximizes the - * resources available to firmware for dynamic window creation. - */ - if (ddw_restore_token) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - of_parse_dma_window(pdn, dma_window, &liobn, &offset, &size); - __remove_ddw(pdn, ddw_avail, liobn); - } + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], 3); + if (ret) + goto out_failed; - /* + /* * Query if there is a second window of size to map the * whole partition. Query returns number of windows, largest * block assigned to PE (partition endpoint), and two bitmasks @@ -1032,7 +1045,7 @@ dn = pci_device_to_OF_node(dev); ret = query_ddw(dev, ddw_avail, &query); if (ret != 0) - goto out_restore_window; + goto out_failed; if (query.windows_available == 0) { /* @@ -1041,7 +1054,7 @@ * trading in for a larger page size. */ dev_dbg(&dev->dev, "no free dynamic windows"); - goto out_restore_window; + goto out_failed; } if (query.page_size & 4) { page_shift = 24; /* 16MB */ @@ -1052,7 +1065,7 @@ } else { dev_dbg(&dev->dev, "no supported direct page size in mask %x", query.page_size); - goto out_restore_window; + goto out_failed; } /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ @@ -1061,14 +1074,14 @@ dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); - goto out_restore_window; + goto out_failed; } len = order_base_2(max_addr); win64 = kzalloc(sizeof(struct property), GFP_KERNEL); if (!win64) { dev_info(&dev->dev, "couldn't allocate property for 64bit dma window\n"); - goto out_restore_window; + goto out_failed; } win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); @@ -1084,7 +1097,8 @@ goto out_free_prop; ddwprop->liobn = cpu_to_be32(create.liobn); - ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); + ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | + create.addr_lo); ddwprop->tce_shift = cpu_to_be32(page_shift); ddwprop->window_shift = cpu_to_be32(len); @@ -1116,23 +1130,21 @@ list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = of_read_number(&create.addr_hi, 2); + dma_addr = be64_to_cpu(ddwprop->dma_base); goto out_unlock; out_free_window: kfree(window); out_clear_window: - remove_ddw(pdn); + remove_ddw(pdn, true); out_free_prop: kfree(win64->name); kfree(win64->value); kfree(win64); -out_restore_window: - if (ddw_restore_token) - restore_default_window(dev, ddw_restore_token); +out_failed: fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); if (!fpdn) @@ -1149,7 +1161,7 @@ { struct device_node *pdn, *dn; struct iommu_table *tbl; - const void *dma_window = NULL; + const __be32 *dma_window = NULL; struct pci_dn *pci; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1163,7 +1175,7 @@ dn = pci_device_to_OF_node(dev); pr_debug(" node is %s\n", dn->full_name); - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1179,17 +1191,21 @@ pr_debug(" parent is %s\n", pdn->full_name); pci = PCI_DN(pdn); - if (!pci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + if (!pci->table_group) { + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - pr_debug(" created table: %p\n", pci->iommu_table); + tbl->it_ops = &iommu_table_lpar_multi_ops; + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, + pci_domain_nr(pci->phb->bus), 0); + pr_debug(" created table: %p\n", pci->table_group); } else { - pr_debug(" found DMA window, table: %p\n", pci->iommu_table); + pr_debug(" found DMA window, table: %p\n", pci->table_group); } - set_iommu_table_base(&dev->dev, pci->iommu_table); + set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); + iommu_add_device(&dev->dev); } static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) @@ -1197,7 +1213,7 @@ bool ddw_enabled = false; struct device_node *pdn, *dn; struct pci_dev *pdev; - const void *dma_window = NULL; + const __be32 *dma_window = NULL; u64 dma_offset; if (!dev->dma_mask) @@ -1219,7 +1235,7 @@ * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1236,11 +1252,10 @@ } } - /* fall back on iommu ops, restore table pointer with ops */ + /* fall back on iommu ops */ if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) { dev_info(dev, "Restoring 32-bit DMA via iommu\n"); set_dma_ops(dev, &dma_iommu_ops); - pci_dma_dev_setup_pSeriesLP(pdev); } check_mask: @@ -1263,7 +1278,7 @@ dn = pci_device_to_OF_node(pdev); /* search upwards for ibm,dma-window */ - for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table; + for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group; dn = dn->parent) if (of_get_property(dn, "ibm,dma-window", NULL)) break; @@ -1276,15 +1291,6 @@ return dma_iommu_ops.get_required_mask(dev); } -#else /* CONFIG_PCI */ -#define pci_dma_bus_setup_pSeries NULL -#define pci_dma_dev_setup_pSeries NULL -#define pci_dma_bus_setup_pSeriesLP NULL -#define pci_dma_dev_setup_pSeriesLP NULL -#define dma_set_mask_pSeriesLP NULL -#define dma_get_required_mask_pSeriesLP NULL -#endif /* !CONFIG_PCI */ - static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1325,18 +1331,27 @@ .notifier_call = iommu_mem_notifier, }; -static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) { int err = NOTIFY_OK; - struct device_node *np = node; + struct of_reconfig_data *rd = data; + struct device_node *np = rd->dn; struct pci_dn *pci = PCI_DN(np); struct direct_window *window; switch (action) { case OF_RECONFIG_DETACH_NODE: - remove_ddw(np); - if (pci && pci->iommu_table) - iommu_free_table(pci->iommu_table, np->full_name); + /* + * Removing the property will invoke the reconfig + * notifier again, which causes dead-lock on the + * read-write semaphore of the notifier chain. So + * we have to remove the property when releasing + * the device node. + */ + remove_ddw(np, false); + if (pci && pci->table_group) + iommu_pseries_free_group(pci->table_group, + np->full_name); spin_lock(&direct_window_list_lock); list_for_each_entry(window, &direct_window_list, list) { @@ -1366,24 +1381,13 @@ return; if (firmware_has_feature(FW_FEATURE_LPAR)) { - if (firmware_has_feature(FW_FEATURE_MULTITCE)) { - ppc_md.tce_build = tce_buildmulti_pSeriesLP; - ppc_md.tce_free = tce_freemulti_pSeriesLP; - } else { - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; - } - ppc_md.tce_get = tce_get_pSeriesLP; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP; - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP; + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP; } else { - ppc_md.tce_build = tce_build_pSeries; - ppc_md.tce_free = tce_free_pSeries; - ppc_md.tce_get = tce_get_pseries; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeries; - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeries; + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; } @@ -1399,11 +1403,11 @@ firmware_has_feature(FW_FEATURE_LPAR) && firmware_has_feature(FW_FEATURE_MULTITCE)) { printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; } return 1; } __setup("multitce=", disable_multitce); + +machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);