--- zzzz-none-000/linux-3.10.107/arch/s390/pci/pci_dma.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/arch/s390/pci/pci_dma.c 2021-02-04 17:41:59.000000000 +0000 @@ -10,13 +10,21 @@ #include #include #include +#include #include #include static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; +static int s390_iommu_strict; -static unsigned long *dma_alloc_cpu_table(void) +static int zpci_refresh_global(struct zpci_dev *zdev) +{ + return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, + zdev->iommu_pages * PAGE_SIZE); +} + +unsigned long *dma_alloc_cpu_table(void) { unsigned long *table, *entry; @@ -25,7 +33,7 @@ return NULL; for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) - *entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED; + *entry = ZPCI_TABLE_INVALID; return table; } @@ -43,7 +51,7 @@ return NULL; for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) - *entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED; + *entry = ZPCI_PTE_INVALID; return table; } @@ -87,7 +95,7 @@ return pto; } -static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) { unsigned long *sto, *pto; unsigned int rtx, sx, px; @@ -106,20 +114,10 @@ return &pto[px]; } -static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr, - dma_addr_t dma_addr, int flags) +void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags) { - unsigned long *entry; - - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); - if (!entry) { - WARN_ON_ONCE(1); - return; - } - if (flags & ZPCI_PTE_INVALID) { invalidate_pt_entry(entry); - return; } else { set_pt_pfaa(entry, page_addr); validate_pt_entry(entry); @@ -138,6 +136,7 @@ u8 *page_addr = (u8 *) (pa & PAGE_MASK); dma_addr_t start_dma_addr = dma_addr; unsigned long irq_flags; + unsigned long *entry; int i, rc = 0; if (!nr_pages) @@ -145,40 +144,54 @@ spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); if (!zdev->dma_table) { - dev_err(&zdev->pdev->dev, "Missing DMA table\n"); + rc = -EINVAL; goto no_refresh; } for (i = 0; i < nr_pages; i++) { - dma_update_cpu_trans(zdev, page_addr, dma_addr, flags); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + if (!entry) { + rc = -ENOMEM; + goto undo_cpu_trans; + } + dma_update_cpu_trans(entry, page_addr, flags); page_addr += PAGE_SIZE; dma_addr += PAGE_SIZE; } /* - * rpcit is not required to establish new translations when previously - * invalid translation-table entries are validated, however it is - * required when altering previously valid entries. + * With zdev->tlb_refresh == 0, rpcit is not required to establish new + * translations when previously invalid translation-table entries are + * validated. With lazy unmap, it also is skipped for previously valid + * entries, but a global rpcit is then required before any address can + * be re-used, i.e. after each iommu bitmap wrap-around. */ if (!zdev->tlb_refresh && - ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) - /* - * TODO: also need to check that the old entry is indeed INVALID - * and not only for one page but for the whole range... - * -> now we WARN_ON in that case but with lazy unmap that - * needs to be redone! - */ + (!s390_iommu_strict || + ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))) goto no_refresh; - rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); + rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); +undo_cpu_trans: + if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { + flags = ZPCI_PTE_INVALID; + while (i-- > 0) { + page_addr -= PAGE_SIZE; + dma_addr -= PAGE_SIZE; + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + if (!entry) + break; + dma_update_cpu_trans(entry, page_addr, flags); + } + } no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); return rc; } -static void dma_free_seg_table(unsigned long entry) +void dma_free_seg_table(unsigned long entry) { unsigned long *sto = get_rt_sto(entry); int sx; @@ -190,28 +203,27 @@ dma_free_cpu_table(sto); } -static void dma_cleanup_tables(struct zpci_dev *zdev) +void dma_cleanup_tables(unsigned long *table) { - unsigned long *table; int rtx; - if (!zdev || !zdev->dma_table) + if (!table) return; - table = zdev->dma_table; for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) if (reg_entry_isvalid(table[rtx])) dma_free_seg_table(table[rtx]); dma_free_cpu_table(table); - zdev->dma_table = NULL; } -static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, unsigned long start, - int size) +static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, + unsigned long start, int size) { - unsigned long boundary_size = 0x1000000; + unsigned long boundary_size; + boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, 0, boundary_size, 0); } @@ -219,16 +231,21 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) { unsigned long offset, flags; + int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); - if (offset == -1) + if (offset == -1) { + /* wrap-around */ offset = __dma_alloc_iommu(zdev, 0, size); + wrap = 1; + } if (offset != -1) { zdev->next_bit = offset + size; - if (zdev->next_bit >= zdev->iommu_pages) - zdev->next_bit = 0; + if (!zdev->tlb_refresh && !s390_iommu_strict && wrap) + /* global flush after wrap-around with lazy unmap */ + zpci_refresh_global(zdev); } spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); return offset; @@ -242,61 +259,70 @@ if (!zdev->iommu_bitmap) goto out; bitmap_clear(zdev->iommu_bitmap, offset, size); - if (offset >= zdev->next_bit) + /* + * Lazy flush for unmap: need to move next_bit to avoid address re-use + * until wrap-around. + */ + if (!s390_iommu_strict && offset >= zdev->next_bit) zdev->next_bit = offset + size; out: spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); } -int dma_set_mask(struct device *dev, u64 mask) +static inline void zpci_err_dma(unsigned long rc, unsigned long addr) { - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; + struct { + unsigned long rc; + unsigned long addr; + } __packed data = {rc, addr}; - *dev->dma_mask = mask; - return 0; + zpci_err_hex(&data, sizeof(data)); } -EXPORT_SYMBOL_GPL(dma_set_mask); static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long nr_pages, iommu_page_index; unsigned long pa = page_to_phys(page) + offset; int flags = ZPCI_PTE_VALID; dma_addr_t dma_addr; + int ret; /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); iommu_page_index = dma_alloc_iommu(zdev, nr_pages); - if (iommu_page_index == -1) + if (iommu_page_index == -1) { + ret = -ENOSPC; goto out_err; + } /* Use rounded up size */ size = nr_pages * PAGE_SIZE; dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE; if (dma_addr + size > zdev->end_dma) { - dev_err(dev, "(dma_addr: 0x%16.16LX + size: 0x%16.16lx) > end_dma: 0x%16.16Lx\n", - dma_addr, size, zdev->end_dma); + ret = -ERANGE; goto out_free; } if (direction == DMA_NONE || direction == DMA_TO_DEVICE) flags |= ZPCI_TABLE_PROTECTED; - if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { - atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages); - return dma_addr + (offset & ~PAGE_MASK); - } + ret = dma_update_trans(zdev, pa, dma_addr, size, flags); + if (ret) + goto out_free; + + atomic64_add(nr_pages, &zdev->mapped_pages); + return dma_addr + (offset & ~PAGE_MASK); out_free: dma_free_iommu(zdev, iommu_page_index, nr_pages); out_err: - dev_err(dev, "Failed to map addr: %lx\n", pa); + zpci_err("map error:\n"); + zpci_err_dma(ret, pa); return DMA_ERROR_CODE; } @@ -304,17 +330,21 @@ size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long iommu_page_index; - int npages; + int npages, ret; npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr = dma_addr & PAGE_MASK; - if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, - ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) - dev_err(dev, "Failed to unmap addr: %Lx\n", dma_addr); + ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, + ZPCI_PTE_INVALID); + if (ret) { + zpci_err("unmap error:\n"); + zpci_err_dma(ret, dma_addr); + return; + } - atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages); + atomic64_add(npages, &zdev->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; dma_free_iommu(zdev, iommu_page_index, npages); } @@ -323,7 +353,7 @@ dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); struct page *page; unsigned long pa; dma_addr_t map; @@ -333,7 +363,6 @@ if (!page) return NULL; - atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages); pa = page_to_phys(page); memset((void *) pa, 0, size); @@ -344,6 +373,7 @@ return NULL; } + atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); if (dma_handle) *dma_handle = map; return (void *) pa; @@ -353,8 +383,11 @@ void *pa, dma_addr_t dma_handle, struct dma_attrs *attrs) { - s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size), - DMA_BIDIRECTIONAL, NULL); + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); + + size = PAGE_ALIGN(size); + atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); + s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long) pa, get_order(size)); } @@ -407,9 +440,15 @@ int zpci_dma_init_device(struct zpci_dev *zdev) { - unsigned int bitmap_order; int rc; + /* + * At this point, if the device is part of an IOMMU domain, this would + * be a strong hint towards a bug in the IOMMU API (common) code and/or + * simultaneous access via IOMMU and DMA API. So let's issue a warning. + */ + WARN_ON(zdev->s390_domain); + spin_lock_init(&zdev->iommu_bitmap_lock); spin_lock_init(&zdev->dma_table_lock); @@ -419,23 +458,27 @@ goto out; } - zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; + /* + * Restrict the iommu bitmap size to the minimum of the following: + * - main memory size + * - 3-level pagetable address limit minus start_dma offset + * - DMA address range allowed by the hardware (clp query pci fn) + * + * Also set zdev->end_dma to the actual end address of the usable + * range, instead of the theoretical maximum as reported by hardware. + */ + zdev->iommu_size = min3((u64) high_memory, + ZPCI_TABLE_SIZE_RT - zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - bitmap_order = get_order(zdev->iommu_pages / 8); - pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n", - zdev->iommu_size, zdev->iommu_pages, bitmap_order); - - zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - bitmap_order); + zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; goto free_dma_table; } - rc = zpci_register_ioat(zdev, - 0, - zdev->start_dma + PAGE_OFFSET, - zdev->start_dma + zdev->iommu_size - 1, + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) goto free_bitmap; @@ -453,10 +496,17 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) { + /* + * At this point, if the device is part of an IOMMU domain, this would + * be a strong hint towards a bug in the IOMMU API (common) code and/or + * simultaneous access via IOMMU and DMA API. So let's issue a warning. + */ + WARN_ON(zdev->s390_domain); + zpci_unregister_ioat(zdev, 0); - dma_cleanup_tables(zdev); - free_pages((unsigned long) zdev->iommu_bitmap, - get_order(zdev->iommu_pages / 8)); + dma_cleanup_tables(zdev->dma_table); + zdev->dma_table = NULL; + vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; zdev->next_bit = 0; } @@ -511,3 +561,12 @@ /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_dma_ops); + +static int __init s390_iommu_setup(char *str) +{ + if (!strncmp(str, "strict", 6)) + s390_iommu_strict = 1; + return 0; +} + +__setup("s390_iommu=", s390_iommu_setup);