diff --git a/recipes-kernel/linux/files/patches-6.1/0099-WIP-print-pvu-size.patch b/recipes-kernel/linux/files/patches-6.1/0099-WIP-print-pvu-size.patch
new file mode 100644
index 000000000..81f989016
--- /dev/null
+++ b/recipes-kernel/linux/files/patches-6.1/0099-WIP-print-pvu-size.patch
@@ -0,0 +1,292 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Li Hua Qian
+Date: Wed, 9 Oct 2024 13:00:39 +0800
+Subject: [PATCH] WIP: print pvu size
+
+Signed-off-by: Li Hua Qian
+---
+ drivers/pci/controller/dwc/pci-keystone.c |  1 +
+ drivers/soc/ti/ti-pvu.c                   |  2 +
+ include/linux/dma-mapping.h               | 27 +++++++++++-
+ kernel/dma/direct.c                       | 17 ++++++++
+ kernel/dma/mapping.c                      | 51 +++++++++++++++++++++--
+ 5 files changed, 93 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
+index f8fe663e6e75..61a2e1174369 100644
+--- a/drivers/pci/controller/dwc/pci-keystone.c
++++ b/drivers/pci/controller/dwc/pci-keystone.c
+@@ -1501,6 +1501,7 @@ static int ks_pcie_probe(struct platform_device *pdev)
+ 	ret = ks_init_restricted_dma(pdev);
+ 	if (ret < 0)
+ 		goto err_get_sync;
++	dev_err(dev, "Lee: [%s]-[%d] pdev_size = %zu\n", __func__, __LINE__, sizeof(*pdev));
+ 
+ 	switch (mode) {
+ 	case DW_PCIE_RC_TYPE:
+diff --git a/drivers/soc/ti/ti-pvu.c b/drivers/soc/ti/ti-pvu.c
+index af1cadc2f4bc..2fbbd74d7d36 100644
+--- a/drivers/soc/ti/ti-pvu.c
++++ b/drivers/soc/ti/ti-pvu.c
+@@ -270,6 +270,7 @@ static int pvu_create_region(struct ti_pvu *pvu, u64 addr, u64 size)
+ 	int psize;
+ 	int entry;
+ 
++	dev_err(&pvu->pdev->dev, "Lee: [%s]-[%d] size = %llu\n", __func__, __LINE__, size);
+ 	while (size > 0) {
+ 		entry = pvu_get_free_entry(pvu);
+ 		if (entry < 0) {
+@@ -306,6 +307,7 @@ static void pvu_remove_region(struct ti_pvu *pvu, u64 addr, u64 size)
+ 	u64 entry_addr;
+ 	u32 entry2;
+ 
++	dev_err(&pvu->pdev->dev, "Lee: [%s]-[%d] size = %llu\n", __func__, __LINE__, size);
+ 	for (n = 0; n < pvu->num_entries; n++) {
+ 		entry_base = pvu->tlbif_base + n * 0x20;
+ 		entry2 = readl(entry_base + PVU_ENTRY2);
+diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
+index 0ee20b764000..6e494ecfdfeb 100644
+--- a/include/linux/dma-mapping.h
++++ b/include/linux/dma-mapping.h
+@@ -417,16 +417,39 @@ static inline void dma_sync_sgtable_for_device(struct device *dev,
+ #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
+ #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)
+ 
++static unsigned long dma_alloc_count;
++static unsigned long dma_alloc_size;
++
++static inline bool is_pcie_device(struct device *dev)
++{
++	return dev->bus && strcmp(dev->bus->name, "pci") == 0;
++}
++
+ static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+ 		dma_addr_t *dma_handle, gfp_t gfp)
+ {
+-	return dma_alloc_attrs(dev, size, dma_handle, gfp,
+-			(gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
++	void *ret = dma_alloc_attrs(dev, size, dma_handle, gfp,
++			(gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
++	dma_alloc_count++;
++	dma_alloc_size += size;
++	if (ret && is_pcie_device(dev)) {
++		dev_err(dev, "Lee: [%s] DMA memory allocated by PCIe device: %zu bytes, total allocations: %lu, total size: %lu bytes\n",
++			__func__, size, dma_alloc_count, dma_alloc_size);
++	}
++	dev_err(dev, "Lee: [%s] All: %zu bytes, total allocations: %lu, total size: %lu bytes\n",
++		__func__, size, dma_alloc_count, dma_alloc_size);
++	return ret;
+ }
+ 
+ static inline void dma_free_coherent(struct device *dev, size_t size,
+ 		void *cpu_addr, dma_addr_t dma_handle)
+ {
++	dma_alloc_count--;
++	dma_alloc_size -= size;
++	if (is_pcie_device(dev)) {
++		dev_err(dev, "Lee: [%s] DMA memory freed by PCIe device: %zu bytes, total allocations: %lu, total size: %lu bytes\n",
++			__func__, size, dma_alloc_count, dma_alloc_size);
++	}
+ 	return dma_free_attrs(dev, size, cpu_addr, dma_handle, 0);
+ }
+ 
+diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
+index d4215739efc7..a43f04059e8e 100644
+--- a/kernel/dma/direct.c
++++ b/kernel/dma/direct.c
+@@ -216,15 +216,18 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ 	if (attrs & DMA_ATTR_NO_WARN)
+ 		gfp |= __GFP_NOWARN;
+ 
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
+ 	    !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev))
+ 		return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp);
+ 
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 	if (!dev_is_dma_coherent(dev)) {
+ 		/*
+ 		 * Fallback to the arch handler if it exists.  This should
+ 		 * eventually go away.
+ 		 */
++		dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 		if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
+ 		    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+ 		    !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) &&
+@@ -236,6 +239,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ 		 * If there is a global pool, always allocate from it for
+ 		 * non-coherent devices.
+ 		 */
++		dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 		if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL))
+ 			return dma_alloc_from_global_coherent(dev, size,
+ 					dma_handle);
+@@ -247,13 +251,17 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ 		 */
+ 		remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP);
+ 		if (remap) {
++			dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 			if (dma_direct_use_pool(dev, gfp))
+ 				return dma_direct_alloc_from_pool(dev, size,
+ 						dma_handle, gfp);
++			dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 		} else {
++			dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 			if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED))
+ 				return NULL;
+ 			set_uncached = true;
++			dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 		}
+ 	}
+ 
+@@ -264,6 +272,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ 	if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp))
+ 		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
+ 
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 	/* we always manually zero the memory once we are done */
+ 	page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true);
+ 	if (!page)
+@@ -274,6 +283,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ 	 * combination the cma= arguments and per-arch setup.  These need to be
+ 	 * remapped to return a kernel virtual address.
+ 	 */
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 	if (PageHighMem(page)) {
+ 		remap = true;
+ 		set_uncached = false;
+@@ -291,15 +301,18 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ 		/* create a coherent mapping */
+ 		ret = dma_common_contiguous_remap(page, size, prot,
+ 				__builtin_return_address(0));
++		dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 		if (!ret)
+ 			goto out_free_pages;
+ 	} else {
+ 		ret = page_address(page);
++		dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 		if (dma_set_decrypted(dev, ret, size))
+ 			goto out_leak_pages;
+ 	}
+ 
+ 	memset(ret, 0, size);
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 
+ 	if (set_uncached) {
+ 		arch_dma_prep_coherent(page, size);
+@@ -307,17 +320,21 @@ void *dma_direct_alloc(struct device *dev, size_t size,
+ 		if (IS_ERR(ret))
+ 			goto out_encrypt_pages;
+ 	}
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 
+ 	*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+ 	return ret;
+ 
+ out_encrypt_pages:
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 	if (dma_set_encrypted(dev, page_address(page), size))
+ 		return NULL;
+ out_free_pages:
+ 	__dma_direct_free_pages(dev, page, size);
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 	return NULL;
+ out_leak_pages:
++	dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
+ 	return NULL;
+ }
+ 
+diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
+index 33437d620644..03d9ca27ffe7 100644
+--- a/kernel/dma/mapping.c
++++ b/kernel/dma/mapping.c
+@@ -224,6 +224,8 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+  * dma_unmap_sg_attrs() should be used to unmap the buffer with the
+  * original sg and original nents (not the value returned by this funciton).
+  */
++static size_t total_size;
++static size_t all_total_size;
+ unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+ 		int nents, enum dma_data_direction dir, unsigned long attrs)
+ {
+@@ -232,6 +234,32 @@ unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+ 	ret = __dma_map_sg_attrs(dev, sg, nents, dir, attrs);
+ 	if (ret < 0)
+ 		return 0;
++
++	if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
++		dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu, nents = %d\n", __func__, __LINE__, sizeof(*dev), nents);
++
++		for (int i = 0; i < nents; i++) {
++			size_t entry_size = sg[i].length;
++			total_size += entry_size;
++			if (i % 10 == 0) {
++				dev_err(dev, "Lee: Entry %d: size = %zu bytes\n", i, entry_size);
++				dev_err(dev, "Lee: Total mapping size = %zu bytes\n", total_size);
++			}
++		}
++
++	}
++
++	for (int i = 0; i < nents; i++) {
++		size_t entry_size = sg[i].length;
++		all_total_size += entry_size;
++	}
++	// if (all_total_size > 16200000) {
++	// 	static int j = 0;
++	// 	if (j % 10 == 0)
++	// 		dev_err(dev, "Lee: All total mapping size = %zu bytes\n", all_total_size);
++	// 	j++;
++	// }
++
+ 	return ret;
+ }
+ EXPORT_SYMBOL(dma_map_sg_attrs);
+@@ -272,6 +300,9 @@ int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
+ 	if (nents < 0)
+ 		return nents;
+ 	sgt->nents = nents;
++	if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
++		dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu\n", __func__, __LINE__, sizeof(*dev));
++	}
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(dma_map_sgtable);
+@@ -498,20 +529,34 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ 
+ 	WARN_ON_ONCE(!dev->coherent_dma_mask);
+ 
++	if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
++		dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu\n", __func__, __LINE__, sizeof(*dev));
++	}
+ 	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
+ 		return cpu_addr;
+ 
+ 	/* let the implementation decide on the zone to allocate from: */
+ 	flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+ 
+-	if (dma_alloc_direct(dev, ops))
++	if (dma_alloc_direct(dev, ops)) {
+ 		cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
+-	else if (ops->alloc)
++		dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
++	}
++	else if (ops->alloc) {
+ 		cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
+-	else
++		dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
++	}
++	else {
++		dev_err(dev, "Lee: [%s]-[%d] no allocation path, returning NULL\n", __func__, __LINE__);
+ 		return NULL;
++	}
+ 
+ 	debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs);
++	if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
++		dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu\n", __func__, __LINE__, sizeof(*dev));
++		dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
++	}
++	dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
+ 	return cpu_addr;
+ }
+ EXPORT_SYMBOL(dma_alloc_attrs);
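
Review note: the `dma_alloc_count`/`dma_alloc_size` counters above are plain `static` variables defined in a header, so every translation unit that includes dma-mapping.h gets its own private copy, and concurrent allocations race on the updates; the printed totals are therefore only a rough, per-compilation-unit approximation. A minimal race-free sketch, assuming the counters move into a single C file (e.g. kernel/dma/mapping.c); the `dma_dbg_*` names are illustrative, not part of the patch:

/* Sketch only -- hypothetical dma_dbg_* helpers, not part of the patch above. */
#include <linux/atomic.h>
#include <linux/device.h>

static atomic_long_t dma_dbg_alloc_count = ATOMIC_LONG_INIT(0);
static atomic_long_t dma_dbg_alloc_bytes = ATOMIC_LONG_INIT(0);

/* Call from dma_alloc_attrs() after a successful allocation. */
static void dma_dbg_account_alloc(struct device *dev, size_t size)
{
	long count = atomic_long_inc_return(&dma_dbg_alloc_count);
	long bytes = atomic_long_add_return(size, &dma_dbg_alloc_bytes);

	dev_err(dev, "Lee: alloc %zu bytes, allocations: %ld, total: %ld bytes\n",
		size, count, bytes);
}

/* Call from dma_free_attrs() before the memory is released. */
static void dma_dbg_account_free(struct device *dev, size_t size)
{
	long count = atomic_long_dec_return(&dma_dbg_alloc_count);
	long bytes = atomic_long_sub_return(size, &dma_dbg_alloc_bytes);

	dev_err(dev, "Lee: free %zu bytes, allocations: %ld, total: %ld bytes\n",
		size, count, bytes);
}

A second caveat: the instrumentation in dma_map_sg_attrs() walks the scatterlist as a plain array (`sg[i].length`), which reads past the end of a segment if a caller passes a chained scatterlist; `for_each_sg()` is the iterator that follows chain links. A sketch of the same accumulation with it:

	struct scatterlist *s;
	unsigned int i;
	size_t total = 0;

	/* for_each_sg() follows chain links that sg[i] indexing skips. */
	for_each_sg(sg, s, nents, i)
		total += s->length;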