-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Li Hua Qian <[email protected]>
- Loading branch information
Showing
1 changed file
with
292 additions
and
0 deletions.
There are no files selected for viewing
292 changes: 292 additions & 0 deletions
292
recipes-kernel/linux/files/patches-6.1/0099-WIP-print-pvu-size.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,292 @@ | ||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||
From: Li Hua Qian <[email protected]> | ||
Date: Wed, 9 Oct 2024 13:00:39 +0800 | ||
Subject: [PATCH] WIP: print pvu size | ||
|
||
Signed-off-by: Li Hua Qian <[email protected]> | ||
--- | ||
drivers/pci/controller/dwc/pci-keystone.c | 1 + | ||
drivers/soc/ti/ti-pvu.c | 2 + | ||
include/linux/dma-mapping.h | 27 +++++++++++- | ||
kernel/dma/direct.c | 17 ++++++++ | ||
kernel/dma/mapping.c | 51 +++++++++++++++++++++-- | ||
5 files changed, 93 insertions(+), 5 deletions(-) | ||
|
||
diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c | ||
index f8fe663e6e75..61a2e1174369 100644 | ||
--- a/drivers/pci/controller/dwc/pci-keystone.c | ||
+++ b/drivers/pci/controller/dwc/pci-keystone.c | ||
@@ -1501,6 +1501,7 @@ static int ks_pcie_probe(struct platform_device *pdev) | ||
ret = ks_init_restricted_dma(pdev); | ||
if (ret < 0) | ||
goto err_get_sync; | ||
+ dev_err(dev, "Lee: [%s]-[%d] pdev_size = %ld\n", __func__, __LINE__, sizeof(*pdev)); | ||
|
||
switch (mode) { | ||
case DW_PCIE_RC_TYPE: | ||
diff --git a/drivers/soc/ti/ti-pvu.c b/drivers/soc/ti/ti-pvu.c | ||
index af1cadc2f4bc..2fbbd74d7d36 100644 | ||
--- a/drivers/soc/ti/ti-pvu.c | ||
+++ b/drivers/soc/ti/ti-pvu.c | ||
@@ -270,6 +270,7 @@ static int pvu_create_region(struct ti_pvu *pvu, u64 addr, u64 size) | ||
int psize; | ||
int entry; | ||
|
||
+ dev_err(&pvu->pdev->dev, "Lee: [%s]-[%d] size = %ld\n", __func__, __LINE__, size); | ||
while (size > 0) { | ||
entry = pvu_get_free_entry(pvu); | ||
if (entry < 0) { | ||
@@ -306,6 +307,7 @@ static void pvu_remove_region(struct ti_pvu *pvu, u64 addr, u64 size) | ||
u64 entry_addr; | ||
u32 entry2; | ||
|
||
+ dev_err(&pvu->pdev->dev, "Lee: [%s]-[%d] size = %ld\n", __func__, __LINE__, size); | ||
for (n = 0; n < pvu->num_entries; n++) { | ||
entry_base = pvu->tlbif_base + n * 0x20; | ||
entry2 = readl(entry_base + PVU_ENTRY2); | ||
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h | ||
index 0ee20b764000..6e494ecfdfeb 100644 | ||
--- a/include/linux/dma-mapping.h | ||
+++ b/include/linux/dma-mapping.h | ||
@@ -417,16 +417,39 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, | ||
#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) | ||
#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) | ||
|
||
+static unsigned long dma_alloc_count; /* debug counter: bumped in dma_alloc_coherent(), dropped in dma_free_coherent(); static in a header, so every .c file including it gets a private copy */ | ||
+static unsigned long dma_alloc_size; /* debug byte total kept alongside dma_alloc_count; updated with plain +=/-= and no locking */ | ||
+ | ||
+static inline bool is_pcie_device(struct device *dev) /* debug helper: true when dev sits on the bus named "pci" */ | ||
+{ | ||
+ return dev->bus ? !strcmp(dev->bus->name, "pci") : false; /* a device without a bus is never reported as PCI */ | ||
+} | ||
+ | ||
static inline void *dma_alloc_coherent(struct device *dev, size_t size, | ||
dma_addr_t *dma_handle, gfp_t gfp) | ||
{ | ||
- return dma_alloc_attrs(dev, size, dma_handle, gfp, | ||
- (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); | ||
+ void *ret = dma_alloc_attrs(dev, size, dma_handle, gfp, /* same call as before; result kept so it can be logged */ | ||
+ (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); | ||
+ if (ret) { /* count only allocations that succeeded, so the totals are not inflated by failed requests */ | ||
+ dma_alloc_count++; | ||
+ dma_alloc_size += size; | ||
+ } | ||
+ if (ret && is_pcie_device(dev)) /* extra line for successful PCIe allocations */ | ||
+ dev_err(dev, "Lee: [%s] DMA memory allocated by PCIe device: %zu bytes, total allocations: %lu, total size: %lu bytes\n", | ||
+ __func__, size, dma_alloc_count, dma_alloc_size); | ||
+ dev_err(dev, "[%s] All: %zu bytes, total allocations: %lu, total size: %lu bytes\n", __func__, size, dma_alloc_count, dma_alloc_size); /* logged for every request, failed or not */ | ||
+ return ret; | ||
} | ||
|
||
static inline void dma_free_coherent(struct device *dev, size_t size, | ||
void *cpu_addr, dma_addr_t dma_handle) | ||
{ | ||
+ dma_alloc_count--; /* mirror dma_alloc_coherent(), which counts every device and not only PCIe ones */ | ||
+ dma_alloc_size -= size; | ||
+ if (is_pcie_device(dev)) { /* only the log line stays PCIe-specific */ | ||
+ dev_err(dev, "Lee: [%s] DMA memory freed by PCIe device: %zu bytes, total allocations: %lu, total size: %lu bytes\n", | ||
+ __func__, size, dma_alloc_count, dma_alloc_size); | ||
+ } | ||
return dma_free_attrs(dev, size, cpu_addr, dma_handle, 0); | ||
} | ||
|
||
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c | ||
index d4215739efc7..a43f04059e8e 100644 | ||
--- a/kernel/dma/direct.c | ||
+++ b/kernel/dma/direct.c | ||
@@ -216,15 +216,18 @@ void *dma_direct_alloc(struct device *dev, size_t size, | ||
if (attrs & DMA_ATTR_NO_WARN) | ||
gfp |= __GFP_NOWARN; | ||
|
||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && | ||
!force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) | ||
return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); | ||
|
||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (!dev_is_dma_coherent(dev)) { | ||
/* | ||
* Fallback to the arch handler if it exists. This should | ||
* eventually go away. | ||
*/ | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && | ||
!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && | ||
!IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && | ||
@@ -236,6 +239,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, | ||
* If there is a global pool, always allocate from it for | ||
* non-coherent devices. | ||
*/ | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL)) | ||
return dma_alloc_from_global_coherent(dev, size, | ||
dma_handle); | ||
@@ -247,13 +251,17 @@ void *dma_direct_alloc(struct device *dev, size_t size, | ||
*/ | ||
remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP); | ||
if (remap) { | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (dma_direct_use_pool(dev, gfp)) | ||
return dma_direct_alloc_from_pool(dev, size, | ||
dma_handle, gfp); | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
} else { | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED)) | ||
return NULL; | ||
set_uncached = true; | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
} | ||
} | ||
|
||
@@ -264,6 +272,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, | ||
if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) | ||
return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); | ||
|
||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
/* we always manually zero the memory once we are done */ | ||
page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); | ||
if (!page) | ||
@@ -274,6 +283,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, | ||
* combination the cma= arguments and per-arch setup. These need to be | ||
* remapped to return a kernel virtual address. | ||
*/ | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (PageHighMem(page)) { | ||
remap = true; | ||
set_uncached = false; | ||
@@ -291,15 +301,18 @@ void *dma_direct_alloc(struct device *dev, size_t size, | ||
/* create a coherent mapping */ | ||
ret = dma_common_contiguous_remap(page, size, prot, | ||
__builtin_return_address(0)); | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (!ret) | ||
goto out_free_pages; | ||
} else { | ||
ret = page_address(page); | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (dma_set_decrypted(dev, ret, size)) | ||
goto out_leak_pages; | ||
} | ||
|
||
memset(ret, 0, size); | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
|
||
if (set_uncached) { | ||
arch_dma_prep_coherent(page, size); | ||
@@ -307,17 +320,21 @@ void *dma_direct_alloc(struct device *dev, size_t size, | ||
if (IS_ERR(ret)) | ||
goto out_encrypt_pages; | ||
} | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
|
||
*dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); | ||
return ret; | ||
|
||
out_encrypt_pages: | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
if (dma_set_encrypted(dev, page_address(page), size)) | ||
return NULL; | ||
out_free_pages: | ||
__dma_direct_free_pages(dev, page, size); | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
return NULL; | ||
out_leak_pages: | ||
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %ld\n", __func__, __LINE__, attrs); | ||
return NULL; | ||
} | ||
|
||
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c | ||
index 33437d620644..03d9ca27ffe7 100644 | ||
--- a/kernel/dma/mapping.c | ||
+++ b/kernel/dma/mapping.c | ||
@@ -224,6 +224,8 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, | ||
* dma_unmap_sg_attrs() should be used to unmap the buffer with the | ||
* original sg and original nents (not the value returned by this funciton). | ||
*/ | ||
+size_t total_size = 0; | ||
+size_t all_total_size = 0; | ||
unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, | ||
int nents, enum dma_data_direction dir, unsigned long attrs) | ||
{ | ||
@@ -232,6 +234,32 @@ unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, | ||
ret = __dma_map_sg_attrs(dev, sg, nents, dir, attrs); | ||
if (ret < 0) | ||
return 0; | ||
+ | ||
+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) { | ||
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %ld, nentry = %d\n", __func__, __LINE__, sizeof(*dev), nents); | ||
+ | ||
+ for (int i = 0; i < nents; i++) { | ||
+ size_t entry_size = sg[i].length; | ||
+ total_size += entry_size; | ||
+ if (i%10) { | ||
+ dev_err(dev, "Lee: Entry %d: size = %zu bytes\n", i, entry_size); | ||
+ dev_err(dev, "Lee: Total mapping size = %zu bytes\n", total_size); | ||
+ } | ||
+ } | ||
+ | ||
+ } | ||
+ | ||
+ for (int i = 0; i < nents; i++) { | ||
+ size_t entry_size = sg[i].length; | ||
+ all_total_size += entry_size; | ||
+ } | ||
+ // if (all_total_size > 16200000) { | ||
+ // static int j = 0; | ||
+ // if (j%10 == 0) | ||
+ // dev_err(dev, "Lee: All total mapping size = %zu bytes\n", all_total_size); | ||
+ // j++; | ||
+ // } | ||
+ | ||
return ret; | ||
} | ||
EXPORT_SYMBOL(dma_map_sg_attrs); | ||
@@ -272,6 +300,9 @@ int dma_map_sgtable(struct device *dev, struct sg_table *sgt, | ||
if (nents < 0) | ||
return nents; | ||
sgt->nents = nents; | ||
+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) { | ||
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %ld\n", __func__, __LINE__, sizeof(*dev)); | ||
+ } | ||
return 0; | ||
} | ||
EXPORT_SYMBOL_GPL(dma_map_sgtable); | ||
@@ -498,20 +529,34 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
|
||
WARN_ON_ONCE(!dev->coherent_dma_mask); | ||
|
||
+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) { | ||
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %ld\n", __func__, __LINE__, sizeof(*dev)); | ||
+ } | ||
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) | ||
return cpu_addr; | ||
|
||
/* let the implementation decide on the zone to allocate from: */ | ||
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); | ||
|
||
- if (dma_alloc_direct(dev, ops)) | ||
+ if (dma_alloc_direct(dev, ops)) { | ||
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); | ||
- else if (ops->alloc) | ||
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %ld\n", __func__, __LINE__, cpu_addr); | ||
+ } | ||
+ else if (ops->alloc){ | ||
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
- else | ||
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %ld\n", __func__, __LINE__, cpu_addr); | ||
+ } | ||
+ else { | ||
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %ld\n", __func__, __LINE__, cpu_addr); | ||
return NULL; | ||
+ } | ||
|
||
debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); | ||
+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) { | ||
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %ld\n", __func__, __LINE__, sizeof(*dev)); | ||
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %ld\n", __func__, __LINE__, cpu_addr); | ||
+ } | ||
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %ld\n", __func__, __LINE__, cpu_addr); | ||
return cpu_addr; | ||
} | ||
EXPORT_SYMBOL(dma_alloc_attrs); |