Skip to content

Commit

Permalink
debug: print pvu size
Browse files Browse the repository at this point in the history
Signed-off-by: Li Hua Qian <[email protected]>
  • Loading branch information
huaqianli committed Nov 28, 2024
1 parent 871d532 commit bbf9296
Showing 1 changed file with 292 additions and 0 deletions.
292 changes: 292 additions & 0 deletions recipes-kernel/linux/files/patches-6.1/0099-WIP-print-pvu-size.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Li Hua Qian <[email protected]>
Date: Wed, 9 Oct 2024 13:00:39 +0800
Subject: [PATCH] WIP: print pvu size

Signed-off-by: Li Hua Qian <[email protected]>
---
drivers/pci/controller/dwc/pci-keystone.c | 1 +
drivers/soc/ti/ti-pvu.c | 2 +
include/linux/dma-mapping.h | 27 +++++++++++-
kernel/dma/direct.c | 17 ++++++++
kernel/dma/mapping.c | 51 +++++++++++++++++++++--
5 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c
index f8fe663e6e75..61a2e1174369 100644
--- a/drivers/pci/controller/dwc/pci-keystone.c
+++ b/drivers/pci/controller/dwc/pci-keystone.c
@@ -1501,6 +1501,7 @@ static int ks_pcie_probe(struct platform_device *pdev)
ret = ks_init_restricted_dma(pdev);
if (ret < 0)
goto err_get_sync;
+ dev_err(dev, "Lee: [%s]-[%d] pdev_size = %zu\n", __func__, __LINE__, sizeof(*pdev));

switch (mode) {
case DW_PCIE_RC_TYPE:
diff --git a/drivers/soc/ti/ti-pvu.c b/drivers/soc/ti/ti-pvu.c
index af1cadc2f4bc..2fbbd74d7d36 100644
--- a/drivers/soc/ti/ti-pvu.c
+++ b/drivers/soc/ti/ti-pvu.c
@@ -270,6 +270,7 @@ static int pvu_create_region(struct ti_pvu *pvu, u64 addr, u64 size)
int psize;
int entry;

+ dev_err(&pvu->pdev->dev, "Lee: [%s]-[%d] size = %llu\n", __func__, __LINE__, size);
while (size > 0) {
entry = pvu_get_free_entry(pvu);
if (entry < 0) {
@@ -306,6 +307,7 @@ static void pvu_remove_region(struct ti_pvu *pvu, u64 addr, u64 size)
u64 entry_addr;
u32 entry2;

+ dev_err(&pvu->pdev->dev, "Lee: [%s]-[%d] size = %llu\n", __func__, __LINE__, size);
for (n = 0; n < pvu->num_entries; n++) {
entry_base = pvu->tlbif_base + n * 0x20;
entry2 = readl(entry_base + PVU_ENTRY2);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 0ee20b764000..6e494ecfdfeb 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -417,16 +417,39 @@ static inline void dma_sync_sgtable_for_device(struct device *dev,
#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)

+static unsigned long dma_alloc_count; /* debug only: per-TU copy (static in header), not atomic */
+static unsigned long dma_alloc_size; /* debug only: per-TU copy (static in header), not atomic */
+
+static inline bool is_pcie_device(struct device *dev)
+{
+ return dev->bus && strcmp(dev->bus->name, "pci") == 0;
+}
+
static inline void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
{
- return dma_alloc_attrs(dev, size, dma_handle, gfp,
- (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
+ void *ret = dma_alloc_attrs(dev, size, dma_handle, gfp,
+ (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
+ if (ret) dma_alloc_count++; /* count only successful allocations */
+ if (ret) dma_alloc_size += size;
+ if (ret && is_pcie_device(dev)) {
+ dev_err(dev, "Lee: [%s] DMA memory allocated by PCIe device: %zu bytes, total allocations: %lu, total size: %lu bytes\n",
+ __func__, size, dma_alloc_count, dma_alloc_size);
+ }
+ dev_err(dev, "[%s] All: %zu bytes, total allocations: %lu, total size: %lu bytes\n",
+ __func__, size, dma_alloc_count, dma_alloc_size);
+ return ret;
}

static inline void dma_free_coherent(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_handle)
{
+ dma_alloc_count--; /* mirror the unconditional accounting done on alloc */
+ dma_alloc_size -= size;
+ if (is_pcie_device(dev)) {
+ dev_err(dev, "Lee: [%s] DMA memory freed by PCIe device: %zu bytes, total allocations: %lu, total size: %lu bytes\n",
+ __func__, size, dma_alloc_count, dma_alloc_size);
+ }
return dma_free_attrs(dev, size, cpu_addr, dma_handle, 0);
}

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index d4215739efc7..a43f04059e8e 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -216,15 +216,18 @@ void *dma_direct_alloc(struct device *dev, size_t size,
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;

+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev))
return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp);

+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (!dev_is_dma_coherent(dev)) {
/*
* Fallback to the arch handler if it exists. This should
* eventually go away.
*/
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
!IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) &&
@@ -236,6 +239,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
* If there is a global pool, always allocate from it for
* non-coherent devices.
*/
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL))
return dma_alloc_from_global_coherent(dev, size,
dma_handle);
@@ -247,13 +251,17 @@ void *dma_direct_alloc(struct device *dev, size_t size,
*/
remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP);
if (remap) {
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (dma_direct_use_pool(dev, gfp))
return dma_direct_alloc_from_pool(dev, size,
dma_handle, gfp);
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
} else {
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED))
return NULL;
set_uncached = true;
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
}
}

@@ -264,6 +272,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp))
return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);

+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
/* we always manually zero the memory once we are done */
page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true);
if (!page)
@@ -274,6 +283,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
* combination the cma= arguments and per-arch setup. These need to be
* remapped to return a kernel virtual address.
*/
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (PageHighMem(page)) {
remap = true;
set_uncached = false;
@@ -291,15 +301,18 @@ void *dma_direct_alloc(struct device *dev, size_t size,
/* create a coherent mapping */
ret = dma_common_contiguous_remap(page, size, prot,
__builtin_return_address(0));
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (!ret)
goto out_free_pages;
} else {
ret = page_address(page);
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (dma_set_decrypted(dev, ret, size))
goto out_leak_pages;
}

memset(ret, 0, size);
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);

if (set_uncached) {
arch_dma_prep_coherent(page, size);
@@ -307,17 +320,21 @@ void *dma_direct_alloc(struct device *dev, size_t size,
if (IS_ERR(ret))
goto out_encrypt_pages;
}
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);

*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return ret;

out_encrypt_pages:
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
if (dma_set_encrypted(dev, page_address(page), size))
return NULL;
out_free_pages:
__dma_direct_free_pages(dev, page, size);
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
return NULL;
out_leak_pages:
+ dev_err(dev, "Lee: [%s]-[%d] attrs = %lu\n", __func__, __LINE__, attrs);
return NULL;
}

diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 33437d620644..03d9ca27ffe7 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -224,6 +224,8 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
* dma_unmap_sg_attrs() should be used to unmap the buffer with the
* original sg and original nents (not the value returned by this funciton).
*/
+static size_t total_size;
+static size_t all_total_size;
unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
@@ -232,6 +234,32 @@ unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
ret = __dma_map_sg_attrs(dev, sg, nents, dir, attrs);
if (ret < 0)
return 0;
+
+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu, nentry = %d\n", __func__, __LINE__, sizeof(*dev), nents);
+
+ for (int i = 0; i < nents; i++) {
+ size_t entry_size = sg[i].length;
+ total_size += entry_size;
+ if (i % 10 == 0) { /* rate-limit: log every 10th entry */
+ dev_err(dev, "Lee: Entry %d: size = %zu bytes\n", i, entry_size);
+ dev_err(dev, "Lee: Total mapping size = %zu bytes\n", total_size);
+ }
+ }
+
+ }
+
+ for (int i = 0; i < nents; i++) {
+ size_t entry_size = sg[i].length;
+ all_total_size += entry_size;
+ }
+ // if (all_total_size > 16200000) {
+ // static int j = 0;
+ // if (j%10 == 0)
+ // dev_err(dev, "Lee: All total mapping size = %zu bytes\n", all_total_size);
+ // j++;
+ // }
+
return ret;
}
EXPORT_SYMBOL(dma_map_sg_attrs);
@@ -272,6 +300,9 @@ int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
if (nents < 0)
return nents;
sgt->nents = nents;
+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu\n", __func__, __LINE__, sizeof(*dev));
+ }
return 0;
}
EXPORT_SYMBOL_GPL(dma_map_sgtable);
@@ -498,20 +529,34 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,

WARN_ON_ONCE(!dev->coherent_dma_mask);

+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu\n", __func__, __LINE__, sizeof(*dev));
+ }
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
return cpu_addr;

/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);

- if (dma_alloc_direct(dev, ops))
+ if (dma_alloc_direct(dev, ops)) {
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
- else if (ops->alloc)
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
+ }
+ else if (ops->alloc) {
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
- else
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
+ }
+ else {
+ dev_err(dev, "Lee: [%s]-[%d] no allocation method\n", __func__, __LINE__);
return NULL;
+ }

debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs);
+ if (dev->bus && strcmp(dev->bus->name, "pci") == 0) {
+ dev_err(dev, "Lee: [%s]-[%d] pci dev_size = %zu\n", __func__, __LINE__, sizeof(*dev));
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
+ }
+ dev_err(dev, "Lee: [%s]-[%d] cpu_addr = %p\n", __func__, __LINE__, cpu_addr);
return cpu_addr;
}
EXPORT_SYMBOL(dma_alloc_attrs);

0 comments on commit bbf9296

Please sign in to comment.