drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
When rx_flag == MTK_RX_FLAGS_HWLRO,
rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
netdev_alloc_frag is for allocation of page fragments only.
Reference to other drivers and Documentation/vm/page_frags.rst
Branch to use kmalloc when rx_data_len > PAGE_SIZE.
Signed-off-by: Chen Lin <chen45464546@163.com>
---
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b3b3c07..d0eebca 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1914,7 +1914,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
return -ENOMEM;
for (i = 0; i < rx_dma_size; i++) {
- ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ else
+ ring->data[i] = kmalloc(ring->frag_size, GFP_KERNEL);
if (!ring->data[i])
return -ENOMEM;
}
--
1.7.9.5
On 03.06.22 06:10, Chen Lin wrote: > When rx_flag == MTK_RX_FLAGS_HWLRO, > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE. > netdev_alloc_frag is for alloction of page fragment only. > Reference to other drivers and Documentation/vm/page_frags.rst > > Branch to use kmalloc when rx_data_len > PAGE_SIZE. > > Signed-off-by: Chen Lin <chen45464546@163.com> > --- > drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 ++++- > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c > index b3b3c07..d0eebca 100644 > --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c > +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c > @@ -1914,7 +1914,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) > return -ENOMEM; > > for (i = 0; i < rx_dma_size; i++) { > - ring->data[i] = netdev_alloc_frag(ring->frag_size); > + if (ring->frag_size <= PAGE_SIZE) > + ring->data[i] = netdev_alloc_frag(ring->frag_size); > + else > + ring->data[i] = kmalloc(ring->frag_size, GFP_KERNEL); I'm pretty sure you also need to update all the other places in the code that currently assume that the buffer is allocated using the page frag allocator. - Felix
When rx_flag == MTK_RX_FLAGS_HWLRO,
rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
netdev_alloc_frag is for allocation of page fragments only.
Reference to other drivers and Documentation/vm/page_frags.rst
Branch to use alloc_pages when ring->frag_size > PAGE_SIZE.
Signed-off-by: Chen Lin <chen45464546@163.com>
---
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b3b3c07..772d903 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1467,7 +1467,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
goto release_desc;
/* alloc new buffer */
- new_data = napi_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE) {
+ new_data = napi_alloc_frag(ring->frag_size);
+ } else {
+ struct page *page;
+ unsigned int order = get_order(ring->frag_size);
+
+ page = alloc_pages(GFP_ATOMIC | __GFP_COMP |
+ __GFP_NOWARN, order);
+ new_data = page ? page_address(page) : NULL;
+ }
if (unlikely(!new_data)) {
netdev->stats.rx_dropped++;
goto release_desc;
@@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
return -ENOMEM;
for (i = 0; i < rx_dma_size; i++) {
- ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE) {
+ ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ } else {
+ struct page *page;
+ unsigned int order = get_order(ring->frag_size);
+
+ page = alloc_pages(GFP_KERNEL | __GFP_COMP |
+ __GFP_NOWARN, order);
+ ring->data[i] = page ? page_address(page) : NULL;
+ }
if (!ring->data[i])
return -ENOMEM;
}
--
1.7.9.5
On Fri, Jun 3, 2022 at 1:46 AM Chen Lin <chen45464546@163.com> wrote: > > When rx_flag == MTK_RX_FLAGS_HWLRO, > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE. > netdev_alloc_frag is for alloction of page fragment only. > Reference to other drivers and Documentation/vm/page_frags.rst > > Branch to use alloc_pages when ring->frag_size > PAGE_SIZE. > > Signed-off-by: Chen Lin <chen45464546@163.com> ... > goto release_desc; > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) > return -ENOMEM; > > for (i = 0; i < rx_dma_size; i++) { > - ring->data[i] = netdev_alloc_frag(ring->frag_size); Note aside, calling netdev_alloc_frag() in a loop like that is adding GFP_ATOMIC pressure. mtk_rx_alloc() being in process context, using GFP_KERNEL allocations would be less aggressive and have more chances to succeed. We probably should offer a generic helper. This could be used from driver/net/tun.c and others.
On Fri, 3 Jun 2022 10:25:16 -0700 Eric Dumazet wrote: > > goto release_desc; > > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) > > return -ENOMEM; > > > > for (i = 0; i < rx_dma_size; i++) { > > - ring->data[i] = netdev_alloc_frag(ring->frag_size); > > Note aside, calling netdev_alloc_frag() in a loop like that is adding > GFP_ATOMIC pressure. > > mtk_rx_alloc() being in process context, using GFP_KERNEL allocations > would be less aggressive and > have more chances to succeed. > > We probably should offer a generic helper. This could be used from > driver/net/tun.c and others. Do cases where netdev_alloc_frag() is not run from a process context from to your mind? My feeling is that the prevailing pattern is what this driver does, which is netdev_alloc_frag() at startup / open and napi_alloc_frag() from the datapath. So maybe we can even spare the detail in the API and have napi_alloc_frag() assume GFP_KERNEL by default?
On Fri, Jun 3, 2022 at 11:59 AM Jakub Kicinski <kuba@kernel.org> wrote: > > On Fri, 3 Jun 2022 10:25:16 -0700 Eric Dumazet wrote: > > > goto release_desc; > > > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) > > > return -ENOMEM; > > > > > > for (i = 0; i < rx_dma_size; i++) { > > > - ring->data[i] = netdev_alloc_frag(ring->frag_size); > > > > Note aside, calling netdev_alloc_frag() in a loop like that is adding > > GFP_ATOMIC pressure. > > > > mtk_rx_alloc() being in process context, using GFP_KERNEL allocations > > would be less aggressive and > > have more chances to succeed. > > > > We probably should offer a generic helper. This could be used from > > driver/net/tun.c and others. > > Do cases where netdev_alloc_frag() is not run from a process context > from to your mind? My feeling is that the prevailing pattern is what > this driver does, which is netdev_alloc_frag() at startup / open and > napi_alloc_frag() from the datapath. So maybe we can even spare the > detail in the API and have napi_alloc_frag() assume GFP_KERNEL by > default? Yes, we only have to review callers and change the documentation and implementation. The confusion/overhead/generalization came with : commit 7ba7aeabbaba484347cc98fbe9045769ca0d118d Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri Jun 7 21:20:34 2019 +0200 net: Don't disable interrupts in napi_alloc_frag() netdev_alloc_frag() can be used from any context and is used by NAPI and non-NAPI drivers. Non-NAPI drivers use it in interrupt context and NAPI drivers use it during initial allocation (->ndo_open() or ->ndo_change_mtu()). Some NAPI drivers share the same function for the initial allocation and the allocation in their NAPI callback. The interrupts are disabled in order to ensure locked access from every context to `netdev_alloc_cache'. Let netdev_alloc_frag() check if interrupts are disabled. 
If they are, use `netdev_alloc_cache' otherwise disable BH and invoke __napi_alloc_frag() for the allocation. The IRQ check is cheaper compared to disabling & enabling interrupts and memory allocation with disabled interrupts does not work on -RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: David S. Miller <davem@davemloft.net>
On Fri, 3 Jun 2022 12:11:43 -0700 Eric Dumazet wrote: > Yes, we only have to review callers and change the documentation and > implementation. > > The confusion/overhead/generalization came with : > > commit 7ba7aeabbaba484347cc98fbe9045769ca0d118d > Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de> > Date: Fri Jun 7 21:20:34 2019 +0200 > > net: Don't disable interrupts in napi_alloc_frag() > > netdev_alloc_frag() can be used from any context and is used by NAPI > and non-NAPI drivers. Non-NAPI drivers use it in interrupt context > and NAPI drivers use it during initial allocation (->ndo_open() or > ->ndo_change_mtu()). Some NAPI drivers share the same function for the > initial allocation and the allocation in their NAPI callback. > > The interrupts are disabled in order to ensure locked access from every > context to `netdev_alloc_cache'. > > Let netdev_alloc_frag() check if interrupts are disabled. If they are, > use `netdev_alloc_cache' otherwise disable BH and invoke > __napi_alloc_frag() for the allocation. The IRQ check is cheaper > compared to disabling & enabling interrupts and memory allocation with > disabled interrupts does not work on -RT. Hm, should have looked at the code. Were you thinking of adding a helper which would replace both netdev_ and napi_ variants and DTRT internally? An option for getting GFP_KERNEL in there would be having an rtnl frag cache. Users who need frags on the reconfig path should be under rtnl, they can call rtnl_alloc_frag(), which can use GFP_KERNEL internally. Otherwise the GFP_KERNEL frag cache would need to be protected by another mutex, I presume. Pre-allocating memory before using the napi cache seems hard.
On Fri, Jun 3, 2022 at 2:03 AM Chen Lin <chen45464546@163.com> wrote: > > When rx_flag == MTK_RX_FLAGS_HWLRO, > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE. > netdev_alloc_frag is for alloction of page fragment only. > Reference to other drivers and Documentation/vm/page_frags.rst > > Branch to use alloc_pages when ring->frag_size > PAGE_SIZE. > > Signed-off-by: Chen Lin <chen45464546@163.com> > --- > drivers/net/ethernet/mediatek/mtk_eth_soc.c | 22 ++++++++++++++++++++-- > 1 file changed, 20 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c > index b3b3c07..772d903 100644 > --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c > +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c > @@ -1467,7 +1467,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, > goto release_desc; > > /* alloc new buffer */ > - new_data = napi_alloc_frag(ring->frag_size); > + if (ring->frag_size <= PAGE_SIZE) { > + new_data = napi_alloc_frag(ring->frag_size); > + } else { > + struct page *page; > + unsigned int order = get_order(ring->frag_size); > + > + page = alloc_pages(GFP_ATOMIC | __GFP_COMP | > + __GFP_NOWARN, order); > + new_data = page ? page_address(page) : NULL; > + } > if (unlikely(!new_data)) { > netdev->stats.rx_dropped++; > goto release_desc; > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) > return -ENOMEM; > > for (i = 0; i < rx_dma_size; i++) { > - ring->data[i] = netdev_alloc_frag(ring->frag_size); > + if (ring->frag_size <= PAGE_SIZE) { > + ring->data[i] = netdev_alloc_frag(ring->frag_size); > + } else { > + struct page *page; > + unsigned int order = get_order(ring->frag_size); > + > + page = alloc_pages(GFP_KERNEL | __GFP_COMP | > + __GFP_NOWARN, order); > + ring->data[i] = page ? page_address(page) : NULL; > + } > if (!ring->data[i]) > return -ENOMEM; > } Actually I looked closer at this driver. 
Is it able to receive frames larger than 2K? If not there isn't any point in this change. Based on commit 4fd59792097a ("net: ethernet: mediatek: support setting MTU") it looks like it doesn't, so odds are this patch is not necessary.
On Fri, Jun 3, 2022 at 8:25 AM Alexander Duyck <alexander.duyck@gmail.com> wrote: > > On Fri, Jun 3, 2022 at 2:03 AM Chen Lin <chen45464546@163.com> wrote: > > > > When rx_flag == MTK_RX_FLAGS_HWLRO, > > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE. > > netdev_alloc_frag is for alloction of page fragment only. > > Reference to other drivers and Documentation/vm/page_frags.rst > > > > Branch to use alloc_pages when ring->frag_size > PAGE_SIZE. > > > > Signed-off-by: Chen Lin <chen45464546@163.com> > > --- > > drivers/net/ethernet/mediatek/mtk_eth_soc.c | 22 ++++++++++++++++++++-- > > 1 file changed, 20 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c > > index b3b3c07..772d903 100644 > > --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c > > +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c > > @@ -1467,7 +1467,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, > > goto release_desc; > > > > /* alloc new buffer */ > > - new_data = napi_alloc_frag(ring->frag_size); > > + if (ring->frag_size <= PAGE_SIZE) { > > + new_data = napi_alloc_frag(ring->frag_size); > > + } else { > > + struct page *page; > > + unsigned int order = get_order(ring->frag_size); > > + > > + page = alloc_pages(GFP_ATOMIC | __GFP_COMP | > > + __GFP_NOWARN, order); > > + new_data = page ? 
page_address(page) : NULL; > > + } > > if (unlikely(!new_data)) { > > netdev->stats.rx_dropped++; > > goto release_desc; > > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) > > return -ENOMEM; > > > > for (i = 0; i < rx_dma_size; i++) { > > - ring->data[i] = netdev_alloc_frag(ring->frag_size); > > + if (ring->frag_size <= PAGE_SIZE) { > > + ring->data[i] = netdev_alloc_frag(ring->frag_size); > > + } else { > > + struct page *page; > > + unsigned int order = get_order(ring->frag_size); > > + > > + page = alloc_pages(GFP_KERNEL | __GFP_COMP | > > + __GFP_NOWARN, order); > > + ring->data[i] = page ? page_address(page) : NULL; > > + } > > if (!ring->data[i]) > > return -ENOMEM; > > } > > Actually I looked closer at this driver. Is it able to receive frames > larger than 2K? If not there isn't any point in this change. > > Based on commit 4fd59792097a ("net: ethernet: mediatek: support > setting MTU") it looks like it doesn't, so odds are this patch is not > necessary. I spoke too soon. I had overlooked the LRO part. With that being the case you can probably optimize this code to do away with the get_order piece entirely, at least during runtime. My main concern is that doing that in the fast-path will be expensive so you would be much better off doing something like get_order(mtk_max_frag_size(MTK_RX_FLAGS_HWLRO)) which would be converted into a constant at compile time since everything else would be less than 1 page in size. Also you could then replace alloc_pages with __get_free_pages which would take care of the page_address call for you.
At 2022-06-03 23:33:25, "Alexander Duyck" <alexander.duyck@gmail.com> wrote: >On Fri, Jun 3, 2022 at 8:25 AM Alexander Duyck ><alexander.duyck@gmail.com> wrote: >> >> On Fri, Jun 3, 2022 at 2:03 AM Chen Lin <chen45464546@163.com> wrote: >> > >> > When rx_flag == MTK_RX_FLAGS_HWLRO, >> > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE. >> > netdev_alloc_frag is for alloction of page fragment only. >> > Reference to other drivers and Documentation/vm/page_frags.rst >> > >> > Branch to use alloc_pages when ring->frag_size > PAGE_SIZE. >> > >> > Signed-off-by: Chen Lin <chen45464546@163.com> >> > --- >> > drivers/net/ethernet/mediatek/mtk_eth_soc.c | 22 ++++++++++++++++++++-- >> > 1 file changed, 20 insertions(+), 2 deletions(-) >> > >> > diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c >> > index b3b3c07..772d903 100644 >> > --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c >> > +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c >> > @@ -1467,7 +1467,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, >> > goto release_desc; >> > >> > /* alloc new buffer */ >> > - new_data = napi_alloc_frag(ring->frag_size); >> > + if (ring->frag_size <= PAGE_SIZE) { >> > + new_data = napi_alloc_frag(ring->frag_size); >> > + } else { >> > + struct page *page; >> > + unsigned int order = get_order(ring->frag_size); >> > + >> > + page = alloc_pages(GFP_ATOMIC | __GFP_COMP | >> > + __GFP_NOWARN, order); >> > + new_data = page ? 
page_address(page) : NULL; >> > + } >> > if (unlikely(!new_data)) { >> > netdev->stats.rx_dropped++; >> > goto release_desc; >> > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) >> > return -ENOMEM; >> > >> > for (i = 0; i < rx_dma_size; i++) { >> > - ring->data[i] = netdev_alloc_frag(ring->frag_size); >> > + if (ring->frag_size <= PAGE_SIZE) { >> > + ring->data[i] = netdev_alloc_frag(ring->frag_size); >> > + } else { >> > + struct page *page; >> > + unsigned int order = get_order(ring->frag_size); >> > + >> > + page = alloc_pages(GFP_KERNEL | __GFP_COMP | >> > + __GFP_NOWARN, order); >> > + ring->data[i] = page ? page_address(page) : NULL; >> > + } >> > if (!ring->data[i]) >> > return -ENOMEM; >> > } >> >> Actually I looked closer at this driver. Is it able to receive frames >> larger than 2K? If not there isn't any point in this change. >> >> Based on commit 4fd59792097a ("net: ethernet: mediatek: support >> setting MTU") it looks like it doesn't, so odds are this patch is not >> necessary. > >I spoke too soon. I had overlooked the LRO part. With that being the >case you can probably optimize this code to do away with the get_order >piece entirely, at least during runtime. My main concern is that doing >that in the fast-path will be expensive so you would be much better >off doing something like >get_order(mtk_max_frag_size(MTK_RX_FLAGS_HWLRO)) which would be >converted into a constant at compile time since everything else would >be less than 1 page in size. > >Also you could then replace alloc_pages with __get_free_pages which >would take care of the page_address call for you. Thanks for the tips. I'll try again. It can also be seen from here it is easy to make mistakes in parameter fragsz.
When rx_flag == MTK_RX_FLAGS_HWLRO,
rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
netdev_alloc_frag is for allocation of page fragments only.
Reference to other drivers and Documentation/vm/page_frags.rst
Branch to use __get_free_pages when ring->frag_size > PAGE_SIZE.
Signed-off-by: Chen Lin <chen45464546@163.com>
---
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b3b3c07..ba9259a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1467,7 +1467,13 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
goto release_desc;
/* alloc new buffer */
- new_data = napi_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ new_data = napi_alloc_frag(ring->frag_size);
+ else
+ new_data = (void *)__get_free_pages(GFP_ATOMIC |
+ __GFP_COMP | __GFP_NOWARN,
+ get_order(mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH)));
+
if (unlikely(!new_data)) {
netdev->stats.rx_dropped++;
goto release_desc;
@@ -1914,7 +1920,13 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
return -ENOMEM;
for (i = 0; i < rx_dma_size; i++) {
- ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ else
+ ring->data[i] = (void *)__get_free_pages(GFP_KERNEL |
+ __GFP_COMP | __GFP_NOWARN,
+ get_order(mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH)));
+
if (!ring->data[i])
return -ENOMEM;
}
--
1.7.9.5
On Sun, 5 Jun 2022 11:12:37 +0800 Chen Lin wrote: > + new_data = (void *)__get_free_pages(GFP_ATOMIC | > + __GFP_COMP | __GFP_NOWARN, > + get_order(mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH))); Please make this a helper which takes the gfp flags.
When rx_flag == MTK_RX_FLAGS_HWLRO,
rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
netdev_alloc_frag is for allocation of page fragments only.
Reference to other drivers and Documentation/vm/page_frags.rst
Branch to use __get_free_pages when ring->frag_size > PAGE_SIZE.
Signed-off-by: Chen Lin <chen45464546@163.com>
---
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b3b3c07..3da162e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -899,6 +899,17 @@ static bool mtk_rx_get_desc(struct mtk_eth *eth, struct mtk_rx_dma_v2 *rxd,
return true;
}
+static inline void *mtk_max_lro_buf_alloc(gfp_t gfp_mask)
+{
+ void *data;
+
+ data = (void *)__get_free_pages(gfp_mask |
+ __GFP_COMP | __GFP_NOWARN,
+ get_order(mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH)));
+
+ return data;
+}
+
/* the qdma core needs scratch memory to be setup */
static int mtk_init_fq_dma(struct mtk_eth *eth)
{
@@ -1467,7 +1478,10 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
goto release_desc;
/* alloc new buffer */
- new_data = napi_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ new_data = napi_alloc_frag(ring->frag_size);
+ else
+ new_data = mtk_max_lro_buf_alloc(GFP_ATOMIC);
if (unlikely(!new_data)) {
netdev->stats.rx_dropped++;
goto release_desc;
@@ -1914,7 +1928,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
return -ENOMEM;
for (i = 0; i < rx_dma_size; i++) {
- ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ else
+ ring->data[i] = mtk_max_lro_buf_alloc(GFP_KERNEL);
if (!ring->data[i])
return -ENOMEM;
}
--
1.7.9.5
On Tue, 7 Jun 2022 07:39:11 +0800 Chen Lin wrote: > +static inline void *mtk_max_lro_buf_alloc(gfp_t gfp_mask) No need for inline, compiler will inline this anyway. > +{ > + void *data; unsigned long data; then you can move the cast from the long line to the return statement, saving us from the strange indentation. > + data = (void *)__get_free_pages(gfp_mask | > + __GFP_COMP | __GFP_NOWARN, > + get_order(mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH))); > + > + return data; > +}
At 2022-06-08 07:14:13, "Jakub Kicinski" <kuba@kernel.org> wrote: >On Tue, 7 Jun 2022 07:39:11 +0800 Chen Lin wrote: >> +static inline void *mtk_max_lro_buf_alloc(gfp_t gfp_mask) > >No need for inline, compiler will inline this anyway. > >> +{ >> + void *data; > >unsigned long data; then you can move the cast from the long line to >the return statement, saving us from the strange indentation. > >> + data = (void *)__get_free_pages(gfp_mask | >> + __GFP_COMP | __GFP_NOWARN, >> + get_order(mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH))); >> + >> + return data; >> +} I'll do it like below : +static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask) +{ + unsigned long data; + unsigned int size = mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH); + + data = __get_free_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN, + get_order(size)); + + return (void *)data; +} Through analysis of the ASM code from objdump, I confirmed that the inline is not necessary. Thanks for your tips. Also, I confirmed that create a new local variable 'size' will not affect the generation of a constant 'order' parameter at compile time. ASM code of calling 'mtk_max_lro_buf_alloc': 'mtk_max_lro_buf_alloc' inlined and 'order'(w1) is a constant 0x2 0000000000004530 <mtk_napi_rx>: ... 4a98: 52854400 mov w0, #0x2a20 // #10784 4a9c: 52800041 mov w1, #0x2 // #2 4aa0: 72a00080 movk w0, #0x4, lsl #16 4aa4: 94000000 bl 0 <__get_free_pages> 4aa8: f90033e0 str x0, [sp, #96] 0000000000000730 <mtk_rx_alloc>: ... 7fc: 2a1703e0 mov w0, w23 800: 52800041 mov w1, #0x2 // #2 804: 7140047f cmp w3, #0x1, lsl #12 808: 54fffe49 b.ls 7d0 <mtk_rx_alloc+0xa0> // b.plast 80c: 94000000 bl 0 <__get_free_pages> The compiler is smart.
When rx_flag == MTK_RX_FLAGS_HWLRO,
rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
netdev_alloc_frag is for allocation of page fragments only.
Reference to other drivers and Documentation/vm/page_frags.rst
Branch to use __get_free_pages when ring->frag_size > PAGE_SIZE.
Signed-off-by: Chen Lin <chen45464546@163.com>
---
drivers/net/ethernet/mediatek/mtk_eth_soc.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b3b3c07..aba0d84 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -899,6 +899,17 @@ static bool mtk_rx_get_desc(struct mtk_eth *eth, struct mtk_rx_dma_v2 *rxd,
return true;
}
+static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask)
+{
+ unsigned long data;
+ unsigned int size = mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH);
+
+ data = __get_free_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN,
+ get_order(size));
+
+ return (void *)data;
+}
+
/* the qdma core needs scratch memory to be setup */
static int mtk_init_fq_dma(struct mtk_eth *eth)
{
@@ -1467,7 +1478,10 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
goto release_desc;
/* alloc new buffer */
- new_data = napi_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ new_data = napi_alloc_frag(ring->frag_size);
+ else
+ new_data = mtk_max_lro_buf_alloc(GFP_ATOMIC);
if (unlikely(!new_data)) {
netdev->stats.rx_dropped++;
goto release_desc;
@@ -1914,7 +1928,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
return -ENOMEM;
for (i = 0; i < rx_dma_size; i++) {
- ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ else
+ ring->data[i] = mtk_max_lro_buf_alloc(GFP_KERNEL);
if (!ring->data[i])
return -ENOMEM;
}
--
1.7.9.5
Hello: This patch was applied to netdev/net.git (master) by Jakub Kicinski <kuba@kernel.org>: On Wed, 8 Jun 2022 20:46:53 +0800 you wrote: > When rx_flag == MTK_RX_FLAGS_HWLRO, > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE. > netdev_alloc_frag is for alloction of page fragment only. > Reference to other drivers and Documentation/vm/page_frags.rst > > Branch to use __get_free_pages when ring->frag_size > PAGE_SIZE. > > [...] Here is the summary with links: - [v5] net: ethernet: mtk_eth_soc: fix misuse of mem alloc interface netdev[napi]_alloc_frag https://git.kernel.org/netdev/net/c/2f2c0d2919a1 You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html
On Fri, 3 Jun 2022 12:10:35 +0800 Chen Lin wrote: > - ring->data[i] = netdev_alloc_frag(ring->frag_size); > + if (ring->frag_size <= PAGE_SIZE) > + ring->data[i] = netdev_alloc_frag(ring->frag_size); > + else > + ring->data[i] = kmalloc(ring->frag_size, GFP_KERNEL); Is it legal to allocate pages with kmalloc()? I mean they will end up getting freed by page_frag_free(), not kfree(). Also there's more frag allocations here, search for napi_alloc_frag().
© 2016 - 2024 Red Hat, Inc.