Previously, data blocks that perfectly fit the data ring buffer would
get wrapped around to the beginning for no reason since the calculated
offset of the next data block would belong to the next wrap. Since this
offset is not actually part of the data block, but rather the offset of
where the next data block is going to start, there is no reason to
include it when deciding whether the current block fits the buffer.
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
---
v0->v1:
- Fix severely broken code alignment
---
kernel/printk/printk_ringbuffer.c | 28 ++++++++++++++++++++--------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index d9fb053cff67..f885ba8be5e6 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -1002,6 +1002,18 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
 	return true;
 }
 
+static bool same_lpos_wraps(struct prb_data_ring *data_ring,
+			    unsigned long begin_lpos, unsigned long next_lpos)
+{
+	/*
+	 * Subtract one from next_lpos since it's not actually part of this data
+	 * block. We do this to prevent perfectly fitting records from wrapping
+	 * around for no reason.
+	 */
+	return DATA_WRAPS(data_ring, begin_lpos) ==
+	       DATA_WRAPS(data_ring, next_lpos - 1);
+}
+
 /* Determine the end of a data block. */
 static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
 				   unsigned long lpos, unsigned int size)
@@ -1013,7 +1025,7 @@ static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
 	next_lpos = lpos + size;
 
 	/* First check if the data block does not wrap. */
-	if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos))
+	if (same_lpos_wraps(data_ring, begin_lpos, next_lpos))
 		return next_lpos;
 
 	/* Wrapping data blocks store their data at the beginning. */
@@ -1081,7 +1093,7 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
 	blk = to_block(data_ring, begin_lpos);
 	blk->id = id; /* LMM(data_alloc:B) */
 
-	if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) {
+	if (!same_lpos_wraps(data_ring, begin_lpos, next_lpos)) {
 		/* Wrapping data blocks store their data at the beginning. */
 		blk = to_block(data_ring, 0);
 
@@ -1125,7 +1137,7 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
 		return NULL;
 
 	/* Keep track if @blk_lpos was a wrapping data block. */
-	wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next));
+	wrapped = !same_lpos_wraps(data_ring, blk_lpos->begin, blk_lpos->next);
 
 	size = to_blk_size(size);
 
@@ -1151,7 +1163,7 @@ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
 
 	blk = to_block(data_ring, blk_lpos->begin);
 
-	if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) {
+	if (!same_lpos_wraps(data_ring, blk_lpos->begin, next_lpos)) {
 		struct prb_data_block *old_blk = blk;
 
 		/* Wrapping data blocks store their data at the beginning. */
@@ -1187,7 +1199,7 @@ static unsigned int space_used(struct prb_data_ring *data_ring,
 	if (BLK_DATALESS(blk_lpos))
 		return 0;
 
-	if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) {
+	if (same_lpos_wraps(data_ring, blk_lpos->begin, blk_lpos->next)) {
 		/* Data block does not wrap. */
 		return (DATA_INDEX(data_ring, blk_lpos->next) -
 			DATA_INDEX(data_ring, blk_lpos->begin));
@@ -1234,14 +1246,14 @@ static const char *get_data(struct prb_data_ring *data_ring,
 	}
 
 	/* Regular data block: @begin less than @next and in same wrap. */
-	if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) &&
+	if (same_lpos_wraps(data_ring, blk_lpos->begin, blk_lpos->next) &&
 	    blk_lpos->begin < blk_lpos->next) {
 		db = to_block(data_ring, blk_lpos->begin);
 		*data_size = blk_lpos->next - blk_lpos->begin;
 
 	/* Wrapping data block: @begin is one wrap behind @next. */
-	} else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) ==
-		   DATA_WRAPS(data_ring, blk_lpos->next)) {
+	} else if (same_lpos_wraps(data_ring,
+				   blk_lpos->begin + DATA_SIZE(data_ring), blk_lpos->next)) {
 		db = to_block(data_ring, 0);
 		*data_size = DATA_INDEX(data_ring, blk_lpos->next);
 
-- 
2.43.0
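
To make the off-by-one concrete, here is a standalone C sketch, separate
from the patch itself, that models the lpos arithmetic. It assumes that
DATA_WRAPS(lpos) is essentially lpos >> size_bits (as in the real macro)
and uses a toy 8-byte ring chosen purely for illustration:

#include <stdio.h>

/* Toy stand-ins for the ring's lpos macros, assuming size_bits = 3,
 * i.e. an 8-byte data ring (illustration only, not kernel code).
 */
#define SIZE_BITS	3
#define DATA_SIZE	(1UL << SIZE_BITS)
#define DATA_WRAPS(lpos)	((unsigned long)(lpos) >> SIZE_BITS)

int main(void)
{
	/* A block starting at index 0 of wrap 2 that exactly fills the ring. */
	unsigned long begin_lpos = 2 * DATA_SIZE;
	unsigned long next_lpos = begin_lpos + DATA_SIZE;

	/* Old check: next_lpos is the first byte of wrap 3, so the block
	 * was treated as wrapping even though every byte of it fits.
	 */
	printf("old: %s\n",
	       DATA_WRAPS(begin_lpos) == DATA_WRAPS(next_lpos) ? "fits" : "wraps");

	/* Patched check: the block's last byte sits at next_lpos - 1,
	 * still inside wrap 2, so a perfect fit no longer wraps.
	 */
	printf("new: %s\n",
	       DATA_WRAPS(begin_lpos) == DATA_WRAPS(next_lpos - 1) ? "fits" : "wraps");

	return 0;
}

Running it prints "old: wraps" followed by "new: fits", which is exactly
the pointless wrap-around described in the commit message.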
On Wed 2025-09-03 03:10:08, Daniil Tatianin wrote:
> Previously, data blocks that perfectly fit the data ring buffer would
> get wrapped around to the beginning for no reason since the calculated
> offset of the next data block would belong to the next wrap. Since this
> offset is not actually part of the data block, but rather the offset of
> where the next data block is going to start, there is no reason to
> include it when deciding whether the current block fits the buffer.

I am afraid to touch this code. I am curious how you found this ;-)

> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
> index d9fb053cff67..f885ba8be5e6 100644
> --- a/kernel/printk/printk_ringbuffer.c
> +++ b/kernel/printk/printk_ringbuffer.c
> @@ -1002,6 +1002,18 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
>  	return true;
>  }
>  
> +static bool same_lpos_wraps(struct prb_data_ring *data_ring,
> +			    unsigned long begin_lpos, unsigned long next_lpos)

The name might be slightly confusing because it might return true even
when the two given values do not belong into the same wrap.

I would make more clear that it is about the data block starting on
begin_lpos. Either I would call it

	same_wrap_data_block()

or I would invert the logic and call it:

	is_wrapped_data_block()

The 2nd variant looks more self-explanatory to me.

Otherwise the patch looks good to me. I was not able to find any
scenario when it would blow up just by reading the code. Well, I would
still like to do some tests.

Best Regards,
Petr
On 2025-09-03, Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:
> Previously, data blocks that perfectly fit the data ring buffer would
> get wrapped around to the beginning for no reason since the calculated
> offset of the next data block would belong to the next wrap. Since this
> offset is not actually part of the data block, but rather the offset of
> where the next data block is going to start, there is no reason to
> include it when deciding whether the current block fits the buffer.

This is a nice catch!

Although note that this patch avoids wasting a maximum of 8 bytes of
ringbuffer space. If you are interested in tackling the wasted-space
issue of the printk ringbuffer there are much larger [0] fish to catch.

[0] https://lore.kernel.org/lkml/84y10vz7ty.fsf@jogness.linutronix.de

My comments below...

> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
> index d9fb053cff67..f885ba8be5e6 100644
> --- a/kernel/printk/printk_ringbuffer.c
> +++ b/kernel/printk/printk_ringbuffer.c
> @@ -1002,6 +1002,18 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
>  	return true;
>  }
>  
> +static bool same_lpos_wraps(struct prb_data_ring *data_ring,
> +			    unsigned long begin_lpos, unsigned long next_lpos)

We need a better name here since it is not actually using the value of
@next_lpos to check the wrap count. Perhaps inverting the return value
and naming it blk_lpos_wraps(). So it would be identifying if the given
blk_lpos values lead to a wrapping data block. Something like this:

diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index d9fb053cff67d..cf0fcd9b106ae 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -1002,6 +995,17 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
 	return true;
 }
 
+/* Identify if given blk_lpos values represent a wrapping data block. */
+static bool blk_lpos_wraps(struct prb_data_ring *data_ring,
+			   unsigned long begin_lpos, unsigned long next_lpos)
+{
+	/*
+	 * Subtract one from @next_lpos since it is not actually part of this
+	 * data block. This allows perfectly fitting records to not wrap.
+	 */
+	return (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos - 1));
+}
+
 /* Determine the end of a data block. */
 static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
 				   unsigned long lpos, unsigned int size)

> +{
> +	/*
> +	 * Subtract one from next_lpos since it's not actually part of this data
> +	 * block. We do this to prevent perfectly fitting records from wrapping
> +	 * around for no reason.
> +	 */
> +	return DATA_WRAPS(data_ring, begin_lpos) ==
> +	       DATA_WRAPS(data_ring, next_lpos - 1);
> +}
> +
> /* Determine the end of a data block. */
> static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
> 				   unsigned long lpos, unsigned int size)

The rest looked fine to me and also passed various private
tests. However, we should also update data_check_size(), since now data
blocks are allowed to occupy the entire data ring. Something like this:

diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index d9fb053cff67d..e6bdfb8237a3d 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -397,21 +397,14 @@ static unsigned int to_blk_size(unsigned int size)
  */
 static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
 {
-	struct prb_data_block *db = NULL;
-
 	if (size == 0)
 		return true;
 
 	/*
 	 * Ensure the alignment padded size could possibly fit in the data
-	 * array. The largest possible data block must still leave room for
-	 * at least the ID of the next block.
+	 * array.
 	 */
-	size = to_blk_size(size);
-	if (size > DATA_SIZE(data_ring) - sizeof(db->id))
-		return false;
-
-	return true;
+	return (to_blk_size(size) <= DATA_SIZE(data_ring));
 }
 
 /* Query the state of a descriptor. */

Petr may have other ideas/opinions about this, so you should wait for a
response from him before submitting a new version.

John
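
For context on the boundary that John's proposed data_check_size()
change moves, here is a standalone sketch with assumed toy numbers: a
1 KiB data ring and 8-byte block IDs. The to_blk_size() below only
mirrors the kernel helper, which adds the ID header and pads to its
alignment:

#include <stdbool.h>
#include <stdio.h>

#define ID_SIZE		8u	/* sizeof(db->id) on 64-bit, assumed */
#define RING_SIZE	1024u	/* toy DATA_SIZE(data_ring) */

/* Mirrors to_blk_size(): add the ID header, pad to ID alignment. */
static unsigned int to_blk_size(unsigned int size)
{
	return (size + ID_SIZE + (ID_SIZE - 1)) & ~(ID_SIZE - 1);
}

int main(void)
{
	unsigned int text_len = 1016;	/* pads to exactly RING_SIZE */

	/* The old check reserved room for the next block's ID; the new
	 * check lets a block occupy the entire data ring.
	 */
	bool old_ok = to_blk_size(text_len) <= RING_SIZE - ID_SIZE;
	bool new_ok = to_blk_size(text_len) <= RING_SIZE;

	printf("old: %d, new: %d\n", old_ok, new_ok);	/* old: 0, new: 1 */
	return 0;
}

Only records whose padded size lands exactly on the ring size gain
anything from this, consistent with John's note that the patch saves at
most 8 bytes.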
On Thu 2025-09-04 16:04:30, John Ogness wrote:
> On 2025-09-03, Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:
> > Previously, data blocks that perfectly fit the data ring buffer would
> > get wrapped around to the beginning for no reason since the calculated
> > offset of the next data block would belong to the next wrap. Since this
> > offset is not actually part of the data block, but rather the offset of
> > where the next data block is going to start, there is no reason to
> > include it when deciding whether the current block fits the buffer.
> 
> This is a nice catch!
> 
> Although note that this patch avoids wasting a maximum of 8 bytes of
> ringbuffer space. If you are interested in tackling the wasted-space
> issue of the printk ringbuffer there are much larger [0] fish to catch.
> 
> [0] https://lore.kernel.org/lkml/84y10vz7ty.fsf@jogness.linutronix.de
> 
> My comments below...
> 
> > diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
> > index d9fb053cff67..f885ba8be5e6 100644
> > --- a/kernel/printk/printk_ringbuffer.c
> > +++ b/kernel/printk/printk_ringbuffer.c
> > @@ -1002,6 +1002,18 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
> >  	return true;
> >  }
> >  
> > +static bool same_lpos_wraps(struct prb_data_ring *data_ring,
> > +			    unsigned long begin_lpos, unsigned long next_lpos)
> 
> We need a better name here since it is not actually using the value of
> @next_lpos to check the wrap count. Perhaps inverting the return value
> and naming it blk_lpos_wraps(). So it would be identifying if the given
> blk_lpos values lead to a wrapping data block. Something like this:
> 
> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
> index d9fb053cff67d..cf0fcd9b106ae 100644
> --- a/kernel/printk/printk_ringbuffer.c
> +++ b/kernel/printk/printk_ringbuffer.c
> @@ -1002,6 +995,17 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
>  	return true;
>  }
>  
> +/* Identify if given blk_lpos values represent a wrapping data block. */
> +static bool blk_lpos_wraps(struct prb_data_ring *data_ring,
> +			   unsigned long begin_lpos, unsigned long next_lpos)
> +{
> +	/*
> +	 * Subtract one from @next_lpos since it is not actually part of this
> +	 * data block. This allows perfectly fitting records to not wrap.
> +	 */
> +	return (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos - 1));
> +}

Or a combination of my and this proposal: is_blk_wrapped().

> +
> /* Determine the end of a data block. */
> static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
> 				   unsigned long lpos, unsigned int size)
> 
> > +{
> > +	/*
> > +	 * Subtract one from next_lpos since it's not actually part of this data
> > +	 * block. We do this to prevent perfectly fitting records from wrapping
> > +	 * around for no reason.
> > +	 */
> > +	return DATA_WRAPS(data_ring, begin_lpos) ==
> > +	       DATA_WRAPS(data_ring, next_lpos - 1);
> > +}
> > +
> > /* Determine the end of a data block. */
> > static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
> > 				   unsigned long lpos, unsigned int size)
> 
> The rest looked fine to me and also passed various private
> tests. However, we should also update data_check_size(), since now data
> blocks are allowed to occupy the entire data ring. Something like this:
> 
> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
> index d9fb053cff67d..e6bdfb8237a3d 100644
> --- a/kernel/printk/printk_ringbuffer.c
> +++ b/kernel/printk/printk_ringbuffer.c
> @@ -397,21 +397,14 @@ static unsigned int to_blk_size(unsigned int size)
>   */
>  static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
>  {
> -	struct prb_data_block *db = NULL;
> -
>  	if (size == 0)
>  		return true;
>  
>  	/*
>  	 * Ensure the alignment padded size could possibly fit in the data
> -	 * array. The largest possible data block must still leave room for
> -	 * at least the ID of the next block.
> +	 * array.
>  	 */
> -	size = to_blk_size(size);
> -	if (size > DATA_SIZE(data_ring) - sizeof(db->id))
> -		return false;
> -
> -	return true;
> +	return (to_blk_size(size) <= DATA_SIZE(data_ring));
>  }

I hope that we would never reach this limit. A buffer for one
message does not look practical. I originally suggested to avoid
messages bigger than 1/4 of the buffer size ;-)

That said, strictly speaking, the above change looks correct.
I would just do it in a separate patch. The use of the full
buffer and the limit of the maximal message are related
but they are not the same things. Also separate patch might
help with bisection in case of problems.

Best Regards,
Petr
On 2025-09-04, Petr Mladek <pmladek@suse.com> wrote:
> On Thu 2025-09-04 16:04:30, John Ogness wrote:
>> > +static bool same_lpos_wraps(struct prb_data_ring *data_ring,
>> > +			    unsigned long begin_lpos, unsigned long next_lpos)
>>
>> We need a better name here since it is not actually using the value of
>> @next_lpos to check the wrap count. Perhaps inverting the return value
>> and naming it blk_lpos_wraps(). So it would be identifying if the given
>> blk_lpos values lead to a wrapping data block.
>
> Or a combination of my and this proposal: is_blk_wrapped().

Sounds good to me.

>> The rest looked fine to me and also passed various private
>> tests. However, we should also update data_check_size(), since now data
>> blocks are allowed to occupy the entire data ring. Something like this:
>>
>> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
>> index d9fb053cff67d..e6bdfb8237a3d 100644
>> --- a/kernel/printk/printk_ringbuffer.c
>> +++ b/kernel/printk/printk_ringbuffer.c
>> @@ -397,21 +397,14 @@ static unsigned int to_blk_size(unsigned int size)
>>   */
>>  static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
>>  {
>> -	struct prb_data_block *db = NULL;
>> -
>>  	if (size == 0)
>>  		return true;
>>  
>>  	/*
>>  	 * Ensure the alignment padded size could possibly fit in the data
>> -	 * array. The largest possible data block must still leave room for
>> -	 * at least the ID of the next block.
>> +	 * array.
>>  	 */
>> -	size = to_blk_size(size);
>> -	if (size > DATA_SIZE(data_ring) - sizeof(db->id))
>> -		return false;
>> -
>> -	return true;
>> +	return (to_blk_size(size) <= DATA_SIZE(data_ring));
>>  }
>
> I hope that we would never reach this limit. A buffer for one
> message does not look practical. I originally suggested to avoid
> messages bigger than 1/4 of the buffer size ;-)
>
> That said, strictly speaking, the above change looks correct.
> I would just do it in a separate patch. The use of the full
> buffer and the limit of the maximal message are related
> but they are not the same things. Also separate patch might
> help with bisection in case of problems.

FWIW, aside from inspecting all the related code carefully, I also
performed various size and boundary tests using small buffers (like 1KB
and 2KB). I am comfortable with these changes.

John
On Fri 2025-09-05 11:19:02, John Ogness wrote:
> FWIW, aside from inspecting all the related code carefully, I also
> performed various size and boundary tests using small buffers (like 1KB
> and 2KB). I am comfortable with these changes.

Great. I wanted to do similar tests ;-)

Daniil, feel free to send a new version.

Best Regards,
Petr
On 9/4/25 7:33 PM, Petr Mladek wrote:
> On Thu 2025-09-04 16:04:30, John Ogness wrote:
>> On 2025-09-03, Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:
>>> Previously, data blocks that perfectly fit the data ring buffer would
>>> get wrapped around to the beginning for no reason since the calculated
>>> offset of the next data block would belong to the next wrap. Since this
>>> offset is not actually part of the data block, but rather the offset of
>>> where the next data block is going to start, there is no reason to
>>> include it when deciding whether the current block fits the buffer.
>> This is a nice catch!
>>
>> Although note that this patch avoids wasting a maximum of 8 bytes of
>> ringbuffer space. If you are interested in tackling the wasted-space
>> issue of the printk ringbuffer there are much larger [0] fish to catch.
>>
>> [0] https://lore.kernel.org/lkml/84y10vz7ty.fsf@jogness.linutronix.de
>>
>> My comments below...
>>
>>> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
>>> index d9fb053cff67..f885ba8be5e6 100644
>>> --- a/kernel/printk/printk_ringbuffer.c
>>> +++ b/kernel/printk/printk_ringbuffer.c
>>> @@ -1002,6 +1002,18 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
>>>  	return true;
>>>  }
>>>  
>>> +static bool same_lpos_wraps(struct prb_data_ring *data_ring,
>>> +			    unsigned long begin_lpos, unsigned long next_lpos)
>> We need a better name here since it is not actually using the value of
>> @next_lpos to check the wrap count. Perhaps inverting the return value
>> and naming it blk_lpos_wraps(). So it would be identifying if the given
>> blk_lpos values lead to a wrapping data block. Something like this:
>>
>> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
>> index d9fb053cff67d..cf0fcd9b106ae 100644
>> --- a/kernel/printk/printk_ringbuffer.c
>> +++ b/kernel/printk/printk_ringbuffer.c
>> @@ -1002,6 +995,17 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
>>  	return true;
>>  }
>>  
>> +/* Identify if given blk_lpos values represent a wrapping data block. */
>> +static bool blk_lpos_wraps(struct prb_data_ring *data_ring,
>> +			   unsigned long begin_lpos, unsigned long next_lpos)
>> +{
>> +	/*
>> +	 * Subtract one from @next_lpos since it is not actually part of this
>> +	 * data block. This allows perfectly fitting records to not wrap.
>> +	 */
>> +	return (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos - 1));
>> +}
> Or a combination of my and this proposal: is_blk_wrapped().
>
>> +
>> /* Determine the end of a data block. */
>> static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
>> 				   unsigned long lpos, unsigned int size)
>>
>>> +{
>>> +	/*
>>> +	 * Subtract one from next_lpos since it's not actually part of this data
>>> +	 * block. We do this to prevent perfectly fitting records from wrapping
>>> +	 * around for no reason.
>>> +	 */
>>> +	return DATA_WRAPS(data_ring, begin_lpos) ==
>>> +	       DATA_WRAPS(data_ring, next_lpos - 1);
>>> +}
>>> +
>>> /* Determine the end of a data block. */
>>> static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
>>> 				   unsigned long lpos, unsigned int size)
>> The rest looked fine to me and also passed various private
>> tests. However, we should also update data_check_size(), since now data
>> blocks are allowed to occupy the entire data ring. Something like this:
>>
>> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
>> index d9fb053cff67d..e6bdfb8237a3d 100644
>> --- a/kernel/printk/printk_ringbuffer.c
>> +++ b/kernel/printk/printk_ringbuffer.c
>> @@ -397,21 +397,14 @@ static unsigned int to_blk_size(unsigned int size)
>>   */
>>  static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
>>  {
>> -	struct prb_data_block *db = NULL;
>> -
>>  	if (size == 0)
>>  		return true;
>>  
>>  	/*
>>  	 * Ensure the alignment padded size could possibly fit in the data
>> -	 * array. The largest possible data block must still leave room for
>> -	 * at least the ID of the next block.
>> +	 * array.
>>  	 */
>> -	size = to_blk_size(size);
>> -	if (size > DATA_SIZE(data_ring) - sizeof(db->id))
>> -		return false;
>> -
>> -	return true;
>> +	return (to_blk_size(size) <= DATA_SIZE(data_ring));
>>  }
> I hope that we would never reach this limit. A buffer for one
> message does not look practical. I originally suggested to avoid
> messages bigger than 1/4 of the buffer size ;-)
>
> That said, strictly speaking, the above change looks correct.
> I would just do it in a separate patch. The use of the full
> buffer and the limit of the maximal message are related
> but they are not the same things. Also separate patch might
> help with bisection in case of problems.

Sounds good to me. Do you need more time for extra testing, or can I go
ahead and submit a new series?

Since you asked, I noticed this issue when studying the code to make a
similar lockless log ring for my hobby OS :)

I might also take a look at reducing the memory usage for the metadata
at some point (from discussions linked in John's email).

Thanks for the quick reviews!

>
> Best Regards,
> Petr
On 2025-09-03, Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:
> Previously, data blocks that perfectly fit the data ring buffer would
> get wrapped around to the beginning for no reason since the calculated
> offset of the next data block would belong to the next wrap. Since this
> offset is not actually part of the data block, but rather the offset of
> where the next data block is going to start, there is no reason to
> include it when deciding whether the current block fits the buffer.

FYI, I am taking a look at this. We need to be really careful not to
introduce any subtle bugs.

As an example, data_check_size() makes sure there is always room for
the trailing "next ID". But with your patch, that check is
overconservative. For that particular case it really isn't an issue, it
just means you would not be able to have a record that fills the full
ringbuffer.

But my point is that there may be parts of the code that assume there
is always a trailing "next ID" (since so far that has always been
true). I just want to be careful in my review of it. So please be
patient.

John