Rework _add_regex() to avoid doing the lookup twice for the (hopefully
common) cache-hit case.
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
scripts/lib/kdoc/kdoc_re.py | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py
index e81695b273bf..612223e1e723 100644
--- a/scripts/lib/kdoc/kdoc_re.py
+++ b/scripts/lib/kdoc/kdoc_re.py
@@ -29,12 +29,9 @@ class KernRe:
"""
Adds a new regex or re-use it from the cache.
"""
-
- if string in re_cache:
- self.regex = re_cache[string]
- else:
+ self.regex = re_cache.get(string, None)
+ if not self.regex:
self.regex = re.compile(string, flags=flags)
-
if self.cache:
re_cache[string] = self.regex
--
2.49.0
Em Thu, 3 Jul 2025 12:43:58 -0600 Jonathan Corbet <corbet@lwn.net> escreveu: > Rework _add_regex() to avoid doing the lookup twice for the (hopefully > common) cache-hit case. > > Signed-off-by: Jonathan Corbet <corbet@lwn.net> > --- > scripts/lib/kdoc/kdoc_re.py | 7 ++----- > 1 file changed, 2 insertions(+), 5 deletions(-) > > diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py > index e81695b273bf..612223e1e723 100644 > --- a/scripts/lib/kdoc/kdoc_re.py > +++ b/scripts/lib/kdoc/kdoc_re.py > @@ -29,12 +29,9 @@ class KernRe: > """ > Adds a new regex or re-use it from the cache. > """ > - > - if string in re_cache: > - self.regex = re_cache[string] > - else: > + self.regex = re_cache.get(string, None) With get, None is default... > + if not self.regex: > self.regex = re.compile(string, flags=flags) ... yet, as you're using get, better to code it as: self.regex = re_cache.get(string, re.compile(string, flags=flags)) > - > if self.cache: > re_cache[string] = self.regex > Thanks, Mauro
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> writes: > Em Thu, 3 Jul 2025 12:43:58 -0600 > Jonathan Corbet <corbet@lwn.net> escreveu: > >> Rework _add_regex() to avoid doing the lookup twice for the (hopefully >> common) cache-hit case. >> >> Signed-off-by: Jonathan Corbet <corbet@lwn.net> >> --- >> scripts/lib/kdoc/kdoc_re.py | 7 ++----- >> 1 file changed, 2 insertions(+), 5 deletions(-) >> >> diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py >> index e81695b273bf..612223e1e723 100644 >> --- a/scripts/lib/kdoc/kdoc_re.py >> +++ b/scripts/lib/kdoc/kdoc_re.py >> @@ -29,12 +29,9 @@ class KernRe: >> """ >> Adds a new regex or re-use it from the cache. >> """ >> - >> - if string in re_cache: >> - self.regex = re_cache[string] >> - else: >> + self.regex = re_cache.get(string, None) > > With get, None is default... > >> + if not self.regex: >> self.regex = re.compile(string, flags=flags) > > ... yet, as you're using get, better to code it as: > > self.regex = re_cache.get(string, re.compile(string, flags=flags)) ...but that will recompile the regex each time, defeating the purpose of the cache, no? Thanks, jon
Em Thu, 03 Jul 2025 17:47:13 -0600 Jonathan Corbet <corbet@lwn.net> escreveu: > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> writes: > > > Em Thu, 3 Jul 2025 12:43:58 -0600 > > Jonathan Corbet <corbet@lwn.net> escreveu: > > > >> Rework _add_regex() to avoid doing the lookup twice for the (hopefully > >> common) cache-hit case. > >> > >> Signed-off-by: Jonathan Corbet <corbet@lwn.net> > >> --- > >> scripts/lib/kdoc/kdoc_re.py | 7 ++----- > >> 1 file changed, 2 insertions(+), 5 deletions(-) > >> > >> diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py > >> index e81695b273bf..612223e1e723 100644 > >> --- a/scripts/lib/kdoc/kdoc_re.py > >> +++ b/scripts/lib/kdoc/kdoc_re.py > >> @@ -29,12 +29,9 @@ class KernRe: > >> """ > >> Adds a new regex or re-use it from the cache. > >> """ > >> - > >> - if string in re_cache: > >> - self.regex = re_cache[string] > >> - else: > >> + self.regex = re_cache.get(string, None) > > > > With get, None is default... > > > >> + if not self.regex: > >> self.regex = re.compile(string, flags=flags) > > > > ... yet, as you're using get, better to code it as: > > > > self.regex = re_cache.get(string, re.compile(string, flags=flags)) > > ...but that will recompile the regex each time, defeating the purpose of > the cache, no? No. It should do exactly like the previous code: - if re_cache[string] exists, it returns it. - Otherwise, it returns re.compile(string, flags=flags). https://www.w3schools.com/python/ref_dictionary_get.asp Thanks, Mauro
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> writes: > Em Thu, 03 Jul 2025 17:47:13 -0600 > Jonathan Corbet <corbet@lwn.net> escreveu: > >> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> writes: >> >> > Em Thu, 3 Jul 2025 12:43:58 -0600 >> > Jonathan Corbet <corbet@lwn.net> escreveu: >> > >> >> Rework _add_regex() to avoid doing the lookup twice for the (hopefully >> >> common) cache-hit case. >> >> >> >> Signed-off-by: Jonathan Corbet <corbet@lwn.net> >> >> --- >> >> scripts/lib/kdoc/kdoc_re.py | 7 ++----- >> >> 1 file changed, 2 insertions(+), 5 deletions(-) >> >> >> >> diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py >> >> index e81695b273bf..612223e1e723 100644 >> >> --- a/scripts/lib/kdoc/kdoc_re.py >> >> +++ b/scripts/lib/kdoc/kdoc_re.py >> >> @@ -29,12 +29,9 @@ class KernRe: >> >> """ >> >> Adds a new regex or re-use it from the cache. >> >> """ >> >> - >> >> - if string in re_cache: >> >> - self.regex = re_cache[string] >> >> - else: >> >> + self.regex = re_cache.get(string, None) >> > >> > With get, None is default... >> > >> >> + if not self.regex: >> >> self.regex = re.compile(string, flags=flags) >> > >> > ... yet, as you're using get, better to code it as: >> > >> > self.regex = re_cache.get(string, re.compile(string, flags=flags)) >> >> ...but that will recompile the regex each time, defeating the purpose of >> the cache, no? > > No. It should do exactly like the previous code: > > - if re_cache[string] exists, it returns it. > - Otherwise, it returns re.compile(string, flags=flags). > > https://www.w3schools.com/python/ref_dictionary_get.asp The re.compile() call is evaluated before the call to get() - just like it would be in C. This is easy enough to prove to yourself in the REPL if you doubt me... Thanks, jon
Em Fri, 04 Jul 2025 08:59:45 -0600 Jonathan Corbet <corbet@lwn.net> escreveu: > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> writes: > > > Em Thu, 03 Jul 2025 17:47:13 -0600 > > Jonathan Corbet <corbet@lwn.net> escreveu: > > > >> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> writes: > >> > >> > Em Thu, 3 Jul 2025 12:43:58 -0600 > >> > Jonathan Corbet <corbet@lwn.net> escreveu: > >> > > >> >> Rework _add_regex() to avoid doing the lookup twice for the (hopefully > >> >> common) cache-hit case. > >> >> > >> >> Signed-off-by: Jonathan Corbet <corbet@lwn.net> > >> >> --- > >> >> scripts/lib/kdoc/kdoc_re.py | 7 ++----- > >> >> 1 file changed, 2 insertions(+), 5 deletions(-) > >> >> > >> >> diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py > >> >> index e81695b273bf..612223e1e723 100644 > >> >> --- a/scripts/lib/kdoc/kdoc_re.py > >> >> +++ b/scripts/lib/kdoc/kdoc_re.py > >> >> @@ -29,12 +29,9 @@ class KernRe: > >> >> """ > >> >> Adds a new regex or re-use it from the cache. > >> >> """ > >> >> - > >> >> - if string in re_cache: > >> >> - self.regex = re_cache[string] > >> >> - else: > >> >> + self.regex = re_cache.get(string, None) > >> > > >> > With get, None is default... > >> > > >> >> + if not self.regex: > >> >> self.regex = re.compile(string, flags=flags) > >> > > >> > ... yet, as you're using get, better to code it as: > >> > > >> > self.regex = re_cache.get(string, re.compile(string, flags=flags)) > >> > >> ...but that will recompile the regex each time, defeating the purpose of > >> the cache, no? > > > > No. It should do exactly like the previous code: > > > > - if re_cache[string] exists, it returns it. > > - Otherwise, it returns re.compile(string, flags=flags). > > > > https://www.w3schools.com/python/ref_dictionary_get.asp > > The re.compile() call is evaluated before the call to get() - just like > it would be in C. This is easy enough to prove to yourself in the REPL > if you doubt me... You're right! 
Tested with a small code snippet: # test.py inner called Inner will be called: True inner called Inner should not be called: False I guess I expected too much from Python's optimizer ;-) My fault. Your patch looks OK to me. Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> - For reference, this was the test code #!/usr/bin/env python3 def inner(): print("inner called") return True c = {} print(f"Inner will be called: {c.get('a', inner())}") c = { "a": "False"} print(f"Inner should not be called: {c.get('a', inner())}") Thanks, Mauro
Em Fri, 4 Jul 2025 00:31:46 +0200 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> escreveu: > Em Thu, 3 Jul 2025 12:43:58 -0600 > Jonathan Corbet <corbet@lwn.net> escreveu: > > > Rework _add_regex() to avoid doing the lookup twice for the (hopefully > > common) cache-hit case. > > > > Signed-off-by: Jonathan Corbet <corbet@lwn.net> > > --- > > scripts/lib/kdoc/kdoc_re.py | 7 ++----- > > 1 file changed, 2 insertions(+), 5 deletions(-) > > > > diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py > > index e81695b273bf..612223e1e723 100644 > > --- a/scripts/lib/kdoc/kdoc_re.py > > +++ b/scripts/lib/kdoc/kdoc_re.py > > @@ -29,12 +29,9 @@ class KernRe: > > """ > > Adds a new regex or re-use it from the cache. > > """ > > - > > - if string in re_cache: > > - self.regex = re_cache[string] > > - else: > > + self.regex = re_cache.get(string, None) > > With get, None is default... > > > + if not self.regex: > > self.regex = re.compile(string, flags=flags) > > ... yet, as you're using get, better to code it as: > > self.regex = re_cache.get(string, re.compile(string, flags=flags)) Forgot to mention: with or without that: Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> > > > - > > if self.cache: > > re_cache[string] = self.regex > > > > > > Thanks, > Mauro Thanks, Mauro
© 2016 - 2025 Red Hat, Inc.