[PATCH v3] scripts/tags.sh: fix "make COMPILED_SOURCE=1 cscope" command ends up with *.rlib, *.rmeta, *.so filenames in cscope.files

Sergei Litvin posted 1 patch 6 days, 7 hours ago
There is a newer version of this series
scripts/tags.sh | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
[PATCH v3] scripts/tags.sh: fix "make COMPILED_SOURCE=1 cscope" command ends up with *.rlib, *.rmeta, *.so filenames in cscope.files
Posted by Sergei Litvin 6 days, 7 hours ago
PROBLEM

(1) When executing the command `make COMPILED_SOURCE=1 cscope`, the
`cscope.files` file generated by it includes filenames with the
extensions *.rlib, *.rmeta, and *.so (taken from *.cmd files, which, in turn,
are generated from *.d dependency files by the `fixdep` utility).

(2) When executing the command `make cscope`, the `cscope.files` file generated
by it includes only filenames with the extensions *.h, *.c, *.S and not includes
filenames with *.rs extensions.

SOLUTION

(1) Modify the regular expression in the `all_compiled_sources()` function so
that only files with the extensions *.h, *.c, *.S, and *.rs are accepted.

This causes `make COMPILED_SOURCE=1 cscope` command to generate the
`cscope.files` file that contains only files with the extensions *.h, *.c, *.S,
and *.rs.

(2) Modify the functions `find_arch_sources()`, `find_arch_include_sources()`,
`find_include_sources()`, and `find_other_sources()` so that they no longer
accept the filename pattern as the last argument, but instead retrieve it from
the new variable `pattern` (analogous to the variables `ignore`, `prune`, and
`tree`) which includes *.rs along with *.h, *.c, *.S.

This causes `make cscope` command to generate a `cscope.files` file that
contains *.rs files along with *.h, *.c, *.S

Signed-off-by: Sergei Litvin <litvindev@gmail.com>

---

Changes since V2:
https://lore.kernel.org/lkml/20260530085821.19689-1-litvindev@gmail.com/

- The functions find_arch_sources()`, `find_arch_include_sources()`,
`find_include_sources()`, and `find_other_sources()` have been modified rather
than having their calls duplicated to avoid redundant directory
traversals (suggested by Sashiko).

– The regular expression in the `all_compiled_sources()` function was improved
by adding a lookahead anchor (?=\s) to the first branch (also suggested by Sashiko).

Changes between the V1 and V2:
https://lore.kernel.org/lkml/20260530085821.19689-1-litvindev@gmail.com/
https://lore.kernel.org/lkml/20260530070631.14955-1-litvindev@gmail.com/

- In Version V1, the `-name` parameters to the `find` calls in the functions
`find_arch_sources()`, `find_arch_include_sources()`, `find_include_sources()`,
and `find_other_sources()` were replaced by `-regextype posix-extended
-regex`. However, as noted by Sashiko, the `-regextype` option is a GNU-specific
extension; therefore, in V2, duplicate function calls were introduced instead.
---
 scripts/tags.sh | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/scripts/tags.sh b/scripts/tags.sh
index 243373683f98..be7c2ffd2d4e 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -12,6 +12,9 @@ if [[ "$KBUILD_VERBOSE" =~ 1 ]]; then
 	set -x
 fi
 
+# sources to searh
+pattern="( -name *.[chS] -o -name *.rs )"
+
 # RCS_FIND_IGNORE has escaped ()s -- remove them.
 ignore="$(echo "$RCS_FIND_IGNORE" | sed 's|\\||g' )"
 # tags and cscope files should also ignore MODVERSION *.mod.c files
@@ -52,7 +55,7 @@ find_arch_sources()
 	for i in $archincludedir; do
 		local prune="$prune ( -path $i ) -prune -o"
 	done
-	find ${tree}arch/$1 $ignore $prune -name "$2" -not -type l -print;
+	find ${tree}arch/$1 $ignore $prune $pattern -not -type l -print;
 }
 
 # find sources in arch/$1/include
@@ -61,14 +64,14 @@ find_arch_include_sources()
 	local include=$(find ${tree}arch/$1/ -name include -type d -print);
 	if [ -n "$include" ]; then
 		archincludedir="$archincludedir $include"
-		find $include $ignore -name "$2" -not -type l -print;
+		find $include $ignore $pattern -not -type l -print;
 	fi
 }
 
 # find sources in include/
 find_include_sources()
 {
-	find ${tree}include $ignore -name config -prune -o -name "$1" \
+	find ${tree}include $ignore -name config -prune -o $pattern \
 		-not -type l -print;
 }
 
@@ -78,21 +81,21 @@ find_other_sources()
 {
 	find ${tree}* $ignore \
 	     \( -path ${tree}include -o -path ${tree}arch -o -name '.tmp_*' \) -prune -o \
-	       -name "$1" -not -type l -print;
+	       $pattern -not -type l -print;
 }
 
 all_sources()
 {
-	find_arch_include_sources ${SRCARCH} '*.[chS]'
+	find_arch_include_sources ${SRCARCH}
 	if [ -n "$archinclude" ]; then
-		find_arch_include_sources $archinclude '*.[chS]'
+		find_arch_include_sources $archinclude
 	fi
-	find_include_sources '*.[chS]'
+	find_include_sources
 	for arch in $ALLSOURCE_ARCHS
 	do
-		find_arch_sources $arch '*.[chS]'
+		find_arch_sources $arch
 	done
-	find_other_sources '*.[chS]'
+	find_other_sources
 }
 
 all_compiled_sources()
@@ -100,7 +103,7 @@ all_compiled_sources()
 	{
 		echo include/generated/autoconf.h
 		find $ignore -name "*.cmd" -exec \
-			grep -Poh '(?<=^  )\S+|(?<== )\S+[^\\](?=$)' {} \+ |
+			grep -Poh '(?<=^  )\S+\.([chS]|rs)(?=\s)|(?<== )\S+\.(?1)(?=$)' {} \+ |
 		awk '!a[$0]++'
 	} | xargs realpath -esq $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) |
 	sort -u
-- 
2.54.0

[PATCH v4] scripts/tags.sh: fix "make COMPILED_SOURCE=1 cscope" command ends up with *.rlib, *.rmeta, *.so filenames in cscope.files
Posted by Sergei Litvin 5 days, 16 hours ago
PROBLEM

(1) When executing the command `make COMPILED_SOURCE=1 cscope`, the
`cscope.files` file generated by it includes filenames with the
extensions *.rlib, *.rmeta, and *.so (taken from *.cmd files, which, in turn,
are generated from *.d dependency files by the `fixdep` utility).

(2) When executing the command `make cscope`, the `cscope.files` file generated
by it includes only filenames with the extensions *.h, *.c, *.S and not includes
filenames with *.rs extensions.

SOLUTION

(1) Modify the regular expression in the `all_compiled_sources()` function so
that only files with the extensions *.h, *.c, *.S, and *.rs are accepted.

This causes `make COMPILED_SOURCE=1 cscope` command to generate the
`cscope.files` file that contains only files with the extensions *.h, *.c, *.S,
and *.rs.

(2) Modify the functions `find_arch_sources()`, `find_arch_include_sources()`,
`find_include_sources()`, and `find_other_sources()` so that they can accept an
unlimited number of filename patterns as parameters for the search. Add the
`setup_name_pattern()` function to convert these filename pattern parameters
into a list of parameters that can be passed to the `find` utility via the new
`pattern` variable.

This causes `make cscope` command to generate a `cscope.files` file that
contains *.rs files along with *.h, *.c, *.S

Signed-off-by: Sergei Litvin <litvindev@gmail.com>

---

Changes since V3:
https://lore.kernel.org/lkml/20260601211850.3378-1-litvindev@gmail.com/

In V3, the functions `find_arch_sources()`, `find_arch_include_sources()`,
`find_include_sources()`, and `find_other_sources()` were modified so that they
no longer accept the filename pattern as the last argument, but instead retrieve
it from the new variable `pattern`. However, at the time, I had overlooked the
case where the `find_other_sources` function was called with the `Kconfig*`
parameter. And after Sashiko pointed this out to me, I decided to rewrite it in
the current version.

Changes since V2:
https://lore.kernel.org/lkml/20260530085821.19689-1-litvindev@gmail.com/

- The functions find_arch_sources()`, `find_arch_include_sources()`,
`find_include_sources()`, and `find_other_sources()` have been modified rather
than having their calls duplicated to avoid redundant directory
traversals (suggested by Sashiko).

– The regular expression in the `all_compiled_sources()` function was improved
by adding a lookahead anchor (?=\s) to the first branch (also suggested by Sashiko).

Changes between the V1 and V2:
https://lore.kernel.org/lkml/20260530085821.19689-1-litvindev@gmail.com/
https://lore.kernel.org/lkml/20260530070631.14955-1-litvindev@gmail.com/

- In Version V1, the `-name` parameters to the `find` calls in the functions
`find_arch_sources()`, `find_arch_include_sources()`, `find_include_sources()`,
and `find_other_sources()` were replaced by `-regextype posix-extended
-regex`. However, as noted by Sashiko, the `-regextype` option is a GNU-specific
extension; therefore, in V2, duplicate function calls were introduced instead.

Signed-off-by: Sergei Litvin <litvindev@gmail.com>
---
 scripts/tags.sh | 42 ++++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/scripts/tags.sh b/scripts/tags.sh
index 243373683f98..41e38df96984 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -46,13 +46,31 @@ elif [ "${ALLSOURCE_ARCHS}" = "all" ]; then
 	ALLSOURCE_ARCHS=$(find ${tree}arch/ -mindepth 1 -maxdepth 1 -type d -printf '%f ')
 fi
 
+setup_name_pattern()
+{
+	pattern=()
+	for ext; do
+		if [ ${#pattern[@]} -gt 0 ]; then
+			pattern+=("-o" "-name" "$ext")
+		else
+			pattern+=("(" "-name" "$ext")
+		fi
+	done
+	if [ ${#pattern[@]} -gt 0 ]; then
+		pattern+=(")")
+	fi
+}
+
 # find sources in arch/$1
 find_arch_sources()
 {
 	for i in $archincludedir; do
 		local prune="$prune ( -path $i ) -prune -o"
 	done
-	find ${tree}arch/$1 $ignore $prune -name "$2" -not -type l -print;
+	local src=${tree}arch/$1
+	shift
+	setup_name_pattern "$@"
+	find $src $ignore $prune "${pattern[@]}" -not -type l -print;
 }
 
 # find sources in arch/$1/include
@@ -61,14 +79,17 @@ find_arch_include_sources()
 	local include=$(find ${tree}arch/$1/ -name include -type d -print);
 	if [ -n "$include" ]; then
 		archincludedir="$archincludedir $include"
-		find $include $ignore -name "$2" -not -type l -print;
+		shift
+		setup_name_pattern "$@"
+		find $include $ignore "${pattern[@]}" -not -type l -print;
 	fi
 }
 
 # find sources in include/
 find_include_sources()
 {
-	find ${tree}include $ignore -name config -prune -o -name "$1" \
+	setup_name_pattern "$@"
+	find ${tree}include $ignore -name config -prune -o "${pattern[@]}" \
 		-not -type l -print;
 }
 
@@ -76,23 +97,24 @@ find_include_sources()
 # we could benefit from a list of dirs to search in here
 find_other_sources()
 {
+	setup_name_pattern "$@"
 	find ${tree}* $ignore \
 	     \( -path ${tree}include -o -path ${tree}arch -o -name '.tmp_*' \) -prune -o \
-	       -name "$1" -not -type l -print;
+	       "${pattern[@]}" -not -type l -print;
 }
 
 all_sources()
 {
-	find_arch_include_sources ${SRCARCH} '*.[chS]'
+	find_arch_include_sources ${SRCARCH} '*.[chS]' '*.rs'
 	if [ -n "$archinclude" ]; then
-		find_arch_include_sources $archinclude '*.[chS]'
+		find_arch_include_sources $archinclude '*.[chS]' '*.rs'
 	fi
-	find_include_sources '*.[chS]'
+	find_include_sources '*.[chS]' '*.rs'
 	for arch in $ALLSOURCE_ARCHS
 	do
-		find_arch_sources $arch '*.[chS]'
+		find_arch_sources $arch '*.[chS]' '*.rs'
 	done
-	find_other_sources '*.[chS]'
+	find_other_sources '*.[chS]' '*.rs'
 }
 
 all_compiled_sources()
@@ -100,7 +122,7 @@ all_compiled_sources()
 	{
 		echo include/generated/autoconf.h
 		find $ignore -name "*.cmd" -exec \
-			grep -Poh '(?<=^  )\S+|(?<== )\S+[^\\](?=$)' {} \+ |
+			grep -Poh '(?<=^  )\S+\.([chS]|rs)(?=\s)|(?<== )\S+\.(?1)(?=$)' {} \+ |
 		awk '!a[$0]++'
 	} | xargs realpath -esq $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) |
 	sort -u
-- 
2.54.0