[PATCH v2 4/7] scripts: generate_rust_analyzer.py: add type hints

Tamir Duberstein posted 7 patches 9 months, 1 week ago
There is a newer version of this series
[PATCH v2 4/7] scripts: generate_rust_analyzer.py: add type hints
Posted by Tamir Duberstein 9 months, 1 week ago
Python type hints allow static analysis tools like mypy to detect type
errors during development, improving the developer experience.

Python type hints have been present in the kernel since 2019 at the
latest; see commit 6ebf5866f2e8 ("kunit: tool: add Python wrappers for
running KUnit tests").

Run `uv tool run mypy --strict scripts/generate_rust_analyzer.py` to
verify.

This removes `"is_proc_macro": false` from `rust-project.json` in
exchange for stricter types. This field is interpreted as false if
absent[1] so this doesn't change the behavior of rust-analyzer.

Link: https://github.com/rust-lang/rust-analyzer/blob/8d01570b5e812a49daa1f08404269f6ea5dd73a1/crates/project-model/src/project_json.rs#L372-L373 [1]
Signed-off-by: Tamir Duberstein <tamird@gmail.com>
---
 scripts/generate_rust_analyzer.py | 130 ++++++++++++++++++++++++++++----------
 1 file changed, 96 insertions(+), 34 deletions(-)

diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
index 7e78b926e61f..c73ea8d116a4 100755
--- a/scripts/generate_rust_analyzer.py
+++ b/scripts/generate_rust_analyzer.py
@@ -10,8 +10,10 @@ import os
 import pathlib
 import subprocess
 import sys
+import typing as T
 
-def args_crates_cfgs(cfgs):
+
+def args_crates_cfgs(cfgs: T.Iterable[str]) -> dict[str, list[str]]:
     crates_cfgs = {}
     for cfg in cfgs:
         crate, vals = cfg.split("=", 1)
@@ -19,7 +21,45 @@ def args_crates_cfgs(cfgs):
 
     return crates_cfgs
 
-def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
+
+class Dependency(T.TypedDict):
+    crate: int
+    name: str
+
+
+class Source(T.TypedDict):
+    include_dirs: list[str]
+    exclude_dirs: list[str]
+
+
+class Crate(T.TypedDict):
+    display_name: str
+    root_module: str
+    is_workspace_member: bool
+    deps: list[Dependency]
+    cfg: list[str]
+    edition: T.Literal["2021"]
+    env: dict[str, str]
+
+
+# `T.NotRequired` fields on `Crate` would be better but `T.NotRequired` was added in 3.11.
+class ProcMacroCrate(Crate):
+    is_proc_macro: T.Literal[True]
+    proc_macro_dylib_path: T.Optional[str]  # `pathlib.Path` is not JSON serializable.
+
+
+# `T.NotRequired` fields on `Crate` would be better but `T.NotRequired` was added in 3.11.
+class CrateWithGenerated(Crate):
+    source: T.Optional[Source]
+
+
+def generate_crates(
+    srctree: pathlib.Path,
+    objtree: pathlib.Path,
+    sysroot_src: pathlib.Path,
+    external_src: pathlib.Path,
+    cfgs: list[str],
+) -> list[Crate]:
     # Generate the configuration list.
     cfg = []
     with open(objtree / "include" / "generated" / "rustc_cfg") as fd:
@@ -31,37 +71,53 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
     # Now fill the crates list -- dependencies need to come first.
     #
     # Avoid O(n^2) iterations by keeping a map of indexes.
-    crates = []
-    crates_indexes = {}
+    crates: list[Crate] = []
+    crates_indexes: dict[str, int] = {}
     crates_cfgs = args_crates_cfgs(cfgs)
 
-    def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False):
-        crate = {
+    def append_crate(
+        display_name: str,
+        root_module: pathlib.Path,
+        deps: list[str],
+        cfg: list[str] = [],
+        is_workspace_member: bool = True,
+    ) -> None:
+        crates_indexes[display_name] = len(crates)
+        crates.append({
             "display_name": display_name,
             "root_module": str(root_module),
             "is_workspace_member": is_workspace_member,
-            "is_proc_macro": is_proc_macro,
             "deps": [{"crate": crates_indexes[dep], "name": dep} for dep in deps],
             "cfg": cfg,
             "edition": "2021",
             "env": {
                 "RUST_MODFILE": "This is only for rust-analyzer"
-            }
+            },
+        })
+
+    def append_proc_macro_crate(
+        display_name: str,
+        root_module: pathlib.Path,
+        deps: list[str],
+        cfg: list[str] = [],
+    ) -> None:
+        append_crate(display_name, root_module, deps, cfg)
+        proc_macro_dylib_name = subprocess.check_output(
+            [os.environ["RUSTC"], "--print", "file-names", "--crate-name", display_name, "--crate-type", "proc-macro", "-"],
+            stdin=subprocess.DEVNULL,
+        ).decode('utf-8').strip()
+        crate: ProcMacroCrate = {
+            **crates[-1],
+            "is_proc_macro": True,
+            "proc_macro_dylib_path": f"{objtree}/rust/{proc_macro_dylib_name}",
         }
-        if is_proc_macro:
-            proc_macro_dylib_name = subprocess.check_output(
-                [os.environ["RUSTC"], "--print", "file-names", "--crate-name", display_name, "--crate-type", "proc-macro", "-"],
-                stdin=subprocess.DEVNULL,
-            ).decode('utf-8').strip()
-            crate["proc_macro_dylib_path"] = f"{objtree}/rust/{proc_macro_dylib_name}"
-        crates_indexes[display_name] = len(crates)
-        crates.append(crate)
+        crates[-1] = crate
 
     def append_sysroot_crate(
-        display_name,
-        deps,
-        cfg=[],
-    ):
+        display_name: str,
+        deps: list[str],
+        cfg: list[str] = [],
+    ) -> None:
         append_crate(
             display_name,
             sysroot_src / display_name / "src" / "lib.rs",
@@ -84,11 +140,10 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
         [],
     )
 
-    append_crate(
+    append_proc_macro_crate(
         "macros",
         srctree / "rust" / "macros" / "lib.rs",
         ["std", "proc_macro"],
-        is_proc_macro=True,
     )
 
     append_crate(
@@ -98,9 +153,9 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
     )
 
     def append_crate_with_generated(
-        display_name,
-        deps,
-    ):
+        display_name: str,
+        deps: list[str],
+    ) -> None:
         append_crate(
             display_name,
             srctree / "rust" / display_name / "lib.rs",
@@ -108,19 +163,23 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
             cfg=cfg,
         )
         crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True))
-        crates[-1]["source"] = {
-            "include_dirs": [
-                str(srctree / "rust" / display_name),
-                str(objtree / "rust")
-            ],
-            "exclude_dirs": [],
+        crate: CrateWithGenerated = {
+            **crates[-1],
+            "source": {
+                "include_dirs": [
+                    str(srctree / "rust" / display_name),
+                    str(objtree / "rust")
+                ],
+                "exclude_dirs": [],
+            }
         }
+        crates[-1] = crate
 
     append_crate_with_generated("bindings", ["core"])
     append_crate_with_generated("uapi", ["core"])
     append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings", "uapi"])
 
-    def is_root_crate(build_file, target):
+    def is_root_crate(build_file: pathlib.Path, target: str) -> bool:
         try:
             return f"{target}.o" in open(build_file).read()
         except FileNotFoundError:
@@ -129,7 +188,9 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
     # Then, the rest outside of `rust/`.
     #
     # We explicitly mention the top-level folders we want to cover.
-    extra_dirs = map(lambda dir: srctree / dir, ("samples", "drivers"))
+    extra_dirs: T.Iterable[pathlib.Path] = map(
+        lambda dir: srctree / dir, ("samples", "drivers")
+    )
     if external_src is not None:
         extra_dirs = [external_src]
     for folder in extra_dirs:
@@ -152,7 +213,8 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
 
     return crates
 
-def main():
+
+def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("--verbose", "-v", action="store_true")
     parser.add_argument("--cfgs", action="append", default=[])

-- 
2.48.1
Re: [PATCH v2 4/7] scripts: generate_rust_analyzer.py: add type hints
Posted by Trevor Gross 9 months ago
On Tue, Mar 11, 2025 at 9:18 PM Tamir Duberstein <tamird@gmail.com> wrote:
>
> Python type hints allow static analysis tools like mypy to detect type
> errors during development, improving the developer experience.
>
> Python type hints have been present in the kernel since 2019 at the
> latest; see commit 6ebf5866f2e8 ("kunit: tool: add Python wrappers for
> running KUnit tests").
>
> Run `uv tool run mypy --strict scripts/generate_rust_analyzer.py` to
> verify.

From the discussion, it may be better to instead mention the direct
invocation (without uv).

Could you also mention the target minimum Python version, since the
kernel apparently has a spread? It looks like maybe 3.8 based on what
is used here.

> This removes `"is_proc_macro": false` from `rust-project.json` in
> exchange for stricter types. This field is interpreted as false if
> absent[1] so this doesn't change the behavior of rust-analyzer.
>
> Link: https://github.com/rust-lang/rust-analyzer/blob/8d01570b5e812a49daa1f08404269f6ea5dd73a1/crates/project-model/src/project_json.rs#L372-L373 [1]
> Signed-off-by: Tamir Duberstein <tamird@gmail.com>
> ---
>  scripts/generate_rust_analyzer.py | 130 ++++++++++++++++++++++++++++----------
>  1 file changed, 96 insertions(+), 34 deletions(-)
>
> diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
> index 7e78b926e61f..c73ea8d116a4 100755
> --- a/scripts/generate_rust_analyzer.py
> +++ b/scripts/generate_rust_analyzer.py
> @@ -10,8 +10,10 @@ import os
>  import pathlib
>  import subprocess
>  import sys
> +import typing as T

Nit: is there any need to keep everything namespaced? I think it
should be fine to import `Iterable`, `TypedDict`, etc. directly since
they aren't confusable.

Same for `pathlib.Path` since there is no other `Path` (some of that
is preexisting).

> +    def append_proc_macro_crate(
> +        display_name: str,
> +        root_module: pathlib.Path,
> +        deps: list[str],
> +        cfg: list[str] = [],
> +    ) -> None:
> +        append_crate(display_name, root_module, deps, cfg)
> +        proc_macro_dylib_name = subprocess.check_output(
> +            [os.environ["RUSTC"], "--print", "file-names", "--crate-name", display_name, "--crate-type", "proc-macro", "-"],

Nit, may as well use this opportunity to wrap the line.

> +            stdin=subprocess.DEVNULL,
> +        ).decode('utf-8').strip()
> +        crate: ProcMacroCrate = {
> +            **crates[-1],
> +            "is_proc_macro": True,
> +            "proc_macro_dylib_path": f"{objtree}/rust/{proc_macro_dylib_name}",
>          }
> -        if is_proc_macro:
> -            proc_macro_dylib_name = subprocess.check_output(
> -                [os.environ["RUSTC"], "--print", "file-names", "--crate-name", display_name, "--crate-type", "proc-macro", "-"],
> -                stdin=subprocess.DEVNULL,
> -            ).decode('utf-8').strip()
> -            crate["proc_macro_dylib_path"] = f"{objtree}/rust/{proc_macro_dylib_name}"
> -        crates_indexes[display_name] = len(crates)
> -        crates.append(crate)
> +        crates[-1] = crate

The unpacking is a bit confusing here. Can `crates[-1]` just be set
rather than duplicating and replacing it?

Maybe the body of `append_crate` should be `build_crate(...) -> Crate`
(which could then be a top-level function), then `append_crate`,
`append_crate_with_generated`, etc call that and handle modification /
appending themselves.

> +        crate: CrateWithGenerated = {
> +            **crates[-1],
> +            "source": {
> +                "include_dirs": [
> +                    str(srctree / "rust" / display_name),
> +                    str(objtree / "rust")
> +                ],
> +                "exclude_dirs": [],
> +            }
>          }
> +        crates[-1] = crate

Same note as above regarding rebuilding the last item.

- Trevor
Re: [PATCH v2 4/7] scripts: generate_rust_analyzer.py: add type hints
Posted by Tamir Duberstein 9 months ago
On Wed, Mar 19, 2025 at 5:25 PM Trevor Gross <tmgross@umich.edu> wrote:
>
> On Tue, Mar 11, 2025 at 9:18 PM Tamir Duberstein <tamird@gmail.com> wrote:
> >
> > Python type hints allow static analysis tools like mypy to detect type
> > errors during development, improving the developer experience.
> >
> > Python type hints have been present in the kernel since 2019 at the
> > latest; see commit 6ebf5866f2e8 ("kunit: tool: add Python wrappers for
> > running KUnit tests").
> >
> > Run `uv tool run mypy --strict scripts/generate_rust_analyzer.py` to
> > verify.
>
> From the discussion, it may be better to instead mention the direct
> invocation (without uv).

👍 done.

> Could you also mention the target min version? Since apparently the
> kernel has a spread. It looks like maybe 3.8 based on what is used
> here.

Added; it is now 3.8 because mypy doesn't support anything lower.

>
> > This removes `"is_proc_macro": false` from `rust-project.json` in
> > exchange for stricter types. This field is interpreted as false if
> > absent[1] so this doesn't change the behavior of rust-analyzer.
> >
> > Link: https://github.com/rust-lang/rust-analyzer/blob/8d01570b5e812a49daa1f08404269f6ea5dd73a1/crates/project-model/src/project_json.rs#L372-L373 [1]
> > Signed-off-by: Tamir Duberstein <tamird@gmail.com>
> > ---
> >  scripts/generate_rust_analyzer.py | 130 ++++++++++++++++++++++++++++----------
> >  1 file changed, 96 insertions(+), 34 deletions(-)
> >
> > diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
> > index 7e78b926e61f..c73ea8d116a4 100755
> > --- a/scripts/generate_rust_analyzer.py
> > +++ b/scripts/generate_rust_analyzer.py
> > @@ -10,8 +10,10 @@ import os
> >  import pathlib
> >  import subprocess
> >  import sys
> > +import typing as T
>
> Nit: is there any need to keep everything namespaced? I think it
> should be fine to import `Iterable` `TypedDict` etc directly since
> they aren't confusable.

I saw this style in another project and liked it, but I don't feel
strongly. Changed.

>
> Same for `pathlib.Path` since there is no other `Path` (some of that
> is preexisting).

I'll leave this one as is to avoid more churn.

>
> > +    def append_proc_macro_crate(
> > +        display_name: str,
> > +        root_module: pathlib.Path,
> > +        deps: list[str],
> > +        cfg: list[str] = [],
> > +    ) -> None:
> > +        append_crate(display_name, root_module, deps, cfg)
> > +        proc_macro_dylib_name = subprocess.check_output(
> > +            [os.environ["RUSTC"], "--print", "file-names", "--crate-name", display_name, "--crate-type", "proc-macro", "-"],
>
> Nit, may as well use this opportunity to wrap the line.

I considered that, but when you use git with `-w` this line doesn't
change, so I'd rather leave it.

>
> > +            stdin=subprocess.DEVNULL,
> > +        ).decode('utf-8').strip()
> > +        crate: ProcMacroCrate = {
> > +            **crates[-1],
> > +            "is_proc_macro": True,
> > +            "proc_macro_dylib_path": f"{objtree}/rust/{proc_macro_dylib_name}",
> >          }
> > -        if is_proc_macro:
> > -            proc_macro_dylib_name = subprocess.check_output(
> > -                [os.environ["RUSTC"], "--print", "file-names", "--crate-name", display_name, "--crate-type", "proc-macro", "-"],
> > -                stdin=subprocess.DEVNULL,
> > -            ).decode('utf-8').strip()
> > -            crate["proc_macro_dylib_path"] = f"{objtree}/rust/{proc_macro_dylib_name}"
> > -        crates_indexes[display_name] = len(crates)
> > -        crates.append(crate)
> > +        crates[-1] = crate
>
> The unpacking is a bit confusing here, can `crates[-1]` just be set
> rather than duplicating and replacing it?
>
> Maybe the body of `append_crate` should be `build_crate(...) -> Crate`
> (which could then be a top-level function), then `append_crate`,
> `append_crate_with_generated`, etc call that and handle modification /
> appending themselves.

This is a nice improvement. Done.

> > +        crate: CrateWithGenerated = {
> > +            **crates[-1],
> > +            "source": {
> > +                "include_dirs": [
> > +                    str(srctree / "rust" / display_name),
> > +                    str(objtree / "rust")
> > +                ],
> > +                "exclude_dirs": [],
> > +            }
> >          }
> > +        crates[-1] = crate

👍

>
> - Trevor

Thanks for the review!
Re: [PATCH v2 4/7] scripts: generate_rust_analyzer.py: add type hints
Posted by Tamir Duberstein 9 months, 1 week ago
On Tue, Mar 11, 2025 at 9:17 PM Tamir Duberstein <tamird@gmail.com> wrote:
>
> Python type hints allow static analysis tools like mypy to detect type
> errors during development, improving the developer experience.
>
> Python type hints have been present in the kernel since 2019 at the
> latest; see commit 6ebf5866f2e8 ("kunit: tool: add Python wrappers for
> running KUnit tests").
>
> Run `uv tool run mypy --strict scripts/generate_rust_analyzer.py` to
> verify.
>
> This removes `"is_proc_macro": false` from `rust-project.json` in
> exchange for stricter types. This field is interpreted as false if
> absent[1] so this doesn't change the behavior of rust-analyzer.
>
> Link: https://github.com/rust-lang/rust-analyzer/blob/8d01570b5e812a49daa1f08404269f6ea5dd73a1/crates/project-model/src/project_json.rs#L372-L373 [1]
> Signed-off-by: Tamir Duberstein <tamird@gmail.com>

Gentle ping. Please have a look if you aren't intimidated by python
type hints :)