[PATCH v5 14/16] modules: Support extended MODVERSIONS info

Matthew Maurer posted 16 patches 2 months ago
There is a newer version of this series
[PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Matthew Maurer 2 months ago
Adds a new format for MODVERSIONS which stores each field in a separate
ELF section. This initially adds support for variable length names, but
could later be used to add additional fields to MODVERSIONS in a
backwards compatible way if needed. Any new fields will be ignored by
old user tooling, unlike the current format where user tooling cannot
tolerate adjustments to the format (for example making the name field
longer).

Since PPC munges its version records to strip leading dots, we reproduce
the munging for the new format. Other architectures do not appear to
have architecture-specific usage of this information.

Signed-off-by: Matthew Maurer <mmaurer@google.com>
---
 arch/powerpc/kernel/module_64.c | 23 ++++++++-
 kernel/module/internal.h        | 11 ++++
 kernel/module/main.c            | 92 ++++++++++++++++++++++++++++++---
 kernel/module/version.c         | 45 ++++++++++++++++
 4 files changed, 161 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index e9bab599d0c2..4e7b156dd8b2 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -355,6 +355,23 @@ static void dedotify_versions(struct modversion_info *vers,
 		}
 }
 
+static void dedotify_ext_version_names(char *str_seq, unsigned long size)
+{
+	unsigned long out = 0;
+	unsigned long in;
+	char last = '\0';
+
+	for (in = 0; in < size; in++) {
+		/* Skip one leading dot */
+		if (last == '\0' && str_seq[in] == '.')
+			in++;
+		last = str_seq[in];
+		str_seq[out++] = last;
+	}
+	/* Zero the trailing portion of the names table for robustness */
+	memset(&str_seq[out], 0, size - out);
+}
+
 /*
  * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
  * seem to be defined (value set later).
@@ -424,10 +441,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
 			me->arch.toc_section = i;
 			if (sechdrs[i].sh_addralign < 8)
 				sechdrs[i].sh_addralign = 8;
-		}
-		else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+		} else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
 			dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
 					  sechdrs[i].sh_size);
+		else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0)
+			dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset,
+						   sechdrs[i].sh_size);
 
 		if (sechdrs[i].sh_type == SHT_SYMTAB)
 			dedotify((void *)hdr + sechdrs[i].sh_offset,
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index daef2be83902..59959c21b205 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -86,6 +86,8 @@ struct load_info {
 		unsigned int vers;
 		unsigned int info;
 		unsigned int pcpu;
+		unsigned int vers_ext_crc;
+		unsigned int vers_ext_name;
 	} index;
 };
 
@@ -389,6 +391,15 @@ void module_layout(struct module *mod, struct modversion_info *ver, struct kerne
 		   struct kernel_symbol *ks, struct tracepoint * const *tp);
 int check_modstruct_version(const struct load_info *info, struct module *mod);
 int same_magic(const char *amagic, const char *bmagic, bool has_crcs);
+struct modversion_info_ext {
+	size_t remaining;
+	const s32 *crc;
+	const char *name;
+};
+void modversion_ext_start(const struct load_info *info, struct modversion_info_ext *ver);
+void modversion_ext_advance(struct modversion_info_ext *ver);
+#define for_each_modversion_info_ext(ver, info) \
+	for (modversion_ext_start(info, &ver); ver.remaining > 0; modversion_ext_advance(&ver))
 #else /* !CONFIG_MODVERSIONS */
 static inline int check_version(const struct load_info *info,
 				const char *symname,
diff --git a/kernel/module/main.c b/kernel/module/main.c
index b40b632f00a6..9a9feca344f8 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2039,6 +2039,82 @@ static int elf_validity_cache_index_str(struct load_info *info)
 	return 0;
 }
 
+/**
+ * elf_validity_cache_index_versions() - Validate and cache version indices
+ * @info:  Load info to cache version indices in.
+ *         Must have &load_info->sechdrs and &load_info->secstrings populated.
+ * @flags: Load flags, relevant to suppress version loading, see
+ *         uapi/linux/module.h
+ *
+ * If we're ignoring modversions based on @flags, zero all version indices
+ * and return validity. Othewrise check:
+ *
+ * * If "__version_ext_crcs" is present, "__version_ext_names" is present
+ * * There is a name present for every crc
+ *
+ * Then populate:
+ *
+ * * &load_info->index.vers
+ * * &load_info->index.vers_ext_crc
+ * * &load_info->index.vers_ext_names
+ *
+ * if present.
+ *
+ * Return: %0 if valid, %-ENOEXEC on failure.
+ */
+static int elf_validity_cache_index_versions(struct load_info *info, int flags)
+{
+	unsigned int vers_ext_crc;
+	unsigned int vers_ext_name;
+	size_t crc_count;
+	size_t remaining_len;
+	size_t name_size;
+	char *name;
+
+	/* If modversions were suppressed, pretend we didn't find any */
+	if (flags & MODULE_INIT_IGNORE_MODVERSIONS) {
+		info->index.vers = 0;
+		info->index.vers_ext_crc = 0;
+		info->index.vers_ext_name = 0;
+		return 0;
+	}
+
+	vers_ext_crc = find_sec(info, "__version_ext_crcs");
+	vers_ext_name = find_sec(info, "__version_ext_names");
+
+	/* If we have one field, we must have the other */
+	if (!!vers_ext_crc != !!vers_ext_name) {
+		pr_err("extended version crc+name presence does not match");
+		return -ENOEXEC;
+	}
+
+	/*
+	 * If we have extended version information, we should have the same
+	 * number of entries in every section.
+	 */
+	if (vers_ext_crc) {
+		crc_count = info->sechdrs[vers_ext_crc].sh_size / sizeof(s32);
+		name = (void *)info->hdr +
+			info->sechdrs[vers_ext_name].sh_offset;
+		remaining_len = info->sechdrs[vers_ext_name].sh_size;
+
+		while (crc_count--) {
+			name_size = strnlen(name, remaining_len) + 1;
+			if (name_size > remaining_len) {
+				pr_err("more extended version crcs than names");
+				return -ENOEXEC;
+			}
+			remaining_len -= name_size;
+			name += name_size;
+		}
+	}
+
+	info->index.vers = find_sec(info, "__versions");
+	info->index.vers_ext_crc = vers_ext_crc;
+	info->index.vers_ext_name = vers_ext_name;
+	return 0;
+}
+
 /**
  * elf_validity_cache_index() - Resolve, validate, cache section indices
  * @info:  Load info to read from and update.
@@ -2053,9 +2129,7 @@ static int elf_validity_cache_index_str(struct load_info *info)
  * * elf_validity_cache_index_mod()
  * * elf_validity_cache_index_sym()
  * * elf_validity_cache_index_str()
- *
- * If versioning is not suppressed via flags, load the version index from
- * a section called "__versions" with no validation.
+ * * elf_validity_cache_index_versions()
  *
  * If CONFIG_SMP is enabled, load the percpu section by name with no
  * validation.
@@ -2078,11 +2152,9 @@ static int elf_validity_cache_index(struct load_info *info, int flags)
 	err = elf_validity_cache_index_str(info);
 	if (err < 0)
 		return err;
-
-	if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
-		info->index.vers = 0; /* Pretend no __versions section! */
-	else
-		info->index.vers = find_sec(info, "__versions");
+	err = elf_validity_cache_index_versions(info, flags);
+	if (err < 0)
+		return err;
 
 	info->index.pcpu = find_pcpusec(info);
 
@@ -2293,6 +2365,10 @@ static int rewrite_section_headers(struct load_info *info, int flags)
 
 	/* Track but don't keep modinfo and version sections. */
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+	info->sechdrs[info->index.vers_ext_crc].sh_flags &=
+		~(unsigned long)SHF_ALLOC;
+	info->sechdrs[info->index.vers_ext_name].sh_flags &=
+		~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 
 	return 0;
diff --git a/kernel/module/version.c b/kernel/module/version.c
index 53f43ac5a73e..c246d4087970 100644
--- a/kernel/module/version.c
+++ b/kernel/module/version.c
@@ -19,11 +19,28 @@ int check_version(const struct load_info *info,
 	unsigned int versindex = info->index.vers;
 	unsigned int i, num_versions;
 	struct modversion_info *versions;
+	struct modversion_info_ext version_ext;
 
 	/* Exporting module didn't supply crcs?  OK, we're already tainted. */
 	if (!crc)
 		return 1;
 
+	/* If we have extended version info, rely on it */
+	if (info->index.vers_ext_crc) {
+		for_each_modversion_info_ext(version_ext, info) {
+			if (strcmp(version_ext.name, symname) != 0)
+				continue;
+			if (*version_ext.crc == *crc)
+				return 1;
+			pr_debug("Found checksum %X vs module %X\n",
+				 *crc, *version_ext.crc);
+			goto bad_version;
+		}
+		pr_warn_once("%s: no extended symbol version for %s\n",
+			     info->name, symname);
+		return 1;
+	}
+
 	/* No versions at all?  modprobe --force does this. */
 	if (versindex == 0)
 		return try_to_force_load(mod, symname) == 0;
@@ -87,6 +104,34 @@ int same_magic(const char *amagic, const char *bmagic,
 	return strcmp(amagic, bmagic) == 0;
 }
 
+void modversion_ext_start(const struct load_info *info,
+			  struct modversion_info_ext *start)
+{
+	unsigned int crc_idx = info->index.vers_ext_crc;
+	unsigned int name_idx = info->index.vers_ext_name;
+	Elf_Shdr *sechdrs = info->sechdrs;
+
+	/*
+	 * Both of these fields are needed for this to be useful
+	 * Any future fields should be initialized to NULL if absent.
+	 */
+	if (crc_idx == 0 || name_idx == 0) {
+		start->remaining = 0;
+		return;
+	}
+
+	start->crc = (const s32 *)sechdrs[crc_idx].sh_addr;
+	start->name = (const char *)sechdrs[name_idx].sh_addr;
+	start->remaining = sechdrs[crc_idx].sh_size / sizeof(*start->crc);
+}
+
+void modversion_ext_advance(struct modversion_info_ext *vers)
+{
+	vers->remaining--;
+	vers->crc++;
+	vers->name += strlen(vers->name) + 1;
+}
+
 /*
  * Generate the signature for all relevant module structures here.
  * If these change, we don't want to try to parse the module.
-- 
2.46.1.824.gd892dcdcdd-goog
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Luis Chamberlain 1 month, 2 weeks ago
On Wed, Sep 25, 2024 at 11:38:29PM +0000, Matthew Maurer wrote:
> Adds a new format for MODVERSIONS which stores each field in a separate
> ELF section. This initially adds support for variable length names, but
> could later be used to add additional fields to MODVERSIONS in a
> backwards compatible way if needed. Any new fields will be ignored by
> old user tooling, unlike the current format where user tooling cannot
> tolerate adjustments to the format (for example making the name field
> longer).
> 
> Since PPC munges its version records to strip leading dots, we reproduce
> the munging for the new format. Other architectures do not appear to
> have architecture-specific usage of this information.
> 
> Signed-off-by: Matthew Maurer <mmaurer@google.com>

I'm all for the ELF validation work so far, all that was nice, thanks
for all that tidying up. This however is not considering when we really
need all this at all, and not making it specific to the build times when
such things are needed. That is, yes I'd like to see the need for this
clearly explicitly defined through Kconfig, a *select FOO_FEATURE* for
when this is needed. No need to extend a module with bloat if we don't
need it, likewise if a kernel was built without needing those things,
why bloat the modules with the extra information?

  Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Matthew Maurer 1 month, 2 weeks ago
On Fri, Oct 11, 2024 at 3:22 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Wed, Sep 25, 2024 at 11:38:29PM +0000, Matthew Maurer wrote:
> > Adds a new format for MODVERSIONS which stores each field in a separate
> > ELF section. This initially adds support for variable length names, but
> > could later be used to add additional fields to MODVERSIONS in a
> > backwards compatible way if needed. Any new fields will be ignored by
> > old user tooling, unlike the current format where user tooling cannot
> > tolerate adjustments to the format (for example making the name field
> > longer).
> >
> > Since PPC munges its version records to strip leading dots, we reproduce
> > the munging for the new format. Other architectures do not appear to
> > have architecture-specific usage of this information.
> >
> > Signed-off-by: Matthew Maurer <mmaurer@google.com>
>
> I'm all for the ELF validation work so far, all that was nice, thanks
> for all that tidying up. This however is not considering when we really
> need all this at all, and not making it specific to the build times when
> such things are needed. That is, yes I'd like to see the need for this
> clearly explicitly defined through Kconfig, a *select FOO_FEATURE* for
> when this is needed. No need to extend a module with bloat if we don't
> need it, likewise if a kernel was built without needing those things,
> why bloat the modules with the extra information?

To make sure I understand what you're asking for, are you suggesting:
1. A config flag for supporting parsing the extended format
2. A config flag for supporting parsing the existing format
3. A config flag for putting the extended format into produced modules
4. A config flag for putting the existing format into produced modules
or some combination of the above?

I'm currently reading this as #3, but figured I'd ask to be certain.

>
>   Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Luis Chamberlain 1 month, 2 weeks ago
On Fri, Oct 11, 2024 at 03:27:30PM -0700, Matthew Maurer wrote:
> On Fri, Oct 11, 2024 at 3:22 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > On Wed, Sep 25, 2024 at 11:38:29PM +0000, Matthew Maurer wrote:
> > > Adds a new format for MODVERSIONS which stores each field in a separate
> > > ELF section. This initially adds support for variable length names, but
> > > could later be used to add additional fields to MODVERSIONS in a
> > > backwards compatible way if needed. Any new fields will be ignored by
> > > old user tooling, unlike the current format where user tooling cannot
> > > tolerate adjustments to the format (for example making the name field
> > > longer).
> > >
> > > Since PPC munges its version records to strip leading dots, we reproduce
> > > the munging for the new format. Other architectures do not appear to
> > > have architecture-specific usage of this information.
> > >
> > > Signed-off-by: Matthew Maurer <mmaurer@google.com>
> >
> > I'm all for the ELF validation work so far, all that was nice, thanks
> > for all that tidying up. This however is not considering when we really
> > need all this at all, and not making it specific to the build times when
> > such things are needed. That is, yes I'd like to see the need for this
> > clearly explicitly defined through Kconfig, a *select FOO_FEATURE* for
> > when this is needed. No need to extend a module with bloat if we don't
> > need it, likewise if a kernel was built without needing those things,
> > why bloat the modules with the extra information?
> 
> To make sure I understand what you're asking for, are you suggesting:
> 1. A config flag for supporting parsing the extended format
> 2. A config flag for supporting parsing the existing format
> 3. A config flag for putting the extended format into produced modules
> 4. A config flag for putting the existing format into produced modules
> or some combination of the above?
> 
> I'm currently reading this as #3, but figured I'd ask to be certain.

3), but if your kernel build does not require these extra things, then
a simple if !(IS_ENABLED) sanity check could be put in place to avoid
processing the information if the kernel didn't need it. It's a one line
change. So at run time, we build the same kernel with all that code in,
but it makes no sense to be processing modules with that stuff if
kernels did not need it.

  Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Luis Chamberlain 1 month, 2 weeks ago
Also, just as I asked Sami, coould you split this up into patch sets?
One with all the cleanups and elf validation code shifts. And then the
other code. That will let me pick up quickly the first patch set.

  Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Luis Chamberlain 1 month, 2 weeks ago
On Fri, Oct 11, 2024 at 04:45:25PM -0700, Luis Chamberlain wrote:
> 
> Also, just as I asked Sami, coould you split this up into patch sets?
> One with all the cleanups and elf validation code shifts. And then the
> other code. That will let me pick up quickly the first patch set.

Oh and if you can think of ways to enhance our test covereage on all
this as I noted to Sami, it would be greatly appreciated.

  Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Matthew Maurer 1 month, 2 weeks ago
So, the basic things I can think of to test here are:

1. The kernel can still load the previous MODVERSIONS format
2. The kernel can load the new MODVERSIONS format
3. If we artificially tweak a CRC in the previous format, it will fail to load.
4. If we artificially tweak a CRC in the new format, it will fail to load.
5. With CONFIG_EXTENDED_MODVERSIONS enabled, the kernel will build and
load modules with long symbol names, with MODVERSIONS enabled.

Is there anything else you were thinking of here, or are those the
kinds of checks you were envisioning?

On Fri, Oct 11, 2024 at 4:46 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Fri, Oct 11, 2024 at 04:45:25PM -0700, Luis Chamberlain wrote:
> >
> > Also, just as I asked Sami, coould you split this up into patch sets?
> > One with all the cleanups and elf validation code shifts. And then the
> > other code. That will let me pick up quickly the first patch set.
>
> Oh and if you can think of ways to enhance our test covereage on all
> this as I noted to Sami, it would be greatly appreciated.
>
>   Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Luis Chamberlain 1 month, 1 week ago
On Tue, Oct 15, 2024 at 04:22:22PM -0700, Matthew Maurer wrote:
> So, the basic things I can think of to test here are:
> 
> 1. The kernel can still load the previous MODVERSIONS format
> 2. The kernel can load the new MODVERSIONS format
> 3. If we artificially tweak a CRC in the previous format, it will fail to load.
> 4. If we artificially tweak a CRC in the new format, it will fail to load.
> 5. With CONFIG_EXTENDED_MODVERSIONS enabled, the kernel will build and
> load modules with long symbol names, with MODVERSIONS enabled.
> 
> Is there anything else you were thinking of here, or are those the
> kinds of checks you were envisioning?

That sounds great. Yeah, the above would be great to test. A while ago
I wrote a new modules selftests in order to test possible improvements
on find_symbol() but I also did this due to push the limits of the
numbers of symbols we could support. I wrote all this to also test the
possible 64-bit alignment benefits of __ksymtab_ sections on
architectures without CONFIG_HAVE_ARCH_PREL32_RELOCATIONS (e.g. ppc64,
ppc64le, parisc, s390x,...). But come to think of it, you might be
able to easily leverage this to also just test long symbols by self
generated symbols as another test case. In case its useful to you I've
put this in a rebased branch 20241016-modules-symtab branch. Feel free
to use as you see fit.

I forget what we concluded on Helge Deller's alignement patches, I think
there was an idea on how to address the alignment through other means.

[0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=20241016-modules-symtab

  Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Helge Deller 1 month, 1 week ago
Hi Luis,

On 10/17/24 01:21, Luis Chamberlain wrote:
> That sounds great. Yeah, the above would be great to test. A while ago
> I wrote a new modules selftests in order to test possible improvements
> on find_symbol() but I also did this due to push the limits of the
> numbers of symbols we could support. I wrote all this to also test the
> possible 64-bit alignment benefits of __ksymtab_ sections on
> architectures without CONFIG_HAVE_ARCH_PREL32_RELOCATIONS (e.g. ppc64,
> ppc64le, parisc, s390x,...). [....]
>
> I forget what we concluded on Helge Deller's alignement patches, I think
> there was an idea on how to address the alignment through other means.
>
> [0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=20241016-modules-symtab

I stumbled upon the unaligned-memory-access.rst document [1].
Please read it, as it is a really good document, and the section
"Why unaligned access is bad" states:
It should be obvious from the above that if your code causes unaligned
memory accesses to happen, your code will not work correctly on certain
platforms and will cause performance problems on others.

With this in mind, you really should apply both of my alignment
patches which you currently carry in [0].

For parisc I partly solved the issue by fixing the arch-specific kernel unalignment
handler, but every time module sections are stored unaligned, it triggers
performance degregation on parisc (and other sensitive platforms).

I suggest you apply them unconditionally.

Helge

[1]  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/core-api/unaligned-memory-access.rst
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Luis Chamberlain 1 month, 1 week ago
On Thu, Oct 17, 2024 at 02:08:19PM +0200, Helge Deller wrote:
> Hi Luis,
> 
> On 10/17/24 01:21, Luis Chamberlain wrote:
> > That sounds great. Yeah, the above would be great to test. A while ago
> > I wrote a new modules selftests in order to test possible improvements
> > on find_symbol() but I also did this due to push the limits of the
> > numbers of symbols we could support. I wrote all this to also test the
> > possible 64-bit alignment benefits of __ksymtab_ sections on
> > architectures without CONFIG_HAVE_ARCH_PREL32_RELOCATIONS (e.g. ppc64,
> > ppc64le, parisc, s390x,...). [....]
> > 
> > I forget what we concluded on Helge Deller's alignement patches, I think
> > there was an idea on how to address the alignment through other means.
> > 
> > [0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=20241016-modules-symtab
> 
> I stumbled upon the unaligned-memory-access.rst document [1].
> Please read it, as it is a really good document, and the section
> "Why unaligned access is bad" states:
> It should be obvious from the above that if your code causes unaligned
> memory accesses to happen, your code will not work correctly on certain
> platforms and will cause performance problems on others.
> 
> With this in mind, you really should apply both of my alignment
> patches which you currently carry in [0].
> 
> For parisc I partly solved the issue by fixing the arch-specific kernel unalignment
> handler, but every time module sections are stored unaligned, it triggers
> performance degregation on parisc (and other sensitive platforms).
> 
> I suggest you apply them unconditionally.
> 
> Helge
> 
> [1]  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/core-api/unaligned-memory-access.rst

You're right, I've just referred to that doc and pushed to the new
linux modules [2] modules-next branch. This is also great timing so
that the work that is ongoing for Rust will take this into
consideration as well. I'll just post the test I wrote as separate
thing but it surely can be used to help test some of this later.

[2] git://git.kernel.org/pub/scm/linux/kernel/git/modules/linux.git

  Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Luis Chamberlain 1 month, 1 week ago
On Sat, Oct 19, 2024 at 01:45:35PM -0700, Luis Chamberlain wrote:
> On Thu, Oct 17, 2024 at 02:08:19PM +0200, Helge Deller wrote:
> > Hi Luis,
> > 
> > On 10/17/24 01:21, Luis Chamberlain wrote:
> > > That sounds great. Yeah, the above would be great to test. A while ago
> > > I wrote a new modules selftests in order to test possible improvements
> > > on find_symbol() but I also did this due to push the limits of the
> > > numbers of symbols we could support. I wrote all this to also test the
> > > possible 64-bit alignment benefits of __ksymtab_ sections on
> > > architectures without CONFIG_HAVE_ARCH_PREL32_RELOCATIONS (e.g. ppc64,
> > > ppc64le, parisc, s390x,...). [....]
> > > 
> > > I forget what we concluded on Helge Deller's alignement patches, I think
> > > there was an idea on how to address the alignment through other means.
> > > 
> > > [0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=20241016-modules-symtab
> > 
> > I stumbled upon the unaligned-memory-access.rst document [1].
> > Please read it, as it is a really good document, and the section
> > "Why unaligned access is bad" states:
> > It should be obvious from the above that if your code causes unaligned
> > memory accesses to happen, your code will not work correctly on certain
> > platforms and will cause performance problems on others.
> > 
> > With this in mind, you really should apply both of my alignment
> > patches which you currently carry in [0].
> > 
> > For parisc I partly solved the issue by fixing the arch-specific kernel unalignment
> > handler, but every time module sections are stored unaligned, it triggers
> > performance degregation on parisc (and other sensitive platforms).
> > 
> > I suggest you apply them unconditionally.
> > 
> > Helge
> > 
> > [1]  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/core-api/unaligned-memory-access.rst
> 
> You're right, I've just referred to that doc and pushed to the new
> linux modules [2] modules-next branch. This is also great timing so
> that the work that is ongoing for Rust will take this into
> consideration as well. I'll just post the test I wrote as separate
> thing but it surely can be used to help test some of this later.
> 
> [2] git://git.kernel.org/pub/scm/linux/kernel/git/modules/linux.git

Helge,

I went down memory lane and noted Masahiro asked for this to be done
in asm so I droped your patches. Feel free to post a new iteration.

  Luis
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Christophe Leroy 1 month, 1 week ago

Le 17/10/2024 à 01:21, Luis Chamberlain a écrit :
> On Tue, Oct 15, 2024 at 04:22:22PM -0700, Matthew Maurer wrote:
>> So, the basic things I can think of to test here are:
>>
>> 1. The kernel can still load the previous MODVERSIONS format
>> 2. The kernel can load the new MODVERSIONS format
>> 3. If we artificially tweak a CRC in the previous format, it will fail to load.
>> 4. If we artificially tweak a CRC in the new format, it will fail to load.
>> 5. With CONFIG_EXTENDED_MODVERSIONS enabled, the kernel will build and
>> load modules with long symbol names, with MODVERSIONS enabled.
>>
>> Is there anything else you were thinking of here, or are those the
>> kinds of checks you were envisioning?
> 
> That sounds great. Yeah, the above would be great to test. A while ago
> I wrote a new modules selftests in order to test possible improvements
> on find_symbol() but I also did this due to push the limits of the
> numbers of symbols we could support. I wrote all this to also test the
> possible 64-bit alignment benefits of __ksymtab_ sections on
> architectures without CONFIG_HAVE_ARCH_PREL32_RELOCATIONS (e.g. ppc64,
> ppc64le, parisc, s390x,...). But come to think of it, you might be
> able to easily leverage this to also just test long symbols by self
> generated symbols as another test case. In case its useful to you I've
> put this in a rebased branch 20241016-modules-symtab branch. Feel free
> to use as you see fit.

By reading this, I discovered that was initially added to powerpc by 
commit 271ca788774a ("arch: enable relative relocations for arm64, power 
and x86") and then removed due to problem with modules, see commit 
ff69279a44e9 ("powerpc: disable support for relative ksymtab references")

Wouldn't it be better to try and fix modules and activate again 
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS on powerpc ?

Christophe
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Christophe Leroy 2 months ago

Le 26/09/2024 à 01:38, Matthew Maurer a écrit :
> Adds a new format for MODVERSIONS which stores each field in a separate
> ELF section. This initially adds support for variable length names, but
> could later be used to add additional fields to MODVERSIONS in a
> backwards compatible way if needed. Any new fields will be ignored by
> old user tooling, unlike the current format where user tooling cannot
> tolerate adjustments to the format (for example making the name field
> longer).
> 
> Since PPC munges its version records to strip leading dots, we reproduce
> the munging for the new format. Other architectures do not appear to
> have architecture-specific usage of this information.
> 
> Signed-off-by: Matthew Maurer <mmaurer@google.com>
> ---
>   arch/powerpc/kernel/module_64.c | 23 ++++++++-
>   kernel/module/internal.h        | 11 ++++
>   kernel/module/main.c            | 92 ++++++++++++++++++++++++++++++---
>   kernel/module/version.c         | 45 ++++++++++++++++
>   4 files changed, 161 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
> index e9bab599d0c2..4e7b156dd8b2 100644
> --- a/arch/powerpc/kernel/module_64.c
> +++ b/arch/powerpc/kernel/module_64.c
> @@ -355,6 +355,23 @@ static void dedotify_versions(struct modversion_info *vers,
>   		}
>   }
>   
> +static void dedotify_ext_version_names(char *str_seq, unsigned long size)
> +{
> +	unsigned long out = 0;
> +	unsigned long in;
> +	char last = '\0';
> +
> +	for (in = 0; in < size; in++) {
> +		/* Skip one leading dot */
> +		if (last == '\0' && str_seq[in] == '.')
> +			in++;
> +		last = str_seq[in];
> +		str_seq[out++] = last;
> +	}

Why do you need a loop here ?

Can't you just do something like:

	if (str_seq[0] == '.')
		memmove(str_seq, str_seq + 1, size);


> +	/* Zero the trailing portion of the names table for robustness */
> +	memset(&str_seq[out], 0, size - out);

This seems unneeded.

> +}
> +
>   /*
>    * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
>    * seem to be defined (value set later).



Christophe
Re: [PATCH v5 14/16] modules: Support extended MODVERSIONS info
Posted by Sami Tolvanen 2 months ago
On Thu, Sep 26, 2024 at 5:22 AM Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
>
>
> Le 26/09/2024 à 01:38, Matthew Maurer a écrit :
> > Adds a new format for MODVERSIONS which stores each field in a separate
> > ELF section. This initially adds support for variable length names, but
> > could later be used to add additional fields to MODVERSIONS in a
> > backwards compatible way if needed. Any new fields will be ignored by
> > old user tooling, unlike the current format where user tooling cannot
> > tolerate adjustments to the format (for example making the name field
> > longer).
> >
> > Since PPC munges its version records to strip leading dots, we reproduce
> > the munging for the new format. Other architectures do not appear to
> > have architecture-specific usage of this information.
> >
> > Signed-off-by: Matthew Maurer <mmaurer@google.com>
> > ---
> >   arch/powerpc/kernel/module_64.c | 23 ++++++++-
> >   kernel/module/internal.h        | 11 ++++
> >   kernel/module/main.c            | 92 ++++++++++++++++++++++++++++++---
> >   kernel/module/version.c         | 45 ++++++++++++++++
> >   4 files changed, 161 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
> > index e9bab599d0c2..4e7b156dd8b2 100644
> > --- a/arch/powerpc/kernel/module_64.c
> > +++ b/arch/powerpc/kernel/module_64.c
> > @@ -355,6 +355,23 @@ static void dedotify_versions(struct modversion_info *vers,
> >               }
> >   }
> >
> > +static void dedotify_ext_version_names(char *str_seq, unsigned long size)
> > +{
> > +     unsigned long out = 0;
> > +     unsigned long in;
> > +     char last = '\0';
> > +
> > +     for (in = 0; in < size; in++) {
> > +             /* Skip one leading dot */
> > +             if (last == '\0' && str_seq[in] == '.')
> > +                     in++;
> > +             last = str_seq[in];
> > +             str_seq[out++] = last;
> > +     }
>
> Why do you need a loop here ?
>
> Can't you just do something like:
>
>         if (str_seq[0] == '.')
>                 memmove(str_seq, str_seq + 1, size);

I initially had the same thought, but it's because this is is a
sequence of multiple null-terminated strings, and we need to dedotify
all of them, not just the first one. Here's an example:

https://godbolt.org/z/avMGnd48M

> > +     /* Zero the trailing portion of the names table for robustness */
> > +     memset(&str_seq[out], 0, size - out);
>
> This seems unneeded.

Strictly speaking it shouldn't be needed, but I think it's still good
hygiene to not leave another null-terminated fragment at the end.

Sami