[v3] lib/base64: add generic encoder/decoder, migrate users

[PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 4 months, 2 weeks ago

From: Kuan-Wei Chiu <visitorckw@gmail.com>

Replace the use of strchr() in base64_decode() with precomputed reverse
lookup tables for each variant. This avoids repeated string scans and
improves performance. Use -1 in the tables to mark invalid characters.

Decode:
  64B   ~1530ns  ->  ~75ns    (~20.4x)
  1KB  ~27726ns  -> ~1165ns   (~23.8x)

Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
---
 lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 5 deletions(-)

diff --git a/lib/base64.c b/lib/base64.c
index 1af557785..b20fdf168 100644
--- a/lib/base64.c
+++ b/lib/base64.c
@@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
 	[BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
 };
 
+static const s8 base64_rev_tables[][256] = {
+	[BASE64_STD] = {
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
+	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
+	 -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
+	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	},
+	[BASE64_URLSAFE] = {
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
+	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
+	 -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
+	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	},
+	[BASE64_IMAP] = {
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
+	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
+	 -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
+	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	},
+};
+
 /**
  * base64_encode() - Base64-encode some binary data
  * @src: the binary data to encode
@@ -82,11 +139,9 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
 	int bits = 0;
 	int i;
 	u8 *bp = dst;
-	const char *base64_table = base64_tables[variant];
+	s8 ch;
 
 	for (i = 0; i < srclen; i++) {
-		const char *p = strchr(base64_table, src[i]);
-
 		if (src[i] == '=') {
 			ac = (ac << 6);
 			bits += 6;
@@ -94,9 +149,10 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
 				bits -= 8;
 			continue;
 		}
-		if (p == NULL || src[i] == 0)
+		ch = base64_rev_tables[variant][(u8)src[i]];
+		if (ch == -1)
 			return -1;
-		ac = (ac << 6) | (p - base64_table);
+		ac = (ac << 6) | ch;
 		bits += 6;
 		if (bits >= 8) {
 			bits -= 8;
-- 
2.34.1

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by David Laight 4 months, 2 weeks ago

On Fri, 26 Sep 2025 14:55:56 +0800
Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:

> From: Kuan-Wei Chiu <visitorckw@gmail.com>
> 
> Replace the use of strchr() in base64_decode() with precomputed reverse
> lookup tables for each variant. This avoids repeated string scans and
> improves performance. Use -1 in the tables to mark invalid characters.
> 
> Decode:
>   64B   ~1530ns  ->  ~75ns    (~20.4x)
>   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> 
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> ---
>  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 61 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/base64.c b/lib/base64.c
> index 1af557785..b20fdf168 100644
> --- a/lib/base64.c
> +++ b/lib/base64.c
> @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
>  	[BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
>  };
>  
> +static const s8 base64_rev_tables[][256] = {
> +	[BASE64_STD] = {
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> +	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> +	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	},

Using:
	[BASE64_STD] = {
	[0 ... 255] = -1,
	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 48, 50, 51,
	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
	['+'] = 62,
	['/'] = 63};
would be more readable.
(Assuming no one has turned on a warning that stops you defaulting the entries to -1.)

The is also definitely scope for a #define to common things up.
Even if it has to have the values for all the 5 special characters (-1 if not used)
rather than the characters for 62 and 63.

	David

> +	[BASE64_URLSAFE] = {
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> +	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> +	 -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> +	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	},
> +	[BASE64_IMAP] = {
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> +	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> +	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	 -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +	},
> +};
> +
>  /**
>   * base64_encode() - Base64-encode some binary data
>   * @src: the binary data to encode
> @@ -82,11 +139,9 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
>  	int bits = 0;
>  	int i;
>  	u8 *bp = dst;
> -	const char *base64_table = base64_tables[variant];
> +	s8 ch;
>  
>  	for (i = 0; i < srclen; i++) {
> -		const char *p = strchr(base64_table, src[i]);
> -
>  		if (src[i] == '=') {
>  			ac = (ac << 6);
>  			bits += 6;
> @@ -94,9 +149,10 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
>  				bits -= 8;
>  			continue;
>  		}
> -		if (p == NULL || src[i] == 0)
> +		ch = base64_rev_tables[variant][(u8)src[i]];
> +		if (ch == -1)
>  			return -1;
> -		ac = (ac << 6) | (p - base64_table);
> +		ac = (ac << 6) | ch;
>  		bits += 6;
>  		if (bits >= 8) {
>  			bits -= 8;

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Caleb Sander Mateos 4 months, 2 weeks ago

On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
>
> From: Kuan-Wei Chiu <visitorckw@gmail.com>
>
> Replace the use of strchr() in base64_decode() with precomputed reverse
> lookup tables for each variant. This avoids repeated string scans and
> improves performance. Use -1 in the tables to mark invalid characters.
>
> Decode:
>   64B   ~1530ns  ->  ~75ns    (~20.4x)
>   1KB  ~27726ns  -> ~1165ns   (~23.8x)
>
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> ---
>  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 61 insertions(+), 5 deletions(-)
>
> diff --git a/lib/base64.c b/lib/base64.c
> index 1af557785..b20fdf168 100644
> --- a/lib/base64.c
> +++ b/lib/base64.c
> @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
>         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
>  };
>
> +static const s8 base64_rev_tables[][256] = {
> +       [BASE64_STD] = {
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +       },
> +       [BASE64_URLSAFE] = {
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +       },
> +       [BASE64_IMAP] = {
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> +       },

Do we actually need 3 separate lookup tables? It looks like all 3
variants agree on the value of any characters they have in common. So
we could combine them into a single lookup table that would work for a
valid base64 string of any variant. The only downside I can see is
that base64 strings which are invalid in some variants might no longer
be rejected by base64_decode().

> +};
> +
>  /**
>   * base64_encode() - Base64-encode some binary data
>   * @src: the binary data to encode
> @@ -82,11 +139,9 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
>         int bits = 0;
>         int i;
>         u8 *bp = dst;
> -       const char *base64_table = base64_tables[variant];
> +       s8 ch;
>
>         for (i = 0; i < srclen; i++) {
> -               const char *p = strchr(base64_table, src[i]);
> -
>                 if (src[i] == '=') {
>                         ac = (ac << 6);
>                         bits += 6;
> @@ -94,9 +149,10 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
>                                 bits -= 8;
>                         continue;
>                 }
> -               if (p == NULL || src[i] == 0)
> +               ch = base64_rev_tables[variant][(u8)src[i]];
> +               if (ch == -1)

Checking for < 0 can save an additional comparison here.

Best,
Caleb

>                         return -1;
> -               ac = (ac << 6) | (p - base64_table);
> +               ac = (ac << 6) | ch;
>                 bits += 6;
>                 if (bits >= 8) {
>                         bits -= 8;
> --
> 2.34.1
>
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 4 months, 1 week ago

On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:
> On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> >
> > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> >
> > Replace the use of strchr() in base64_decode() with precomputed reverse
> > lookup tables for each variant. This avoids repeated string scans and
> > improves performance. Use -1 in the tables to mark invalid characters.
> >
> > Decode:
> >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> >
> > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > ---
> >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 61 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/base64.c b/lib/base64.c
> > index 1af557785..b20fdf168 100644
> > --- a/lib/base64.c
> > +++ b/lib/base64.c
> > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> >  };
> >
> > +static const s8 base64_rev_tables[][256] = {
> > +       [BASE64_STD] = {
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +       },
> > +       [BASE64_URLSAFE] = {
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +       },
> > +       [BASE64_IMAP] = {
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +       },
> 
> Do we actually need 3 separate lookup tables? It looks like all 3
> variants agree on the value of any characters they have in common. So
> we could combine them into a single lookup table that would work for a
> valid base64 string of any variant. The only downside I can see is
> that base64 strings which are invalid in some variants might no longer
> be rejected by base64_decode().
>

In addition to the approach David mentioned, maybe we can use a common
lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
symbols with a switch.

For example:

static const s8 base64_rev_common[256] = {
    [0 ... 255] = -1,
    ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,
    ['a'] = 26, /* ... */, ['z'] = 51,
    ['0'] = 52, /* ... */, ['9'] = 61,
};

static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
    s8 v = base64_rev_common[c];
    if (v != -1)
        return v;

    switch (variant) {
    case BASE64_STD:
        if (c == '+') return 62;
        if (c == '/') return 63;
        break;
    case BASE64_IMAP:
    	if (c == '+') return 62;
        if (c == ',') return 63;
        break;
    case BASE64_URLSAFE:
        if (c == '-') return 62;
        if (c == '_') return 63;
	break;
    }
    return -1;
}

What do you think?

Best regards,
Guan-Chun

> > +};
> > +
> >  /**
> >   * base64_encode() - Base64-encode some binary data
> >   * @src: the binary data to encode
> > @@ -82,11 +139,9 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
> >         int bits = 0;
> >         int i;
> >         u8 *bp = dst;
> > -       const char *base64_table = base64_tables[variant];
> > +       s8 ch;
> >
> >         for (i = 0; i < srclen; i++) {
> > -               const char *p = strchr(base64_table, src[i]);
> > -
> >                 if (src[i] == '=') {
> >                         ac = (ac << 6);
> >                         bits += 6;
> > @@ -94,9 +149,10 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
> >                                 bits -= 8;
> >                         continue;
> >                 }
> > -               if (p == NULL || src[i] == 0)
> > +               ch = base64_rev_tables[variant][(u8)src[i]];
> > +               if (ch == -1)
> 
> Checking for < 0 can save an additional comparison here.
> 
> Best,
> Caleb
> 
> >                         return -1;
> > -               ac = (ac << 6) | (p - base64_table);
> > +               ac = (ac << 6) | ch;
> >                 bits += 6;
> >                 if (bits >= 8) {
> >                         bits -= 8;
> > --
> > 2.34.1
> >
> >

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Caleb Sander Mateos 4 months, 1 week ago

On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
>
> On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:
> > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > >
> > > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> > >
> > > Replace the use of strchr() in base64_decode() with precomputed reverse
> > > lookup tables for each variant. This avoids repeated string scans and
> > > improves performance. Use -1 in the tables to mark invalid characters.
> > >
> > > Decode:
> > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > >
> > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > ---
> > >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/lib/base64.c b/lib/base64.c
> > > index 1af557785..b20fdf168 100644
> > > --- a/lib/base64.c
> > > +++ b/lib/base64.c
> > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > >  };
> > >
> > > +static const s8 base64_rev_tables[][256] = {
> > > +       [BASE64_STD] = {
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +       },
> > > +       [BASE64_URLSAFE] = {
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +       },
> > > +       [BASE64_IMAP] = {
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > +       },
> >
> > Do we actually need 3 separate lookup tables? It looks like all 3
> > variants agree on the value of any characters they have in common. So
> > we could combine them into a single lookup table that would work for a
> > valid base64 string of any variant. The only downside I can see is
> > that base64 strings which are invalid in some variants might no longer
> > be rejected by base64_decode().
> >
>
> In addition to the approach David mentioned, maybe we can use a common
> lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> symbols with a switch.
>
> For example:
>
> static const s8 base64_rev_common[256] = {
>     [0 ... 255] = -1,
>     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,
>     ['a'] = 26, /* ... */, ['z'] = 51,
>     ['0'] = 52, /* ... */, ['9'] = 61,
> };
>
> static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
>     s8 v = base64_rev_common[c];
>     if (v != -1)
>         return v;
>
>     switch (variant) {
>     case BASE64_STD:
>         if (c == '+') return 62;
>         if (c == '/') return 63;
>         break;
>     case BASE64_IMAP:
>         if (c == '+') return 62;
>         if (c == ',') return 63;
>         break;
>     case BASE64_URLSAFE:
>         if (c == '-') return 62;
>         if (c == '_') return 63;
>         break;
>     }
>     return -1;
> }
>
> What do you think?

That adds several branches in the hot loop, at least 2 of which are
unpredictable for valid base64 input of a given variant (v != -1 as
well as the first c check in the applicable switch case). That seems
like it would hurt performance, no? I think having 3 separate tables
would be preferable to making the hot loop more branchy.

Best,
Caleb

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by David Laight 4 months, 1 week ago

On Wed, 1 Oct 2025 09:20:27 -0700
Caleb Sander Mateos <csander@purestorage.com> wrote:

> On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> >
> > On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:  
> > > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:  
> > > >
> > > > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > >
> > > > Replace the use of strchr() in base64_decode() with precomputed reverse
> > > > lookup tables for each variant. This avoids repeated string scans and
> > > > improves performance. Use -1 in the tables to mark invalid characters.
> > > >
> > > > Decode:
> > > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > > >
> > > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > ---
> > > >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> > > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > index 1af557785..b20fdf168 100644
> > > > --- a/lib/base64.c
> > > > +++ b/lib/base64.c
> > > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > > >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > > >  };
> > > >
> > > > +static const s8 base64_rev_tables[][256] = {
> > > > +       [BASE64_STD] = {
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +       },
> > > > +       [BASE64_URLSAFE] = {
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +       },
> > > > +       [BASE64_IMAP] = {
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > +       },  
> > >
> > > Do we actually need 3 separate lookup tables? It looks like all 3
> > > variants agree on the value of any characters they have in common. So
> > > we could combine them into a single lookup table that would work for a
> > > valid base64 string of any variant. The only downside I can see is
> > > that base64 strings which are invalid in some variants might no longer
> > > be rejected by base64_decode().
> > >  
> >
> > In addition to the approach David mentioned, maybe we can use a common
> > lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> > symbols with a switch.

It is certainly possible to generate the initialiser from a #define to
avoid all the replicated source.

> >
> > For example:
> >
> > static const s8 base64_rev_common[256] = {
> >     [0 ... 255] = -1,
> >     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,

If you assume ASCII (I doubt Linux runs on any EBCDIC systems) you
can assume the characters are sequential and miss ['B'] = etc to
reduce the the line lengths.
(Even EBCDIC has A-I J-R S-Z and 0-9 as adjacent values)

> >     ['a'] = 26, /* ... */, ['z'] = 51,
> >     ['0'] = 52, /* ... */, ['9'] = 61,
> > };
> >
> > static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
> >     s8 v = base64_rev_common[c];
> >     if (v != -1)
> >         return v;
> >
> >     switch (variant) {
> >     case BASE64_STD:
> >         if (c == '+') return 62;
> >         if (c == '/') return 63;
> >         break;
> >     case BASE64_IMAP:
> >         if (c == '+') return 62;
> >         if (c == ',') return 63;
> >         break;
> >     case BASE64_URLSAFE:
> >         if (c == '-') return 62;
> >         if (c == '_') return 63;
> >         break;
> >     }
> >     return -1;
> > }
> >
> > What do you think?  
> 
> That adds several branches in the hot loop, at least 2 of which are
> unpredictable for valid base64 input of a given variant (v != -1 as
> well as the first c check in the applicable switch case).

I'd certainly pass in the character values for 62 and 63 so they are
determined well outside the inner loop.
Possibly even going as far as #define BASE64_STD ('+' << 8 | '/').

> That seems like it would hurt performance, no?
> I think having 3 separate tables
> would be preferable to making the hot loop more branchy.

Depends how common you think 62 and 63 are...
I guess 63 comes from 0xff bytes - so might be quite common.

One thing I think you've missed is that the decode converts 4 characters
into 24 bits - which then need carefully writing into the output buffer.
There is no need to check whether each character is valid.
After:
	val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
val_24 will be negative iff one of b[0..3] is invalid.
So you only need to check every 4 input characters, not for every one.
That does require separate tables.
(Or have a decoder that always maps "+-" to 62 and "/,_" to 63.)

	David

> 
> Best,
> Caleb
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 4 months ago

On Sun, Oct 05, 2025 at 06:18:03PM +0100, David Laight wrote:
> On Wed, 1 Oct 2025 09:20:27 -0700
> Caleb Sander Mateos <csander@purestorage.com> wrote:
> 
> > On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > >
> > > On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:  
> > > > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:  
> > > > >
> > > > > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > >
> > > > > Replace the use of strchr() in base64_decode() with precomputed reverse
> > > > > lookup tables for each variant. This avoids repeated string scans and
> > > > > improves performance. Use -1 in the tables to mark invalid characters.
> > > > >
> > > > > Decode:
> > > > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > > > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > > > >
> > > > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > ---
> > > > >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> > > > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > > index 1af557785..b20fdf168 100644
> > > > > --- a/lib/base64.c
> > > > > +++ b/lib/base64.c
> > > > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > > > >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > > > >  };
> > > > >
> > > > > +static const s8 base64_rev_tables[][256] = {
> > > > > +       [BASE64_STD] = {
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +       },
> > > > > +       [BASE64_URLSAFE] = {
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +       },
> > > > > +       [BASE64_IMAP] = {
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > +       },  
> > > >
> > > > Do we actually need 3 separate lookup tables? It looks like all 3
> > > > variants agree on the value of any characters they have in common. So
> > > > we could combine them into a single lookup table that would work for a
> > > > valid base64 string of any variant. The only downside I can see is
> > > > that base64 strings which are invalid in some variants might no longer
> > > > be rejected by base64_decode().
> > > >  
> > >
> > > In addition to the approach David mentioned, maybe we can use a common
> > > lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> > > symbols with a switch.
> 
> It is certainly possible to generate the initialiser from a #define to
> avoid all the replicated source.
> 
> > >
> > > For example:
> > >
> > > static const s8 base64_rev_common[256] = {
> > >     [0 ... 255] = -1,
> > >     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,
> 
> If you assume ASCII (I doubt Linux runs on any EBCDIC systems) you
> can assume the characters are sequential and miss ['B'] = etc to
> reduce the the line lengths.
> (Even EBCDIC has A-I J-R S-Z and 0-9 as adjacent values)
> 
> > >     ['a'] = 26, /* ... */, ['z'] = 51,
> > >     ['0'] = 52, /* ... */, ['9'] = 61,
> > > };
> > >
> > > static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
> > >     s8 v = base64_rev_common[c];
> > >     if (v != -1)
> > >         return v;
> > >
> > >     switch (variant) {
> > >     case BASE64_STD:
> > >         if (c == '+') return 62;
> > >         if (c == '/') return 63;
> > >         break;
> > >     case BASE64_IMAP:
> > >         if (c == '+') return 62;
> > >         if (c == ',') return 63;
> > >         break;
> > >     case BASE64_URLSAFE:
> > >         if (c == '-') return 62;
> > >         if (c == '_') return 63;
> > >         break;
> > >     }
> > >     return -1;
> > > }
> > >
> > > What do you think?  
> > 
> > That adds several branches in the hot loop, at least 2 of which are
> > unpredictable for valid base64 input of a given variant (v != -1 as
> > well as the first c check in the applicable switch case).
> 
> I'd certainly pass in the character values for 62 and 63 so they are
> determined well outside the inner loop.
> Possibly even going as far as #define BASE64_STD ('+' << 8 | '/').
> 
> > That seems like it would hurt performance, no?
> > I think having 3 separate tables
> > would be preferable to making the hot loop more branchy.
> 
> Depends how common you think 62 and 63 are...
> I guess 63 comes from 0xff bytes - so might be quite common.
> 
> One thing I think you've missed is that the decode converts 4 characters
> into 24 bits - which then need carefully writing into the output buffer.
> There is no need to check whether each character is valid.
> After:
> 	val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> val_24 will be negative iff one of b[0..3] is invalid.
> So you only need to check every 4 input characters, not for every one.
> That does require separate tables.
> (Or have a decoder that always maps "+-" to 62 and "/,_" to 63.)
> 
> 	David
>

Thanks for the feedback.
For the next revision, we’ll use a single lookup table that maps both +
and - to 62, and /, _, and , to 63.
Does this approach sound good to everyone?

Best regards,
Guan-Chun

> > 
> > Best,
> > Caleb
> > 
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Caleb Sander Mateos 4 months ago

On Tue, Oct 7, 2025 at 1:28 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
>
> On Sun, Oct 05, 2025 at 06:18:03PM +0100, David Laight wrote:
> > On Wed, 1 Oct 2025 09:20:27 -0700
> > Caleb Sander Mateos <csander@purestorage.com> wrote:
> >
> > > On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > > >
> > > > On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:
> > > > > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > > > > >
> > > > > > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > >
> > > > > > Replace the use of strchr() in base64_decode() with precomputed reverse
> > > > > > lookup tables for each variant. This avoids repeated string scans and
> > > > > > improves performance. Use -1 in the tables to mark invalid characters.
> > > > > >
> > > > > > Decode:
> > > > > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > > > > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > > > > >
> > > > > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > ---
> > > > > >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> > > > > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > > > > >
> > > > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > > > index 1af557785..b20fdf168 100644
> > > > > > --- a/lib/base64.c
> > > > > > +++ b/lib/base64.c
> > > > > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > > > > >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > > > > >  };
> > > > > >
> > > > > > +static const s8 base64_rev_tables[][256] = {
> > > > > > +       [BASE64_STD] = {
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> > > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +       },
> > > > > > +       [BASE64_URLSAFE] = {
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> > > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> > > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +       },
> > > > > > +       [BASE64_IMAP] = {
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> > > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > +       },
> > > > >
> > > > > Do we actually need 3 separate lookup tables? It looks like all 3
> > > > > variants agree on the value of any characters they have in common. So
> > > > > we could combine them into a single lookup table that would work for a
> > > > > valid base64 string of any variant. The only downside I can see is
> > > > > that base64 strings which are invalid in some variants might no longer
> > > > > be rejected by base64_decode().
> > > > >
> > > >
> > > > In addition to the approach David mentioned, maybe we can use a common
> > > > lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> > > > symbols with a switch.
> >
> > It is certainly possible to generate the initialiser from a #define to
> > avoid all the replicated source.
> >
> > > >
> > > > For example:
> > > >
> > > > static const s8 base64_rev_common[256] = {
> > > >     [0 ... 255] = -1,
> > > >     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,
> >
> > If you assume ASCII (I doubt Linux runs on any EBCDIC systems) you
> > can assume the characters are sequential and miss ['B'] = etc to
> > reduce the the line lengths.
> > (Even EBCDIC has A-I J-R S-Z and 0-9 as adjacent values)
> >
> > > >     ['a'] = 26, /* ... */, ['z'] = 51,
> > > >     ['0'] = 52, /* ... */, ['9'] = 61,
> > > > };
> > > >
> > > > static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
> > > >     s8 v = base64_rev_common[c];
> > > >     if (v != -1)
> > > >         return v;
> > > >
> > > >     switch (variant) {
> > > >     case BASE64_STD:
> > > >         if (c == '+') return 62;
> > > >         if (c == '/') return 63;
> > > >         break;
> > > >     case BASE64_IMAP:
> > > >         if (c == '+') return 62;
> > > >         if (c == ',') return 63;
> > > >         break;
> > > >     case BASE64_URLSAFE:
> > > >         if (c == '-') return 62;
> > > >         if (c == '_') return 63;
> > > >         break;
> > > >     }
> > > >     return -1;
> > > > }
> > > >
> > > > What do you think?
> > >
> > > That adds several branches in the hot loop, at least 2 of which are
> > > unpredictable for valid base64 input of a given variant (v != -1 as
> > > well as the first c check in the applicable switch case).
> >
> > I'd certainly pass in the character values for 62 and 63 so they are
> > determined well outside the inner loop.
> > Possibly even going as far as #define BASE64_STD ('+' << 8 | '/').
> >
> > > That seems like it would hurt performance, no?
> > > I think having 3 separate tables
> > > would be preferable to making the hot loop more branchy.
> >
> > Depends how common you think 62 and 63 are...
> > I guess 63 comes from 0xff bytes - so might be quite common.
> >
> > One thing I think you've missed is that the decode converts 4 characters
> > into 24 bits - which then need carefully writing into the output buffer.
> > There is no need to check whether each character is valid.
> > After:
> >       val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> > val_24 will be negative iff one of b[0..3] is invalid.
> > So you only need to check every 4 input characters, not for every one.
> > That does require separate tables.
> > (Or have a decoder that always maps "+-" to 62 and "/,_" to 63.)
> >
> >       David
> >
>
> Thanks for the feedback.
> For the next revision, we’ll use a single lookup table that maps both +
> and - to 62, and /, _, and , to 63.
> Does this approach sound good to everyone?

Sounds fine to me. Perhaps worth pointing out that the decision to
accept any base64 variant in the decoder would likely be permanent,
since users may come to depend on it. But I don't see any issue with
it as long as all the base64 variants agree on the values of their
common symbols.

Best,
Caleb

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by David Laight 4 months ago

On Tue, 7 Oct 2025 07:57:16 -0700
Caleb Sander Mateos <csander@purestorage.com> wrote:

> On Tue, Oct 7, 2025 at 1:28 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> >
> > On Sun, Oct 05, 2025 at 06:18:03PM +0100, David Laight wrote:  
> > > On Wed, 1 Oct 2025 09:20:27 -0700
> > > Caleb Sander Mateos <csander@purestorage.com> wrote:
> > >  
> > > > On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:  
> > > > >
> > > > > On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:  
> > > > > > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:  
> > > > > > >
> > > > > > > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > > >
> > > > > > > Replace the use of strchr() in base64_decode() with precomputed reverse
> > > > > > > lookup tables for each variant. This avoids repeated string scans and
> > > > > > > improves performance. Use -1 in the tables to mark invalid characters.
> > > > > > >
> > > > > > > Decode:
> > > > > > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > > > > > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > > > > > >
> > > > > > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > > > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > > ---
> > > > > > >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> > > > > > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > > > > > >
> > > > > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > > > > index 1af557785..b20fdf168 100644
> > > > > > > --- a/lib/base64.c
> > > > > > > +++ b/lib/base64.c
> > > > > > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > > > > > >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > > > > > >  };
> > > > > > >
> > > > > > > +static const s8 base64_rev_tables[][256] = {
...
> > > > > > Do we actually need 3 separate lookup tables? It looks like all 3
> > > > > > variants agree on the value of any characters they have in common. So
> > > > > > we could combine them into a single lookup table that would work for a
> > > > > > valid base64 string of any variant. The only downside I can see is
> > > > > > that base64 strings which are invalid in some variants might no longer
> > > > > > be rejected by base64_decode().
> > > > > >  
> > > > >
> > > > > In addition to the approach David mentioned, maybe we can use a common
> > > > > lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> > > > > symbols with a switch.  
> > >
> > > It is certainly possible to generate the initialiser from a #define to
> > > avoid all the replicated source.
> > >  
> > > > >
> > > > > For example:
> > > > >
> > > > > static const s8 base64_rev_common[256] = {
> > > > >     [0 ... 255] = -1,
> > > > >     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,  
> > >
> > > If you assume ASCII (I doubt Linux runs on any EBCDIC systems) you
> > > can assume the characters are sequential and miss ['B'] = etc to
> > > reduce the the line lengths.
> > > (Even EBCDIC has A-I J-R S-Z and 0-9 as adjacent values)
> > >  
> > > > >     ['a'] = 26, /* ... */, ['z'] = 51,
> > > > >     ['0'] = 52, /* ... */, ['9'] = 61,
> > > > > };
> > > > >
> > > > > static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
> > > > >     s8 v = base64_rev_common[c];
> > > > >     if (v != -1)
> > > > >         return v;
> > > > >
> > > > >     switch (variant) {
> > > > >     case BASE64_STD:
> > > > >         if (c == '+') return 62;
> > > > >         if (c == '/') return 63;
> > > > >         break;
> > > > >     case BASE64_IMAP:
> > > > >         if (c == '+') return 62;
> > > > >         if (c == ',') return 63;
> > > > >         break;
> > > > >     case BASE64_URLSAFE:
> > > > >         if (c == '-') return 62;
> > > > >         if (c == '_') return 63;
> > > > >         break;
> > > > >     }
> > > > >     return -1;
> > > > > }
> > > > >
> > > > > What do you think?  
> > > >
> > > > That adds several branches in the hot loop, at least 2 of which are
> > > > unpredictable for valid base64 input of a given variant (v != -1 as
> > > > well as the first c check in the applicable switch case).  
> > >
> > > I'd certainly pass in the character values for 62 and 63 so they are
> > > determined well outside the inner loop.
> > > Possibly even going as far as #define BASE64_STD ('+' << 8 | '/').
> > >  
> > > > That seems like it would hurt performance, no?
> > > > I think having 3 separate tables
> > > > would be preferable to making the hot loop more branchy.  
> > >
> > > Depends how common you think 62 and 63 are...
> > > I guess 63 comes from 0xff bytes - so might be quite common.
> > >
> > > One thing I think you've missed is that the decode converts 4 characters
> > > into 24 bits - which then need carefully writing into the output buffer.
> > > There is no need to check whether each character is valid.
> > > After:
> > >       val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> > > val_24 will be negative iff one of b[0..3] is invalid.
> > > So you only need to check every 4 input characters, not for every one.
> > > That does require separate tables.
> > > (Or have a decoder that always maps "+-" to 62 and "/,_" to 63.)
> > >
> > >       David
> > >  
> >
> > Thanks for the feedback.
> > For the next revision, we’ll use a single lookup table that maps both +
> > and - to 62, and /, _, and , to 63.
> > Does this approach sound good to everyone?  
> 
> Sounds fine to me. Perhaps worth pointing out that the decision to
> accept any base64 variant in the decoder would likely be permanent,
> since users may come to depend on it. But I don't see any issue with
> it as long as all the base64 variants agree on the values of their
> common symbols.

If an incompatible version comes along it'll need a different function
(or similar). But there is no point over-engineering it now.

	David


> 
> Best,
> Caleb

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 4 months ago

On Tue, Oct 07, 2025 at 07:23:27PM +0100, David Laight wrote:
> On Tue, 7 Oct 2025 07:57:16 -0700
> Caleb Sander Mateos <csander@purestorage.com> wrote:
> 
> > On Tue, Oct 7, 2025 at 1:28 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > >
> > > On Sun, Oct 05, 2025 at 06:18:03PM +0100, David Laight wrote:  
> > > > On Wed, 1 Oct 2025 09:20:27 -0700
> > > > Caleb Sander Mateos <csander@purestorage.com> wrote:
> > > >  
> > > > > On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:  
> > > > > >
> > > > > > On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:  
> > > > > > > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:  
> > > > > > > >
> > > > > > > > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > > > >
> > > > > > > > Replace the use of strchr() in base64_decode() with precomputed reverse
> > > > > > > > lookup tables for each variant. This avoids repeated string scans and
> > > > > > > > improves performance. Use -1 in the tables to mark invalid characters.
> > > > > > > >
> > > > > > > > Decode:
> > > > > > > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > > > > > > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > > > > > > >
> > > > > > > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > > > > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > > > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > > > ---
> > > > > > > >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> > > > > > > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > > > > > index 1af557785..b20fdf168 100644
> > > > > > > > --- a/lib/base64.c
> > > > > > > > +++ b/lib/base64.c
> > > > > > > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > > > > > > >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > > > > > > >  };
> > > > > > > >
> > > > > > > > +static const s8 base64_rev_tables[][256] = {
> ...
> > > > > > > Do we actually need 3 separate lookup tables? It looks like all 3
> > > > > > > variants agree on the value of any characters they have in common. So
> > > > > > > we could combine them into a single lookup table that would work for a
> > > > > > > valid base64 string of any variant. The only downside I can see is
> > > > > > > that base64 strings which are invalid in some variants might no longer
> > > > > > > be rejected by base64_decode().
> > > > > > >  
> > > > > >
> > > > > > In addition to the approach David mentioned, maybe we can use a common
> > > > > > lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> > > > > > symbols with a switch.  
> > > >
> > > > It is certainly possible to generate the initialiser from a #define to
> > > > avoid all the replicated source.
> > > >  
> > > > > >
> > > > > > For example:
> > > > > >
> > > > > > static const s8 base64_rev_common[256] = {
> > > > > >     [0 ... 255] = -1,
> > > > > >     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,  
> > > >
> > > > If you assume ASCII (I doubt Linux runs on any EBCDIC systems) you
> > > > can assume the characters are sequential and miss ['B'] = etc to
> > > > reduce the the line lengths.
> > > > (Even EBCDIC has A-I J-R S-Z and 0-9 as adjacent values)
> > > >  
> > > > > >     ['a'] = 26, /* ... */, ['z'] = 51,
> > > > > >     ['0'] = 52, /* ... */, ['9'] = 61,
> > > > > > };
> > > > > >
> > > > > > static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
> > > > > >     s8 v = base64_rev_common[c];
> > > > > >     if (v != -1)
> > > > > >         return v;
> > > > > >
> > > > > >     switch (variant) {
> > > > > >     case BASE64_STD:
> > > > > >         if (c == '+') return 62;
> > > > > >         if (c == '/') return 63;
> > > > > >         break;
> > > > > >     case BASE64_IMAP:
> > > > > >         if (c == '+') return 62;
> > > > > >         if (c == ',') return 63;
> > > > > >         break;
> > > > > >     case BASE64_URLSAFE:
> > > > > >         if (c == '-') return 62;
> > > > > >         if (c == '_') return 63;
> > > > > >         break;
> > > > > >     }
> > > > > >     return -1;
> > > > > > }
> > > > > >
> > > > > > What do you think?  
> > > > >
> > > > > That adds several branches in the hot loop, at least 2 of which are
> > > > > unpredictable for valid base64 input of a given variant (v != -1 as
> > > > > well as the first c check in the applicable switch case).  
> > > >
> > > > I'd certainly pass in the character values for 62 and 63 so they are
> > > > determined well outside the inner loop.
> > > > Possibly even going as far as #define BASE64_STD ('+' << 8 | '/').
> > > >  
> > > > > That seems like it would hurt performance, no?
> > > > > I think having 3 separate tables
> > > > > would be preferable to making the hot loop more branchy.  
> > > >
> > > > Depends how common you think 62 and 63 are...
> > > > I guess 63 comes from 0xff bytes - so might be quite common.
> > > >
> > > > One thing I think you've missed is that the decode converts 4 characters
> > > > into 24 bits - which then need carefully writing into the output buffer.
> > > > There is no need to check whether each character is valid.
> > > > After:
> > > >       val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> > > > val_24 will be negative iff one of b[0..3] is invalid.
> > > > So you only need to check every 4 input characters, not for every one.
> > > > That does require separate tables.
> > > > (Or have a decoder that always maps "+-" to 62 and "/,_" to 63.)
> > > >
> > > >       David
> > > >  
> > >
> > > Thanks for the feedback.
> > > For the next revision, we’ll use a single lookup table that maps both +
> > > and - to 62, and /, _, and , to 63.
> > > Does this approach sound good to everyone?  
> > 
> > Sounds fine to me. Perhaps worth pointing out that the decision to
> > accept any base64 variant in the decoder would likely be permanent,
> > since users may come to depend on it. But I don't see any issue with
> > it as long as all the base64 variants agree on the values of their
> > common symbols.
> 
> If an incompatible version comes along it'll need a different function
> (or similar). But there is no point over-engineering it now.
> 
> 	David
> 
>

As Eric mentioned, the decoder in fs/crypto/ needs to reject invalid input.
One possible solution I came up with is to first create a shared
base64_rev_common lookup table as the base for all Base64 variants.
Then, depending on the variant (e.g., BASE64_STD, BASE64_URLSAFE, etc.), we
can dynamically adjust the character mappings for position 62 and position 63
at runtime, based on the variant.

Here are the changes to the code:

static const s8 base64_rev_common[256] = {
	[0 ... 255] = -1,
	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
};

static const struct {
	char char62, char63;
} base64_symbols[] = {
	[BASE64_STD] = { '+', '/' },
	[BASE64_URLSAFE] = { '-', '_' },
	[BASE64_IMAP] = { '+', ',' },
};

int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
{
	u8 *bp = dst;
	u8 pad_cnt = 0;
	s8 input1, input2, input3, input4;
	u32 val;
	s8 base64_rev_tables[256];

	/* Validate the input length for padding */
	if (unlikely(padding && (srclen & 0x03) != 0))
		return -1;

	memcpy(base64_rev_tables, base64_rev_common, sizeof(base64_rev_common));

	if (variant < BASE64_STD || variant > BASE64_IMAP)
		return -1;

	base64_rev_tables[base64_symbols[variant].char62] = 62;
	base64_rev_tables[base64_symbols[variant].char63] = 63;

	while (padding && srclen > 0 && src[srclen - 1] == '=') {
		pad_cnt++;
		srclen--;
		if (pad_cnt > 2)
			return -1;
	}

	while (srclen >= 4) {
		/* Decode the next 4 characters */
		input1 = base64_rev_tables[(u8)src[0]];
		input2 = base64_rev_tables[(u8)src[1]];
		input3 = base64_rev_tables[(u8)src[2]];
		input4 = base64_rev_tables[(u8)src[3]];

		val = (input1 << 18) |
		      (input2 << 12) |
		      (input3 << 6) |
		      input4;

		if (unlikely((s32)val < 0))
			return -1;

		*bp++ = (u8)(val >> 16);
		*bp++ = (u8)(val >> 8);
		*bp++ = (u8)val;

		src += 4;
		srclen -= 4;
	}

	/* Handle leftover characters when padding is not used */
	if (srclen > 0) {
		switch (srclen) {
		case 2:
			input1 = base64_rev_tables[(u8)src[0]];
			input2 = base64_rev_tables[(u8)src[1]];
			val = (input1 << 6) | input2; /* 12 bits */
			if (unlikely((s32)val < 0 || val & 0x0F))
				return -1;

			*bp++ = (u8)(val >> 4);
			break;
		case 3:
			input1 = base64_rev_tables[(u8)src[0]];
			input2 = base64_rev_tables[(u8)src[1]];
			input3 = base64_rev_tables[(u8)src[2]];

			val = (input1 << 12) |
			      (input2 << 6) |
			      input3; /* 18 bits */
			if (unlikely((s32)val < 0 || val & 0x03))
				return -1;

			*bp++ = (u8)(val >> 10);
			*bp++ = (u8)(val >> 2);
			break;
		default:
			return -1;
		}
	}

	return bp - dst;
}
Based on KUnit testing, the performance results are as follows:
	base64_performance_tests: [64B] decode run : 40ns
	base64_performance_tests: [1KB] decode run : 463ns

However, this approach introduces an issue. It uses 256 bytes of memory
on the stack for base64_rev_tables, which might not be ideal. Does anyone
have any thoughts or alternative suggestions to solve this issue, or is it
not really a concern?

Best regards,
Guan-Chun

> > 
> > Best,
> > Caleb
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by David Laight 4 months ago

On Thu, 9 Oct 2025 20:25:17 +0800
Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:

...
> As Eric mentioned, the decoder in fs/crypto/ needs to reject invalid input.

(to avoid two different input buffers giving the same output)

Which is annoyingly reasonable.

> One possible solution I came up with is to first create a shared
> base64_rev_common lookup table as the base for all Base64 variants.
> Then, depending on the variant (e.g., BASE64_STD, BASE64_URLSAFE, etc.), we
> can dynamically adjust the character mappings for position 62 and position 63
> at runtime, based on the variant.
> 
> Here are the changes to the code:
> 
> static const s8 base64_rev_common[256] = {
> 	[0 ... 255] = -1,
> 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
> 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
> 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
> 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
> 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
> };
> 
> static const struct {
> 	char char62, char63;
> } base64_symbols[] = {
> 	[BASE64_STD] = { '+', '/' },
> 	[BASE64_URLSAFE] = { '-', '_' },
> 	[BASE64_IMAP] = { '+', ',' },
> };
> 
> int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
> {
> 	u8 *bp = dst;
> 	u8 pad_cnt = 0;
> 	s8 input1, input2, input3, input4;
> 	u32 val;
> 	s8 base64_rev_tables[256];
> 
> 	/* Validate the input length for padding */
> 	if (unlikely(padding && (srclen & 0x03) != 0))
> 		return -1;

There is no need for an early check.
Pick it up after the loop when 'srclen != 0'.

> 
> 	memcpy(base64_rev_tables, base64_rev_common, sizeof(base64_rev_common));

Ugg - having a memcpy() here is not a good idea.
It really is better to have 3 arrays, but use a 'mostly common' initialiser.
Perhaps:
#define BASE64_REV_INIT(ch_62, ch_63) = { \
	[0 ... 255] = -1, \
	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, \
		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, \
	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, \
		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, \
	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \
	[ch_62] = 62, [ch_63] = 63, \
}

static const s8 base64_rev_maps[][256] = {
	[BASE64_STD] = BASE64_REV_INIT('+', '/'),
	[BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'),
	[BASE64_IMAP] = BASE64_REV_INIT('+', ',')
};

Then (after validating variant):
	const s8 *map = base64_rev_maps[variant];

> 
> 	if (variant < BASE64_STD || variant > BASE64_IMAP)
> 		return -1;
> 
> 	base64_rev_tables[base64_symbols[variant].char62] = 62;
> 	base64_rev_tables[base64_symbols[variant].char63] = 63;
> 
> 	while (padding && srclen > 0 && src[srclen - 1] == '=') {
> 		pad_cnt++;
> 		srclen--;
> 		if (pad_cnt > 2)
> 			return -1;
> 	}

I'm not sure I'd to that there.
You are (in some sense) optimising for padding.
From what I remember, "abcd" gives 24 bits, "abc=" 16 and "ab==" 8.

> 
> 	while (srclen >= 4) {
> 		/* Decode the next 4 characters */
> 		input1 = base64_rev_tables[(u8)src[0]];
> 		input2 = base64_rev_tables[(u8)src[1]];
> 		input3 = base64_rev_tables[(u8)src[2]];
> 		input4 = base64_rev_tables[(u8)src[3]];

I'd be tempted to make src[] unsigned - probably be assigning the parameter
to a local at the top of the function.

Also you have input3 = ... src[2]...
Perhaps they should be input[0..3] instead.

> 
> 		val = (input1 << 18) |
> 		      (input2 << 12) |
> 		      (input3 << 6) |
> 		      input4;

Four lines is excessive, C doesn't require the () and I'm not sure the
compilers complain about << and |.

> 
> 		if (unlikely((s32)val < 0))
> 			return -1;

Make 'val' signed - then you don't need the cast.
You can pick up the padding check here, something like:
			val = input1 << 18 | input2 << 12;
			if (!padding || val < 0 || src[3] != '=')
				return -1;
			*bp++ = val >> 16;
			if (src[2] == '=')
				return bp - dst;
			if (input3 < 0)
				return -1;
			val |= input3 << 6;
			*bp++ = val >> 8;
			return bp - dst;

Or, if you really want to use the code below the loop:
			if (!padding || src[3] != '=')
				return -1;
			padding = 0;
			srclen -= 1 + (src[2] == '=');
			break;


> 
> 		*bp++ = (u8)(val >> 16);
> 		*bp++ = (u8)(val >> 8);
> 		*bp++ = (u8)val;

You don't need those casts.

> 
> 		src += 4;
> 		srclen -= 4;
> 	}
> 
> 	/* Handle leftover characters when padding is not used */

You are coming here with padding.
I'm not sure what should happen without padding.
For a multi-line file decode I suspect the characters need adding to
the start of the next line (ie lines aren't required to contain
multiples of 4 characters - even though they almost always will).

> 	if (srclen > 0) {
> 		switch (srclen) {

You don't need an 'if' and a 'switch'.
srclen is likely to be zero, but perhaps write as:
	if (likely(!srclen))
		return bp - dst;
	if (padding || srclen == 1)
		return -1;

	val = base64_rev_tables[(u8)src[0]] << 12 | base64_rev_tables[(u8)src[1]] << 6;
	*bp++ = val >> 10;
	if (srclen == 1) {
		if (val & 0x800003ff)
			return -1;
	} else {
		val |= base64_rev_tables[(u8)src[2]];
		if (val & 0x80000003)
			return -1;
		*bp++ = val >> 2;
	}
	return bp - dst;
}

	David

> 		case 2:
> 			input1 = base64_rev_tables[(u8)src[0]];
> 			input2 = base64_rev_tables[(u8)src[1]];
> 			val = (input1 << 6) | input2; /* 12 bits */
> 			if (unlikely((s32)val < 0 || val & 0x0F))
> 				return -1;
> 
> 			*bp++ = (u8)(val >> 4);
> 			break;
> 		case 3:
> 			input1 = base64_rev_tables[(u8)src[0]];
> 			input2 = base64_rev_tables[(u8)src[1]];
> 			input3 = base64_rev_tables[(u8)src[2]];
> 
> 			val = (input1 << 12) |
> 			      (input2 << 6) |
> 			      input3; /* 18 bits */
> 			if (unlikely((s32)val < 0 || val & 0x03))
> 				return -1;
> 
> 			*bp++ = (u8)(val >> 10);
> 			*bp++ = (u8)(val >> 2);
> 			break;
> 		default:
> 			return -1;
> 		}
> 	}
> 
> 	return bp - dst;
> }
> Based on KUnit testing, the performance results are as follows:
> 	base64_performance_tests: [64B] decode run : 40ns
> 	base64_performance_tests: [1KB] decode run : 463ns
> 
> However, this approach introduces an issue. It uses 256 bytes of memory
> on the stack for base64_rev_tables, which might not be ideal. Does anyone
> have any thoughts or alternative suggestions to solve this issue, or is it
> not really a concern?
> 
> Best regards,
> Guan-Chun
> 
> > > 
> > > Best,
> > > Caleb  
> >

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 4 months ago

On Fri, Oct 10, 2025 at 10:51:38AM +0100, David Laight wrote:
> On Thu, 9 Oct 2025 20:25:17 +0800
> Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> 
> ...
> > As Eric mentioned, the decoder in fs/crypto/ needs to reject invalid input.
> 
> (to avoid two different input buffers giving the same output)
> 
> Which is annoyingly reasonable.
> 
> > One possible solution I came up with is to first create a shared
> > base64_rev_common lookup table as the base for all Base64 variants.
> > Then, depending on the variant (e.g., BASE64_STD, BASE64_URLSAFE, etc.), we
> > can dynamically adjust the character mappings for position 62 and position 63
> > at runtime, based on the variant.
> > 
> > Here are the changes to the code:
> > 
> > static const s8 base64_rev_common[256] = {
> > 	[0 ... 255] = -1,
> > 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
> > 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
> > 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
> > 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
> > 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
> > };
> > 
> > static const struct {
> > 	char char62, char63;
> > } base64_symbols[] = {
> > 	[BASE64_STD] = { '+', '/' },
> > 	[BASE64_URLSAFE] = { '-', '_' },
> > 	[BASE64_IMAP] = { '+', ',' },
> > };
> > 
> > int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
> > {
> > 	u8 *bp = dst;
> > 	u8 pad_cnt = 0;
> > 	s8 input1, input2, input3, input4;
> > 	u32 val;
> > 	s8 base64_rev_tables[256];
> > 
> > 	/* Validate the input length for padding */
> > 	if (unlikely(padding && (srclen & 0x03) != 0))
> > 		return -1;
> 
> There is no need for an early check.
> Pick it up after the loop when 'srclen != 0'.
>

I think the early check is still needed, since I'm removing the
padding '=' first.
This makes the handling logic consistent for both padded and unpadded
inputs, and avoids extra if conditions for padding inside the hot loop.

> > 
> > 	memcpy(base64_rev_tables, base64_rev_common, sizeof(base64_rev_common));
> 
> Ugg - having a memcpy() here is not a good idea.
> It really is better to have 3 arrays, but use a 'mostly common' initialiser.
> Perhaps:
> #define BASE64_REV_INIT(ch_62, ch_63) = { \
> 	[0 ... 255] = -1, \
> 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, \
> 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, \
> 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, \
> 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, \
> 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \
> 	[ch_62] = 62, [ch_63] = 63, \
> }
> 
> static const s8 base64_rev_maps[][256] = {
> 	[BASE64_STD] = BASE64_REV_INIT('+', '/'),
> 	[BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'),
> 	[BASE64_IMAP] = BASE64_REV_INIT('+', ',')
> };
> 
> Then (after validating variant):
> 	const s8 *map = base64_rev_maps[variant];
>

Got it. I'll switch to using three static tables with a common initializer
as you suggested.

> > 
> > 	if (variant < BASE64_STD || variant > BASE64_IMAP)
> > 		return -1;
> > 
> > 	base64_rev_tables[base64_symbols[variant].char62] = 62;
> > 	base64_rev_tables[base64_symbols[variant].char63] = 63;
> > 
> > 	while (padding && srclen > 0 && src[srclen - 1] == '=') {
> > 		pad_cnt++;
> > 		srclen--;
> > 		if (pad_cnt > 2)
> > 			return -1;
> > 	}
> 
> I'm not sure I'd to that there.
> You are (in some sense) optimising for padding.
> From what I remember, "abcd" gives 24 bits, "abc=" 16 and "ab==" 8.
> 
> > 
> > 	while (srclen >= 4) {
> > 		/* Decode the next 4 characters */
> > 		input1 = base64_rev_tables[(u8)src[0]];
> > 		input2 = base64_rev_tables[(u8)src[1]];
> > 		input3 = base64_rev_tables[(u8)src[2]];
> > 		input4 = base64_rev_tables[(u8)src[3]];
> 
> I'd be tempted to make src[] unsigned - probably be assigning the parameter
> to a local at the top of the function.
> 
> Also you have input3 = ... src[2]...
> Perhaps they should be input[0..3] instead.
>

OK, I'll make the changes.

> > 
> > 		val = (input1 << 18) |
> > 		      (input2 << 12) |
> > 		      (input3 << 6) |
> > 		      input4;
> 
> Four lines is excessive, C doesn't require the () and I'm not sure the
> compilers complain about << and |.
> 

OK, I'll make the changes.

> > 
> > 		if (unlikely((s32)val < 0))
> > 			return -1;
> 
> Make 'val' signed - then you don't need the cast.
> You can pick up the padding check here, something like:
> 			val = input1 << 18 | input2 << 12;
> 			if (!padding || val < 0 || src[3] != '=')
> 				return -1;
> 			*bp++ = val >> 16;
> 			if (src[2] == '=')
> 				return bp - dst;
> 			if (input3 < 0)
> 				return -1;
> 			val |= input3 << 6;
> 			*bp++ = val >> 8;
> 			return bp - dst;
> 
> Or, if you really want to use the code below the loop:
> 			if (!padding || src[3] != '=')
> 				return -1;
> 			padding = 0;
> 			srclen -= 1 + (src[2] == '=');
> 			break;
> 
> 
> > 
> > 		*bp++ = (u8)(val >> 16);
> > 		*bp++ = (u8)(val >> 8);
> > 		*bp++ = (u8)val;
> 
> You don't need those casts.
>

OK, I'll make the changes.

> > 
> > 		src += 4;
> > 		srclen -= 4;
> > 	}
> > 
> > 	/* Handle leftover characters when padding is not used */
> 
> You are coming here with padding.
> I'm not sure what should happen without padding.
> For a multi-line file decode I suspect the characters need adding to
> the start of the next line (ie lines aren't required to contain
> multiples of 4 characters - even though they almost always will).
> 

Ah, my mistake. I forgot to remove that comment.
Based on my observation, base64_decode() should process the entire input
buffer in a single call, so I believe it does not need to handle
multi-line input.

Best regards,
Guan-Chun

> > 	if (srclen > 0) {
> > 		switch (srclen) {
> 
> You don't need an 'if' and a 'switch'.
> srclen is likely to be zero, but perhaps write as:
> 	if (likely(!srclen))
> 		return bp - dst;
> 	if (padding || srclen == 1)
> 		return -1;
> 
> 	val = base64_rev_tables[(u8)src[0]] << 12 | base64_rev_tables[(u8)src[1]] << 6;
> 	*bp++ = val >> 10;
> 	if (srclen == 1) {
> 		if (val & 0x800003ff)
> 			return -1;
> 	} else {
> 		val |= base64_rev_tables[(u8)src[2]];
> 		if (val & 0x80000003)
> 			return -1;
> 		*bp++ = val >> 2;
> 	}
> 	return bp - dst;
> }
> 
> 	David
> 
> > 		case 2:
> > 			input1 = base64_rev_tables[(u8)src[0]];
> > 			input2 = base64_rev_tables[(u8)src[1]];
> > 			val = (input1 << 6) | input2; /* 12 bits */
> > 			if (unlikely((s32)val < 0 || val & 0x0F))
> > 				return -1;
> > 
> > 			*bp++ = (u8)(val >> 4);
> > 			break;
> > 		case 3:
> > 			input1 = base64_rev_tables[(u8)src[0]];
> > 			input2 = base64_rev_tables[(u8)src[1]];
> > 			input3 = base64_rev_tables[(u8)src[2]];
> > 
> > 			val = (input1 << 12) |
> > 			      (input2 << 6) |
> > 			      input3; /* 18 bits */
> > 			if (unlikely((s32)val < 0 || val & 0x03))
> > 				return -1;
> > 
> > 			*bp++ = (u8)(val >> 10);
> > 			*bp++ = (u8)(val >> 2);
> > 			break;
> > 		default:
> > 			return -1;
> > 		}
> > 	}
> > 
> > 	return bp - dst;
> > }
> > Based on KUnit testing, the performance results are as follows:
> > 	base64_performance_tests: [64B] decode run : 40ns
> > 	base64_performance_tests: [1KB] decode run : 463ns
> > 
> > However, this approach introduces an issue. It uses 256 bytes of memory
> > on the stack for base64_rev_tables, which might not be ideal. Does anyone
> > have any thoughts or alternative suggestions to solve this issue, or is it
> > not really a concern?
> > 
> > Best regards,
> > Guan-Chun
> > 
> > > > 
> > > > Best,
> > > > Caleb  
> > >   
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by David Laight 3 months, 4 weeks ago

On Mon, 13 Oct 2025 17:49:55 +0800
Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:

> On Fri, Oct 10, 2025 at 10:51:38AM +0100, David Laight wrote:
> > On Thu, 9 Oct 2025 20:25:17 +0800
> > Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > 
> > ...  
> > > As Eric mentioned, the decoder in fs/crypto/ needs to reject invalid input.  
> > 
> > (to avoid two different input buffers giving the same output)
> > 
> > Which is annoyingly reasonable.
> >   
> > > One possible solution I came up with is to first create a shared
> > > base64_rev_common lookup table as the base for all Base64 variants.
> > > Then, depending on the variant (e.g., BASE64_STD, BASE64_URLSAFE, etc.), we
> > > can dynamically adjust the character mappings for position 62 and position 63
> > > at runtime, based on the variant.
> > > 
> > > Here are the changes to the code:
> > > 
> > > static const s8 base64_rev_common[256] = {
> > > 	[0 ... 255] = -1,
> > > 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
> > > 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
> > > 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
> > > 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
> > > 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
> > > };
> > > 
> > > static const struct {
> > > 	char char62, char63;
> > > } base64_symbols[] = {
> > > 	[BASE64_STD] = { '+', '/' },
> > > 	[BASE64_URLSAFE] = { '-', '_' },
> > > 	[BASE64_IMAP] = { '+', ',' },
> > > };
> > > 
> > > int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
> > > {
> > > 	u8 *bp = dst;
> > > 	u8 pad_cnt = 0;
> > > 	s8 input1, input2, input3, input4;
> > > 	u32 val;
> > > 	s8 base64_rev_tables[256];
> > > 
> > > 	/* Validate the input length for padding */
> > > 	if (unlikely(padding && (srclen & 0x03) != 0))
> > > 		return -1;  
> > 
> > There is no need for an early check.
> > Pick it up after the loop when 'srclen != 0'.
> >  
> 
> I think the early check is still needed, since I'm removing the
> padding '=' first.
> This makes the handling logic consistent for both padded and unpadded
> inputs, and avoids extra if conditions for padding inside the hot loop.

The 'invalid input' check will detect the padding.
Then you don't get an extra check if there is no padding (probably normal).
I realised I didn't get it quite right - updated below.

> 
> > > 
> > > 	memcpy(base64_rev_tables, base64_rev_common, sizeof(base64_rev_common));  
> > 
> > Ugg - having a memcpy() here is not a good idea.
> > It really is better to have 3 arrays, but use a 'mostly common' initialiser.
> > Perhaps:
> > #define BASE64_REV_INIT(ch_62, ch_63) = { \
> > 	[0 ... 255] = -1, \
> > 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, \
> > 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, \
> > 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, \
> > 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, \
> > 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \
> > 	[ch_62] = 62, [ch_63] = 63, \
> > }
> > 
> > static const s8 base64_rev_maps[][256] = {
> > 	[BASE64_STD] = BASE64_REV_INIT('+', '/'),
> > 	[BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'),
> > 	[BASE64_IMAP] = BASE64_REV_INIT('+', ',')
> > };
> > 
> > Then (after validating variant):
> > 	const s8 *map = base64_rev_maps[variant];
> >  
> 
> Got it. I'll switch to using three static tables with a common initializer
> as you suggested.
> 
> > > 
> > > 	if (variant < BASE64_STD || variant > BASE64_IMAP)
> > > 		return -1;
> > > 
> > > 	base64_rev_tables[base64_symbols[variant].char62] = 62;
> > > 	base64_rev_tables[base64_symbols[variant].char63] = 63;
> > > 
> > > 	while (padding && srclen > 0 && src[srclen - 1] == '=') {
> > > 		pad_cnt++;
> > > 		srclen--;
> > > 		if (pad_cnt > 2)
> > > 			return -1;
> > > 	}  
> > 
> > I'm not sure I'd to that there.
> > You are (in some sense) optimising for padding.
> > From what I remember, "abcd" gives 24 bits, "abc=" 16 and "ab==" 8.
> >   
> > > 
> > > 	while (srclen >= 4) {
> > > 		/* Decode the next 4 characters */
> > > 		input1 = base64_rev_tables[(u8)src[0]];
> > > 		input2 = base64_rev_tables[(u8)src[1]];
> > > 		input3 = base64_rev_tables[(u8)src[2]];
> > > 		input4 = base64_rev_tables[(u8)src[3]];  
> > 
> > I'd be tempted to make src[] unsigned - probably be assigning the parameter
> > to a local at the top of the function.
> > 
> > Also you have input3 = ... src[2]...
> > Perhaps they should be input[0..3] instead.
> >  
> 
> OK, I'll make the changes.
> 
> > > 
> > > 		val = (input1 << 18) |
> > > 		      (input2 << 12) |
> > > 		      (input3 << 6) |
> > > 		      input4;  
> > 
> > Four lines is excessive, C doesn't require the () and I'm not sure the
> > compilers complain about << and |.
> >   
> 
> OK, I'll make the changes.
> 
> > > 
> > > 		if (unlikely((s32)val < 0))
> > > 			return -1;  
> > 
> > Make 'val' signed - then you don't need the cast.
...
> > Or, if you really want to use the code below the loop:
> > 			if (!padding || src[3] != '=')
> > 				return -1;
> > 			padding = 0;
> > 			srclen -= 1 + (src[2] == '=');
> > 			break;

That is missing a test...
Change to:
			if (!padding || srclen != 4 || src[3] != '=')
				return -1;
			padding = 0;
			srclen = src[2] == '=' ? 2 : 3;
			break;

The compiler will then optimise away the first checks after the
loop because it knows they can't happen.

> > 
> >   
> > > 
> > > 		*bp++ = (u8)(val >> 16);
> > > 		*bp++ = (u8)(val >> 8);
> > > 		*bp++ = (u8)val;  
> > 
> > You don't need those casts.
> >  
> 
> OK, I'll make the changes.
> 
> > > 
> > > 		src += 4;
> > > 		srclen -= 4;
> > > 	}
> > > 
> > > 	/* Handle leftover characters when padding is not used */  
> > 
> > You are coming here with padding.
> > I'm not sure what should happen without padding.
> > For a multi-line file decode I suspect the characters need adding to
> > the start of the next line (ie lines aren't required to contain
> > multiples of 4 characters - even though they almost always will).
> >   
> 
> Ah, my mistake. I forgot to remove that comment.
> Based on my observation, base64_decode() should process the entire input
> buffer in a single call, so I believe it does not need to handle
> multi-line input.

I was thinking of the the case where it is processing the output of
something like base64encode.
The caller will have separated out the lines, but I don't know whether
every line has to contain a multiple of 4 characters - or whether the
lines can be arbitrarily split after being encoded (I know that won't
normally happen - but you never know). 

> 
> Best regards,
> Guan-Chun
> 
> > > 	if (srclen > 0) {
> > > 		switch (srclen) {  
> > 
> > You don't need an 'if' and a 'switch'.
> > srclen is likely to be zero, but perhaps write as:
> > 	if (likely(!srclen))
> > 		return bp - dst;
> > 	if (padding || srclen == 1)
> > 		return -1;
> > 
> > 	val = base64_rev_tables[(u8)src[0]] << 12 | base64_rev_tables[(u8)src[1]] << 6;
> > 	*bp++ = val >> 10;
> > 	if (srclen == 1) {
Obviously should be (srclen == 2)
> > 		if (val & 0x800003ff)
> > 			return -1;
> > 	} else {
> > 		val |= base64_rev_tables[(u8)src[2]];
> > 		if (val & 0x80000003)
> > 			return -1;
> > 		*bp++ = val >> 2;
> > 	}
> > 	return bp - dst;
> > }
> > 
> > 	David

	David

> >   
> > > 		case 2:
> > > 			input1 = base64_rev_tables[(u8)src[0]];
> > > 			input2 = base64_rev_tables[(u8)src[1]];
> > > 			val = (input1 << 6) | input2; /* 12 bits */
> > > 			if (unlikely((s32)val < 0 || val & 0x0F))
> > > 				return -1;
> > > 
> > > 			*bp++ = (u8)(val >> 4);
> > > 			break;
> > > 		case 3:
> > > 			input1 = base64_rev_tables[(u8)src[0]];
> > > 			input2 = base64_rev_tables[(u8)src[1]];
> > > 			input3 = base64_rev_tables[(u8)src[2]];
> > > 
> > > 			val = (input1 << 12) |
> > > 			      (input2 << 6) |
> > > 			      input3; /* 18 bits */
> > > 			if (unlikely((s32)val < 0 || val & 0x03))
> > > 				return -1;
> > > 
> > > 			*bp++ = (u8)(val >> 10);
> > > 			*bp++ = (u8)(val >> 2);
> > > 			break;
> > > 		default:
> > > 			return -1;
> > > 		}
> > > 	}
> > > 
> > > 	return bp - dst;
> > > }
> > > Based on KUnit testing, the performance results are as follows:
> > > 	base64_performance_tests: [64B] decode run : 40ns
> > > 	base64_performance_tests: [1KB] decode run : 463ns
> > > 
> > > However, this approach introduces an issue. It uses 256 bytes of memory
> > > on the stack for base64_rev_tables, which might not be ideal. Does anyone
> > > have any thoughts or alternative suggestions to solve this issue, or is it
> > > not really a concern?
> > > 
> > > Best regards,
> > > Guan-Chun
> > >   
> > > > > 
> > > > > Best,
> > > > > Caleb    
> > > >     
> >

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 3 months, 2 weeks ago

On Tue, Oct 14, 2025 at 09:14:20AM +0100, David Laight wrote:
> On Mon, 13 Oct 2025 17:49:55 +0800
> Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
>
> > On Fri, Oct 10, 2025 at 10:51:38AM +0100, David Laight wrote:
> > > On Thu, 9 Oct 2025 20:25:17 +0800
> > > Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > > 
> > > ...  
> > > > As Eric mentioned, the decoder in fs/crypto/ needs to reject invalid input.  
> > > 
> > > (to avoid two different input buffers giving the same output)
> > > 
> > > Which is annoyingly reasonable.
> > >   
> > > > One possible solution I came up with is to first create a shared
> > > > base64_rev_common lookup table as the base for all Base64 variants.
> > > > Then, depending on the variant (e.g., BASE64_STD, BASE64_URLSAFE, etc.), we
> > > > can dynamically adjust the character mappings for position 62 and position 63
> > > > at runtime, based on the variant.
> > > > 
> > > > Here are the changes to the code:
> > > > 
> > > > static const s8 base64_rev_common[256] = {
> > > > 	[0 ... 255] = -1,
> > > > 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
> > > > 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
> > > > 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
> > > > 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
> > > > 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
> > > > };
> > > > 
> > > > static const struct {
> > > > 	char char62, char63;
> > > > } base64_symbols[] = {
> > > > 	[BASE64_STD] = { '+', '/' },
> > > > 	[BASE64_URLSAFE] = { '-', '_' },
> > > > 	[BASE64_IMAP] = { '+', ',' },
> > > > };
> > > > 
> > > > int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
> > > > {
> > > > 	u8 *bp = dst;
> > > > 	u8 pad_cnt = 0;
> > > > 	s8 input1, input2, input3, input4;
> > > > 	u32 val;
> > > > 	s8 base64_rev_tables[256];
> > > > 
> > > > 	/* Validate the input length for padding */
> > > > 	if (unlikely(padding && (srclen & 0x03) != 0))
> > > > 		return -1;  
> > > 
> > > There is no need for an early check.
> > > Pick it up after the loop when 'srclen != 0'.
> > >  
> > 
> > I think the early check is still needed, since I'm removing the
> > padding '=' first.
> > This makes the handling logic consistent for both padded and unpadded
> > inputs, and avoids extra if conditions for padding inside the hot loop.
> 
> The 'invalid input' check will detect the padding.
> Then you don't get an extra check if there is no padding (probably normal).
> I realised I didn't get it quite right - updated below.
> 
> > 
> > > > 
> > > > 	memcpy(base64_rev_tables, base64_rev_common, sizeof(base64_rev_common));  
> > > 
> > > Ugg - having a memcpy() here is not a good idea.
> > > It really is better to have 3 arrays, but use a 'mostly common' initialiser.
> > > Perhaps:
> > > #define BASE64_REV_INIT(ch_62, ch_63) = { \
> > > 	[0 ... 255] = -1, \
> > > 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, \
> > > 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, \
> > > 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, \
> > > 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, \
> > > 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \
> > > 	[ch_62] = 62, [ch_63] = 63, \
> > > }
> > > 
> > > static const s8 base64_rev_maps[][256] = {
> > > 	[BASE64_STD] = BASE64_REV_INIT('+', '/'),
> > > 	[BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'),
> > > 	[BASE64_IMAP] = BASE64_REV_INIT('+', ',')
> > > };
> > > 
> > > Then (after validating variant):
> > > 	const s8 *map = base64_rev_maps[variant];
> > >  
> > 
> > Got it. I'll switch to using three static tables with a common initializer
> > as you suggested.
> > 
> > > > 
> > > > 	if (variant < BASE64_STD || variant > BASE64_IMAP)
> > > > 		return -1;
> > > > 
> > > > 	base64_rev_tables[base64_symbols[variant].char62] = 62;
> > > > 	base64_rev_tables[base64_symbols[variant].char63] = 63;
> > > > 
> > > > 	while (padding && srclen > 0 && src[srclen - 1] == '=') {
> > > > 		pad_cnt++;
> > > > 		srclen--;
> > > > 		if (pad_cnt > 2)
> > > > 			return -1;
> > > > 	}  
> > > 
> > > I'm not sure I'd to that there.
> > > You are (in some sense) optimising for padding.
> > > From what I remember, "abcd" gives 24 bits, "abc=" 16 and "ab==" 8.
> > >   
> > > > 
> > > > 	while (srclen >= 4) {
> > > > 		/* Decode the next 4 characters */
> > > > 		input1 = base64_rev_tables[(u8)src[0]];
> > > > 		input2 = base64_rev_tables[(u8)src[1]];
> > > > 		input3 = base64_rev_tables[(u8)src[2]];
> > > > 		input4 = base64_rev_tables[(u8)src[3]];  
> > > 
> > > I'd be tempted to make src[] unsigned - probably be assigning the parameter
> > > to a local at the top of the function.
> > > 
> > > Also you have input3 = ... src[2]...
> > > Perhaps they should be input[0..3] instead.
> > >  
> > 
> > OK, I'll make the changes.
> > 
> > > > 
> > > > 		val = (input1 << 18) |
> > > > 		      (input2 << 12) |
> > > > 		      (input3 << 6) |
> > > > 		      input4;  
> > > 
> > > Four lines is excessive, C doesn't require the () and I'm not sure the
> > > compilers complain about << and |.
> > >   
> > 
> > OK, I'll make the changes.
> > 
> > > > 
> > > > 		if (unlikely((s32)val < 0))
> > > > 			return -1;  
> > > 
> > > Make 'val' signed - then you don't need the cast.
> ...
> > > Or, if you really want to use the code below the loop:
> > > 			if (!padding || src[3] != '=')
> > > 				return -1;
> > > 			padding = 0;
> > > 			srclen -= 1 + (src[2] == '=');
> > > 			break;
> 
> That is missing a test...
> Change to:
> 			if (!padding || srclen != 4 || src[3] != '=')
> 				return -1;
> 			padding = 0;
> 			srclen = src[2] == '=' ? 2 : 3;
> 			break;
>
> The compiler will then optimise away the first checks after the
> loop because it knows they can't happen.

Hi David,

I noticed your suggested approach:
val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
Per the C11 draft, this can lead to undefined behavior.
"If E1 has a signed type and nonnegative value, and E1 × 2^E2 is
representable in the result type, then that is the resulting value;
otherwise, the behavior is undefined."
Therefore, left-shifting a negative signed value is undefined behavior.

Perhaps we could change the code as shown below. What do you think?

int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
{
	u8 *bp = dst;
	s8 input[4];
	u32 val;
	const u8 *s = (const u8 *)src;
	const s8 *base64_rev_table = base64_rev_maps[variant];

	while (srclen >= 4) {
		input[0] = base64_rev_table[s[0]];
		input[1] = base64_rev_table[s[1]];
		input[2] = base64_rev_table[s[2]];
		input[3] = base64_rev_table[s[3]];

		if (unlikely((input[0] | input[1] | input[2] | input[3]) < 0)) {
			if (!padding || srclen != 4 || s[3] != '=')
				return -1;
			padding = 0;
			srclen = s[2] == '=' ? 2 : 3;
			break;
		}

		val = (u32)input[0] << 18 | (u32)input[1] << 12 |
			(u32)input[2] << 6 | (u32)input[3];

		*bp++ = val >> 16;
		*bp++ = val >> 8;
		*bp++ = val;

		s += 4;
		srclen -= 4;
	}

	if (likely(!srclen))
		return bp - dst;
	if (padding || srclen == 1)
		return -1;

	input[0] = base64_rev_table[s[0]];
	input[1] = base64_rev_table[s[1]];

	if (unlikely(input[0] < 0 || input[1] < 0))
		return -1;

	val = (u32)input[0] << 18 | (u32)input[1] << 12;

	if (srclen == 2) {
		if (unlikely(input[1] & 0x0f))
			return -1;
		*bp++ = val >> 16;
	} else {
		input[2] = base64_rev_table[s[2]];
		if (unlikely(input[2] < 0 || (input[2] & 0x03)))
			return -1;
		val |= (u32)input[2] << 6;
		*bp++ = val >> 16;
		*bp++ = val >> 8;
	}

	return bp - dst;
}

Best regards,
Guan-Chun

> > > 
> > >   
> > > > 
> > > > 		*bp++ = (u8)(val >> 16);
> > > > 		*bp++ = (u8)(val >> 8);
> > > > 		*bp++ = (u8)val;  
> > > 
> > > You don't need those casts.
> > >  
> > 
> > OK, I'll make the changes.
> > 
> > > > 
> > > > 		src += 4;
> > > > 		srclen -= 4;
> > > > 	}
> > > > 
> > > > 	/* Handle leftover characters when padding is not used */  
> > > 
> > > You are coming here with padding.
> > > I'm not sure what should happen without padding.
> > > For a multi-line file decode I suspect the characters need adding to
> > > the start of the next line (ie lines aren't required to contain
> > > multiples of 4 characters - even though they almost always will).
> > >   
> > 
> > Ah, my mistake. I forgot to remove that comment.
> > Based on my observation, base64_decode() should process the entire input
> > buffer in a single call, so I believe it does not need to handle
> > multi-line input.
> 
> I was thinking of the the case where it is processing the output of
> something like base64encode.
> The caller will have separated out the lines, but I don't know whether
> every line has to contain a multiple of 4 characters - or whether the
> lines can be arbitrarily split after being encoded (I know that won't
> normally happen - but you never know). 
> 
> > 
> > Best regards,
> > Guan-Chun
> > 
> > > > 	if (srclen > 0) {
> > > > 		switch (srclen) {  
> > > 
> > > You don't need an 'if' and a 'switch'.
> > > srclen is likely to be zero, but perhaps write as:
> > > 	if (likely(!srclen))
> > > 		return bp - dst;
> > > 	if (padding || srclen == 1)
> > > 		return -1;
> > > 
> > > 	val = base64_rev_tables[(u8)src[0]] << 12 | base64_rev_tables[(u8)src[1]] << 6;
> > > 	*bp++ = val >> 10;
> > > 	if (srclen == 1) {
> Obviously should be (srclen == 2)
> > > 		if (val & 0x800003ff)
> > > 			return -1;
> > > 	} else {
> > > 		val |= base64_rev_tables[(u8)src[2]];
> > > 		if (val & 0x80000003)
> > > 			return -1;
> > > 		*bp++ = val >> 2;
> > > 	}
> > > 	return bp - dst;
> > > }
> > > 
> > > 	David
> 
> 	David
> 
> > >   
> > > > 		case 2:
> > > > 			input1 = base64_rev_tables[(u8)src[0]];
> > > > 			input2 = base64_rev_tables[(u8)src[1]];
> > > > 			val = (input1 << 6) | input2; /* 12 bits */
> > > > 			if (unlikely((s32)val < 0 || val & 0x0F))
> > > > 				return -1;
> > > > 
> > > > 			*bp++ = (u8)(val >> 4);
> > > > 			break;
> > > > 		case 3:
> > > > 			input1 = base64_rev_tables[(u8)src[0]];
> > > > 			input2 = base64_rev_tables[(u8)src[1]];
> > > > 			input3 = base64_rev_tables[(u8)src[2]];
> > > > 
> > > > 			val = (input1 << 12) |
> > > > 			      (input2 << 6) |
> > > > 			      input3; /* 18 bits */
> > > > 			if (unlikely((s32)val < 0 || val & 0x03))
> > > > 				return -1;
> > > > 
> > > > 			*bp++ = (u8)(val >> 10);
> > > > 			*bp++ = (u8)(val >> 2);
> > > > 			break;
> > > > 		default:
> > > > 			return -1;
> > > > 		}
> > > > 	}
> > > > 
> > > > 	return bp - dst;
> > > > }
> > > > Based on KUnit testing, the performance results are as follows:
> > > > 	base64_performance_tests: [64B] decode run : 40ns
> > > > 	base64_performance_tests: [1KB] decode run : 463ns
> > > > 
> > > > However, this approach introduces an issue. It uses 256 bytes of memory
> > > > on the stack for base64_rev_tables, which might not be ideal. Does anyone
> > > > have any thoughts or alternative suggestions to solve this issue, or is it
> > > > not really a concern?
> > > > 
> > > > Best regards,
> > > > Guan-Chun
> > > >   
> > > > > > 
> > > > > > Best,
> > > > > > Caleb    
> > > > >     
> > >   
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by David Laight 3 months, 2 weeks ago

On Mon, 27 Oct 2025 21:12:00 +0800
Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:

...
> Hi David,
> 
> I noticed your suggested approach:
> val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> Per the C11 draft, this can lead to undefined behavior.
> "If E1 has a signed type and nonnegative value, and E1 × 2^E2 is
> representable in the result type, then that is the resulting value;
> otherwise, the behavior is undefined."
> Therefore, left-shifting a negative signed value is undefined behavior.

Don't worry about that, there are all sorts of places in the kernel
where shifts of negative values are technically undefined.

They are undefined because you get different values for 1's compliment
and 'sign overpunch' signed integers.
Even for 2's compliment C doesn't require a 'sign bit replicating'
right shift.
(And I suspect both gcc and clang only support 2's compliment.)

I don't think even clang is stupid enough to silently not emit any
instructions for shifts of negative values.
It is another place where it should be 'implementation defined' rather
than 'undefined' behaviour.

> Perhaps we could change the code as shown below. What do you think?

If you are really worried, change the '<< n' to '* (1 << n)' which
obfuscates the code.
The compiler will convert it straight back to a simple shift.

I bet that if you look hard enough even 'a | b' is undefined if
either is negative.

	David

	David

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 3 months, 2 weeks ago

On Mon, Oct 27, 2025 at 02:18:02PM +0000, David Laight wrote:
> On Mon, 27 Oct 2025 21:12:00 +0800
> Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> 
> ...
> > Hi David,
> > 
> > I noticed your suggested approach:
> > val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> > Per the C11 draft, this can lead to undefined behavior.
> > "If E1 has a signed type and nonnegative value, and E1 × 2^E2 is
> > representable in the result type, then that is the resulting value;
> > otherwise, the behavior is undefined."
> > Therefore, left-shifting a negative signed value is undefined behavior.
> 
> Don't worry about that, there are all sorts of places in the kernel
> where shifts of negative values are technically undefined.
> 
> They are undefined because you get different values for 1's compliment
> and 'sign overpunch' signed integers.
> Even for 2's compliment C doesn't require a 'sign bit replicating'
> right shift.
> (And I suspect both gcc and clang only support 2's compliment.)
> 
> I don't think even clang is stupid enough to silently not emit any
> instructions for shifts of negative values.
> It is another place where it should be 'implementation defined' rather
> than 'undefined' behaviour.
>

Hi David,

Thanks for your explanation. I'll proceed with the modification according
to your original suggestion.

Best regards,
Guan-Chun

> > Perhaps we could change the code as shown below. What do you think?
> 
> If you are really worried, change the '<< n' to '* (1 << n)' which
> obfuscates the code.
> The compiler will convert it straight back to a simple shift.
> 
> I bet that if you look hard enough even 'a | b' is undefined if
> either is negative.
> 
> 	David
> 
> 
> 
> 	David

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Guan-Chun Wu 3 months, 3 weeks ago

On Tue, Oct 14, 2025 at 09:14:20AM +0100, David Laight wrote:
> On Mon, 13 Oct 2025 17:49:55 +0800
> Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> 
> > On Fri, Oct 10, 2025 at 10:51:38AM +0100, David Laight wrote:
> > > On Thu, 9 Oct 2025 20:25:17 +0800
> > > Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > > 
> > > ...  
> > > > As Eric mentioned, the decoder in fs/crypto/ needs to reject invalid input.  
> > > 
> > > (to avoid two different input buffers giving the same output)
> > > 
> > > Which is annoyingly reasonable.
> > >   
> > > > One possible solution I came up with is to first create a shared
> > > > base64_rev_common lookup table as the base for all Base64 variants.
> > > > Then, depending on the variant (e.g., BASE64_STD, BASE64_URLSAFE, etc.), we
> > > > can dynamically adjust the character mappings for position 62 and position 63
> > > > at runtime, based on the variant.
> > > > 
> > > > Here are the changes to the code:
> > > > 
> > > > static const s8 base64_rev_common[256] = {
> > > > 	[0 ... 255] = -1,
> > > > 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
> > > > 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
> > > > 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
> > > > 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
> > > > 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
> > > > };
> > > > 
> > > > static const struct {
> > > > 	char char62, char63;
> > > > } base64_symbols[] = {
> > > > 	[BASE64_STD] = { '+', '/' },
> > > > 	[BASE64_URLSAFE] = { '-', '_' },
> > > > 	[BASE64_IMAP] = { '+', ',' },
> > > > };
> > > > 
> > > > int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
> > > > {
> > > > 	u8 *bp = dst;
> > > > 	u8 pad_cnt = 0;
> > > > 	s8 input1, input2, input3, input4;
> > > > 	u32 val;
> > > > 	s8 base64_rev_tables[256];
> > > > 
> > > > 	/* Validate the input length for padding */
> > > > 	if (unlikely(padding && (srclen & 0x03) != 0))
> > > > 		return -1;  
> > > 
> > > There is no need for an early check.
> > > Pick it up after the loop when 'srclen != 0'.
> > >  
> > 
> > I think the early check is still needed, since I'm removing the
> > padding '=' first.
> > This makes the handling logic consistent for both padded and unpadded
> > inputs, and avoids extra if conditions for padding inside the hot loop.
> 
> The 'invalid input' check will detect the padding.
> Then you don't get an extra check if there is no padding (probably normal).
> I realised I didn't get it quite right - updated below.
> 
> > 
> > > > 
> > > > 	memcpy(base64_rev_tables, base64_rev_common, sizeof(base64_rev_common));  
> > > 
> > > Ugg - having a memcpy() here is not a good idea.
> > > It really is better to have 3 arrays, but use a 'mostly common' initialiser.
> > > Perhaps:
> > > #define BASE64_REV_INIT(ch_62, ch_63) = { \
> > > 	[0 ... 255] = -1, \
> > > 	['A'] =  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, \
> > > 		13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, \
> > > 	['a'] = 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, \
> > > 		39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, \
> > > 	['0'] = 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \
> > > 	[ch_62] = 62, [ch_63] = 63, \
> > > }
> > > 
> > > static const s8 base64_rev_maps[][256] = {
> > > 	[BASE64_STD] = BASE64_REV_INIT('+', '/'),
> > > 	[BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'),
> > > 	[BASE64_IMAP] = BASE64_REV_INIT('+', ',')
> > > };
> > > 
> > > Then (after validating variant):
> > > 	const s8 *map = base64_rev_maps[variant];
> > >  
> > 
> > Got it. I'll switch to using three static tables with a common initializer
> > as you suggested.
> > 
> > > > 
> > > > 	if (variant < BASE64_STD || variant > BASE64_IMAP)
> > > > 		return -1;
> > > > 
> > > > 	base64_rev_tables[base64_symbols[variant].char62] = 62;
> > > > 	base64_rev_tables[base64_symbols[variant].char63] = 63;
> > > > 
> > > > 	while (padding && srclen > 0 && src[srclen - 1] == '=') {
> > > > 		pad_cnt++;
> > > > 		srclen--;
> > > > 		if (pad_cnt > 2)
> > > > 			return -1;
> > > > 	}  
> > > 
> > > I'm not sure I'd to that there.
> > > You are (in some sense) optimising for padding.
> > > From what I remember, "abcd" gives 24 bits, "abc=" 16 and "ab==" 8.
> > >   
> > > > 
> > > > 	while (srclen >= 4) {
> > > > 		/* Decode the next 4 characters */
> > > > 		input1 = base64_rev_tables[(u8)src[0]];
> > > > 		input2 = base64_rev_tables[(u8)src[1]];
> > > > 		input3 = base64_rev_tables[(u8)src[2]];
> > > > 		input4 = base64_rev_tables[(u8)src[3]];  
> > > 
> > > I'd be tempted to make src[] unsigned - probably be assigning the parameter
> > > to a local at the top of the function.
> > > 
> > > Also you have input3 = ... src[2]...
> > > Perhaps they should be input[0..3] instead.
> > >  
> > 
> > OK, I'll make the changes.
> > 
> > > > 
> > > > 		val = (input1 << 18) |
> > > > 		      (input2 << 12) |
> > > > 		      (input3 << 6) |
> > > > 		      input4;  
> > > 
> > > Four lines is excessive, C doesn't require the () and I'm not sure the
> > > compilers complain about << and |.
> > >   
> > 
> > OK, I'll make the changes.
> > 
> > > > 
> > > > 		if (unlikely((s32)val < 0))
> > > > 			return -1;  
> > > 
> > > Make 'val' signed - then you don't need the cast.
> ...
> > > Or, if you really want to use the code below the loop:
> > > 			if (!padding || src[3] != '=')
> > > 				return -1;
> > > 			padding = 0;
> > > 			srclen -= 1 + (src[2] == '=');
> > > 			break;
> 
> That is missing a test...
> Change to:
> 			if (!padding || srclen != 4 || src[3] != '=')
> 				return -1;
> 			padding = 0;
> 			srclen = src[2] == '=' ? 2 : 3;
> 			break;
> 
> The compiler will then optimise away the first checks after the
> loop because it knows they can't happen.
> 
> > > 
> > >   
> > > > 
> > > > 		*bp++ = (u8)(val >> 16);
> > > > 		*bp++ = (u8)(val >> 8);
> > > > 		*bp++ = (u8)val;  
> > > 
> > > You don't need those casts.
> > >  
> > 
> > OK, I'll make the changes.
> > 
> > > > 
> > > > 		src += 4;
> > > > 		srclen -= 4;
> > > > 	}
> > > > 
> > > > 	/* Handle leftover characters when padding is not used */  
> > > 
> > > You are coming here with padding.
> > > I'm not sure what should happen without padding.
> > > For a multi-line file decode I suspect the characters need adding to
> > > the start of the next line (ie lines aren't required to contain
> > > multiples of 4 characters - even though they almost always will).
> > >   
> > 
> > Ah, my mistake. I forgot to remove that comment.
> > Based on my observation, base64_decode() should process the entire input
> > buffer in a single call, so I believe it does not need to handle
> > multi-line input.
> 
> I was thinking of the the case where it is processing the output of
> something like base64encode.
> The caller will have separated out the lines, but I don't know whether
> every line has to contain a multiple of 4 characters - or whether the
> lines can be arbitrarily split after being encoded (I know that won't
> normally happen - but you never know). 
>

I believe the splitting should be aligned to multiples of 4,
since Base64 encoding operates on 4-character blocks that represent 3 bytes
of data.
If it's split arbitrarily, the decoded result may differ from the original
data or even become invalid.

Best regards,
Guan-Chun

> > 
> > Best regards,
> > Guan-Chun
> > 
> > > > 	if (srclen > 0) {
> > > > 		switch (srclen) {  
> > > 
> > > You don't need an 'if' and a 'switch'.
> > > srclen is likely to be zero, but perhaps write as:
> > > 	if (likely(!srclen))
> > > 		return bp - dst;
> > > 	if (padding || srclen == 1)
> > > 		return -1;
> > > 
> > > 	val = base64_rev_tables[(u8)src[0]] << 12 | base64_rev_tables[(u8)src[1]] << 6;
> > > 	*bp++ = val >> 10;
> > > 	if (srclen == 1) {
> Obviously should be (srclen == 2)
> > > 		if (val & 0x800003ff)
> > > 			return -1;
> > > 	} else {
> > > 		val |= base64_rev_tables[(u8)src[2]];
> > > 		if (val & 0x80000003)
> > > 			return -1;
> > > 		*bp++ = val >> 2;
> > > 	}
> > > 	return bp - dst;
> > > }
> > > 
> > > 	David
> 
> 	David
> 
> > >   
> > > > 		case 2:
> > > > 			input1 = base64_rev_tables[(u8)src[0]];
> > > > 			input2 = base64_rev_tables[(u8)src[1]];
> > > > 			val = (input1 << 6) | input2; /* 12 bits */
> > > > 			if (unlikely((s32)val < 0 || val & 0x0F))
> > > > 				return -1;
> > > > 
> > > > 			*bp++ = (u8)(val >> 4);
> > > > 			break;
> > > > 		case 3:
> > > > 			input1 = base64_rev_tables[(u8)src[0]];
> > > > 			input2 = base64_rev_tables[(u8)src[1]];
> > > > 			input3 = base64_rev_tables[(u8)src[2]];
> > > > 
> > > > 			val = (input1 << 12) |
> > > > 			      (input2 << 6) |
> > > > 			      input3; /* 18 bits */
> > > > 			if (unlikely((s32)val < 0 || val & 0x03))
> > > > 				return -1;
> > > > 
> > > > 			*bp++ = (u8)(val >> 10);
> > > > 			*bp++ = (u8)(val >> 2);
> > > > 			break;
> > > > 		default:
> > > > 			return -1;
> > > > 		}
> > > > 	}
> > > > 
> > > > 	return bp - dst;
> > > > }
> > > > Based on KUnit testing, the performance results are as follows:
> > > > 	base64_performance_tests: [64B] decode run : 40ns
> > > > 	base64_performance_tests: [1KB] decode run : 463ns
> > > > 
> > > > However, this approach introduces an issue. It uses 256 bytes of memory
> > > > on the stack for base64_rev_tables, which might not be ideal. Does anyone
> > > > have any thoughts or alternative suggestions to solve this issue, or is it
> > > > not really a concern?
> > > > 
> > > > Best regards,
> > > > Guan-Chun
> > > >   
> > > > > > 
> > > > > > Best,
> > > > > > Caleb    
> > > > >     
> > >   
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Eric Biggers 4 months ago

On Tue, Oct 07, 2025 at 07:57:16AM -0700, Caleb Sander Mateos wrote:
> On Tue, Oct 7, 2025 at 1:28 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> >
> > On Sun, Oct 05, 2025 at 06:18:03PM +0100, David Laight wrote:
> > > On Wed, 1 Oct 2025 09:20:27 -0700
> > > Caleb Sander Mateos <csander@purestorage.com> wrote:
> > >
> > > > On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > > > >
> > > > > On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:
> > > > > > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> > > > > > >
> > > > > > > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > > >
> > > > > > > Replace the use of strchr() in base64_decode() with precomputed reverse
> > > > > > > lookup tables for each variant. This avoids repeated string scans and
> > > > > > > improves performance. Use -1 in the tables to mark invalid characters.
> > > > > > >
> > > > > > > Decode:
> > > > > > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > > > > > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > > > > > >
> > > > > > > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > > > > > > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > > > > > > ---
> > > > > > >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> > > > > > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > > > > > >
> > > > > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > > > > index 1af557785..b20fdf168 100644
> > > > > > > --- a/lib/base64.c
> > > > > > > +++ b/lib/base64.c
> > > > > > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > > > > > >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > > > > > >  };
> > > > > > >
> > > > > > > +static const s8 base64_rev_tables[][256] = {
> > > > > > > +       [BASE64_STD] = {
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> > > > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +       },
> > > > > > > +       [BASE64_URLSAFE] = {
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> > > > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> > > > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +       },
> > > > > > > +       [BASE64_IMAP] = {
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> > > > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > > > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > > > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > > > > > > +       },
> > > > > >
> > > > > > Do we actually need 3 separate lookup tables? It looks like all 3
> > > > > > variants agree on the value of any characters they have in common. So
> > > > > > we could combine them into a single lookup table that would work for a
> > > > > > valid base64 string of any variant. The only downside I can see is
> > > > > > that base64 strings which are invalid in some variants might no longer
> > > > > > be rejected by base64_decode().
> > > > > >
> > > > >
> > > > > In addition to the approach David mentioned, maybe we can use a common
> > > > > lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> > > > > symbols with a switch.
> > >
> > > It is certainly possible to generate the initialiser from a #define to
> > > avoid all the replicated source.
> > >
> > > > >
> > > > > For example:
> > > > >
> > > > > static const s8 base64_rev_common[256] = {
> > > > >     [0 ... 255] = -1,
> > > > >     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,
> > >
> > > If you assume ASCII (I doubt Linux runs on any EBCDIC systems) you
> > > can assume the characters are sequential and miss ['B'] = etc to
> > > reduce the the line lengths.
> > > (Even EBCDIC has A-I J-R S-Z and 0-9 as adjacent values)
> > >
> > > > >     ['a'] = 26, /* ... */, ['z'] = 51,
> > > > >     ['0'] = 52, /* ... */, ['9'] = 61,
> > > > > };
> > > > >
> > > > > static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
> > > > >     s8 v = base64_rev_common[c];
> > > > >     if (v != -1)
> > > > >         return v;
> > > > >
> > > > >     switch (variant) {
> > > > >     case BASE64_STD:
> > > > >         if (c == '+') return 62;
> > > > >         if (c == '/') return 63;
> > > > >         break;
> > > > >     case BASE64_IMAP:
> > > > >         if (c == '+') return 62;
> > > > >         if (c == ',') return 63;
> > > > >         break;
> > > > >     case BASE64_URLSAFE:
> > > > >         if (c == '-') return 62;
> > > > >         if (c == '_') return 63;
> > > > >         break;
> > > > >     }
> > > > >     return -1;
> > > > > }
> > > > >
> > > > > What do you think?
> > > >
> > > > That adds several branches in the hot loop, at least 2 of which are
> > > > unpredictable for valid base64 input of a given variant (v != -1 as
> > > > well as the first c check in the applicable switch case).
> > >
> > > I'd certainly pass in the character values for 62 and 63 so they are
> > > determined well outside the inner loop.
> > > Possibly even going as far as #define BASE64_STD ('+' << 8 | '/').
> > >
> > > > That seems like it would hurt performance, no?
> > > > I think having 3 separate tables
> > > > would be preferable to making the hot loop more branchy.
> > >
> > > Depends how common you think 62 and 63 are...
> > > I guess 63 comes from 0xff bytes - so might be quite common.
> > >
> > > One thing I think you've missed is that the decode converts 4 characters
> > > into 24 bits - which then need carefully writing into the output buffer.
> > > There is no need to check whether each character is valid.
> > > After:
> > >       val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> > > val_24 will be negative iff one of b[0..3] is invalid.
> > > So you only need to check every 4 input characters, not for every one.
> > > That does require separate tables.
> > > (Or have a decoder that always maps "+-" to 62 and "/,_" to 63.)
> > >
> > >       David
> > >
> >
> > Thanks for the feedback.
> > For the next revision, we’ll use a single lookup table that maps both +
> > and - to 62, and /, _, and , to 63.
> > Does this approach sound good to everyone?
> 
> Sounds fine to me. Perhaps worth pointing out that the decision to
> accept any base64 variant in the decoder would likely be permanent,
> since users may come to depend on it. But I don't see any issue with
> it as long as all the base64 variants agree on the values of their
> common symbols.

No thanks.  fs/crypto/ needs to have a correct Base64 decoder which
rejects invalid inputs, so that multiple filenames aren't accepted for
the same file.  If lib/ won't provide that, then please keep fs/crypto/
as-is.

- Eric

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Posted by Kuan-Wei Chiu 4 months, 2 weeks ago

On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:
> On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu <409411716@gms.tku.edu.tw> wrote:
> >
> > From: Kuan-Wei Chiu <visitorckw@gmail.com>
> >
> > Replace the use of strchr() in base64_decode() with precomputed reverse
> > lookup tables for each variant. This avoids repeated string scans and
> > improves performance. Use -1 in the tables to mark invalid characters.
> >
> > Decode:
> >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> >
> > Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> > Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
> > ---
> >  lib/base64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 61 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/base64.c b/lib/base64.c
> > index 1af557785..b20fdf168 100644
> > --- a/lib/base64.c
> > +++ b/lib/base64.c
> > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> >         [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> >  };
> >
> > +static const s8 base64_rev_tables[][256] = {
> > +       [BASE64_STD] = {
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,  -1,  63,
> > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +       },
> > +       [BASE64_URLSAFE] = {
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  -1,  -1,
> > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  63,
> > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +       },
> > +       [BASE64_IMAP] = {
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  63,  -1,  -1,  -1,
> > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
> > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
> > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
> > +       },
> 
> Do we actually need 3 separate lookup tables? It looks like all 3
> variants agree on the value of any characters they have in common. So
> we could combine them into a single lookup table that would work for a
> valid base64 string of any variant. The only downside I can see is
> that base64 strings which are invalid in some variants might no longer
> be rejected by base64_decode().

Ah, David also mentioned this earlier, but I forgot about it while
writing the code. Sorry for that. I'll rectify it.

Regards,
Kuan-Wei

> 
> > +};
> > +
> >  /**
> >   * base64_encode() - Base64-encode some binary data
> >   * @src: the binary data to encode
> > @@ -82,11 +139,9 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
> >         int bits = 0;
> >         int i;
> >         u8 *bp = dst;
> > -       const char *base64_table = base64_tables[variant];
> > +       s8 ch;
> >
> >         for (i = 0; i < srclen; i++) {
> > -               const char *p = strchr(base64_table, src[i]);
> > -
> >                 if (src[i] == '=') {
> >                         ac = (ac << 6);
> >                         bits += 6;
> > @@ -94,9 +149,10 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6
> >                                 bits -= 8;
> >                         continue;
> >                 }
> > -               if (p == NULL || src[i] == 0)
> > +               ch = base64_rev_tables[variant][(u8)src[i]];
> > +               if (ch == -1)
> 
> Checking for < 0 can save an additional comparison here.
> 
> Best,
> Caleb
> 
> >                         return -1;
> > -               ac = (ac << 6) | (p - base64_table);
> > +               ac = (ac << 6) | ch;
> >                 bits += 6;
> >                 if (bits >= 8) {
> >                         bits -= 8;
> > --
> > 2.34.1
> >
> >

[PATCH v3 1/6] lib/base64: Add support for multiple variants
[PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables
[PATCH v3 3/6] lib/base64: rework encode/decode for speed and stricter validation
[PATCH v3 4/6] lib: add KUnit tests for base64 encoding/decoding
[PATCH v3 5/6] fscrypt: replace local base64url helpers with lib/base64
[PATCH v3 6/6] ceph: replace local base64 helpers with lib/base64