[PATCH 1/5] json-parser: pass around lookahead token, constify

Paolo Bonzini posted 5 patches 1 month ago
Maintainers: Markus Armbruster <armbru@redhat.com>
[PATCH 1/5] json-parser: pass around lookahead token, constify
Posted by Paolo Bonzini 1 month ago
Pass the lookahead token down to the various functions implementing the
recursive descent, instead of first peeking and then getting the token
again multiple times.

The main purpose of this patch is to switch the argument passing style
for parse_* functions to something more desirable for a push parser,
which gets one and exactly one token at a time.  However, there are
some minor improvement in code size and a bugfix as well.

In particular, because parse_array() and parse_object() can assume
that the opening bracket/brace is not anymore in the token stream, it
is now apparent that the first entry of an array incorrectly used the
'[' character (stored in "token") as its location.  parse_pair, for
comparison, handled it correctly:

    if (!key) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qobject/json-parser.c | 125 +++++++++++++++++++-----------------------
 1 file changed, 55 insertions(+), 70 deletions(-)

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 7483e582fea..eabc9f8358c 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -49,13 +49,13 @@ typedef struct JSONParserContext {
  * 4) deal with premature EOI
  */
 
-static QObject *parse_value(JSONParserContext *ctxt);
+static QObject *parse_value(JSONParserContext *ctxt, const JSONToken *token);
 
 /**
  * Error handler
  */
 static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
-                                           JSONToken *token, const char *msg, ...)
+                                           const JSONToken *token, const char *msg, ...)
 {
     va_list ap;
     char message[1024];
@@ -126,7 +126,7 @@ static int cvt4hex(const char *s)
  * - Invalid Unicode characters are rejected.
  * - Control characters \x00..\x1F are rejected by the lexer.
  */
-static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
+static QString *parse_string(JSONParserContext *ctxt, const JSONToken *token)
 {
     const char *ptr = token->str;
     GString *str;
@@ -235,42 +235,29 @@ out:
     return NULL;
 }
 
-/* Note: the token object returned by parser_context_peek_token or
- * parser_context_pop_token is deleted as soon as parser_context_pop_token
- * is called again.
+/* Note: the token object returned by parser_context_pop_token is
+ * deleted as soon as parser_context_pop_token is called again.
  */
-static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
+static const JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
 {
     g_free(ctxt->current);
     ctxt->current = g_queue_pop_head(ctxt->buf);
     return ctxt->current;
 }
 
-static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
-{
-    return g_queue_peek_head(ctxt->buf);
-}
-
 /**
  * Parsing rules
  */
-static int parse_pair(JSONParserContext *ctxt, QDict *dict)
+static int parse_pair(JSONParserContext *ctxt, const JSONToken *token, QDict *dict)
 {
     QObject *key_obj = NULL;
     QString *key;
     QObject *value;
-    JSONToken *peek, *token;
 
-    peek = parser_context_peek_token(ctxt);
-    if (peek == NULL) {
-        parse_error(ctxt, NULL, "premature EOI");
-        goto out;
-    }
-
-    key_obj = parse_value(ctxt);
+    key_obj = parse_value(ctxt, token);
     key = qobject_to(QString, key_obj);
     if (!key) {
-        parse_error(ctxt, peek, "key is not a string in object");
+        parse_error(ctxt, token, "key is not a string in object");
         goto out;
     }
 
@@ -285,7 +272,13 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict)
         goto out;
     }
 
-    value = parse_value(ctxt);
+    token = parser_context_pop_token(ctxt);
+    if (token == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        goto out;
+    }
+
+    value = parse_value(ctxt, token);
     if (value == NULL) {
         parse_error(ctxt, token, "Missing value in dict");
         goto out;
@@ -309,21 +302,18 @@ out:
 static QObject *parse_object(JSONParserContext *ctxt)
 {
     QDict *dict = NULL;
-    JSONToken *token, *peek;
-
-    token = parser_context_pop_token(ctxt);
-    assert(token && token->type == JSON_LCURLY);
+    const JSONToken *token;
 
     dict = qdict_new();
 
-    peek = parser_context_peek_token(ctxt);
-    if (peek == NULL) {
+    token = parser_context_pop_token(ctxt);
+    if (token == NULL) {
         parse_error(ctxt, NULL, "premature EOI");
         goto out;
     }
 
-    if (peek->type != JSON_RCURLY) {
-        if (parse_pair(ctxt, dict) == -1) {
+    if (token->type != JSON_RCURLY) {
+        if (parse_pair(ctxt, token, dict) == -1) {
             goto out;
         }
 
@@ -339,7 +329,13 @@ static QObject *parse_object(JSONParserContext *ctxt)
                 goto out;
             }
 
-            if (parse_pair(ctxt, dict) == -1) {
+            token = parser_context_pop_token(ctxt);
+            if (token == NULL) {
+                parse_error(ctxt, NULL, "premature EOI");
+                goto out;
+            }
+
+            if (parse_pair(ctxt, token, dict) == -1) {
                 goto out;
             }
 
@@ -349,8 +345,6 @@ static QObject *parse_object(JSONParserContext *ctxt)
                 goto out;
             }
         }
-    } else {
-        (void)parser_context_pop_token(ctxt);
     }
 
     return QOBJECT(dict);
@@ -363,23 +357,20 @@ out:
 static QObject *parse_array(JSONParserContext *ctxt)
 {
     QList *list = NULL;
-    JSONToken *token, *peek;
-
-    token = parser_context_pop_token(ctxt);
-    assert(token && token->type == JSON_LSQUARE);
+    const JSONToken *token;
 
     list = qlist_new();
 
-    peek = parser_context_peek_token(ctxt);
-    if (peek == NULL) {
+    token = parser_context_pop_token(ctxt);
+    if (token == NULL) {
         parse_error(ctxt, NULL, "premature EOI");
         goto out;
     }
 
-    if (peek->type != JSON_RSQUARE) {
+    if (token->type != JSON_RSQUARE) {
         QObject *obj;
 
-        obj = parse_value(ctxt);
+        obj = parse_value(ctxt, token);
         if (obj == NULL) {
             parse_error(ctxt, token, "expecting value");
             goto out;
@@ -399,7 +390,13 @@ static QObject *parse_array(JSONParserContext *ctxt)
                 goto out;
             }
 
-            obj = parse_value(ctxt);
+            token = parser_context_pop_token(ctxt);
+            if (token == NULL) {
+                parse_error(ctxt, NULL, "premature EOI");
+                goto out;
+            }
+
+            obj = parse_value(ctxt, token);
             if (obj == NULL) {
                 parse_error(ctxt, token, "expecting value");
                 goto out;
@@ -413,8 +410,6 @@ static QObject *parse_array(JSONParserContext *ctxt)
                 goto out;
             }
         }
-    } else {
-        (void)parser_context_pop_token(ctxt);
     }
 
     return QOBJECT(list);
@@ -424,11 +419,8 @@ out:
     return NULL;
 }
 
-static QObject *parse_keyword(JSONParserContext *ctxt)
+static QObject *parse_keyword(JSONParserContext *ctxt, const JSONToken *token)
 {
-    JSONToken *token;
-
-    token = parser_context_pop_token(ctxt);
     assert(token && token->type == JSON_KEYWORD);
 
     if (!strcmp(token->str, "true")) {
@@ -442,11 +434,8 @@ static QObject *parse_keyword(JSONParserContext *ctxt)
     return NULL;
 }
 
-static QObject *parse_interpolation(JSONParserContext *ctxt)
+static QObject *parse_interpolation(JSONParserContext *ctxt, const JSONToken *token)
 {
-    JSONToken *token;
-
-    token = parser_context_pop_token(ctxt);
     assert(token && token->type == JSON_INTERP);
 
     if (!strcmp(token->str, "%p")) {
@@ -478,11 +467,8 @@ static QObject *parse_interpolation(JSONParserContext *ctxt)
     return NULL;
 }
 
-static QObject *parse_literal(JSONParserContext *ctxt)
+static QObject *parse_literal(JSONParserContext *ctxt, const JSONToken *token)
 {
-    JSONToken *token;
-
-    token = parser_context_pop_token(ctxt);
     assert(token);
 
     switch (token->type) {
@@ -530,29 +516,21 @@ static QObject *parse_literal(JSONParserContext *ctxt)
     }
 }
 
-static QObject *parse_value(JSONParserContext *ctxt)
+static QObject *parse_value(JSONParserContext *ctxt, const JSONToken *token)
 {
-    JSONToken *token;
-
-    token = parser_context_peek_token(ctxt);
-    if (token == NULL) {
-        parse_error(ctxt, NULL, "premature EOI");
-        return NULL;
-    }
-
     switch (token->type) {
     case JSON_LCURLY:
         return parse_object(ctxt);
     case JSON_LSQUARE:
         return parse_array(ctxt);
     case JSON_INTERP:
-        return parse_interpolation(ctxt);
+        return parse_interpolation(ctxt, token);
     case JSON_INTEGER:
     case JSON_FLOAT:
     case JSON_STRING:
-        return parse_literal(ctxt);
+        return parse_literal(ctxt, token);
     case JSON_KEYWORD:
-        return parse_keyword(ctxt);
+        return parse_keyword(ctxt, token);
     default:
         parse_error(ctxt, token, "expecting value");
         return NULL;
@@ -575,8 +553,15 @@ QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
 {
     JSONParserContext ctxt = { .buf = tokens, .ap = ap };
     QObject *result;
+    const JSONToken *token;
 
-    result = parse_value(&ctxt);
+    token = parser_context_pop_token(&ctxt);
+    if (token == NULL) {
+        parse_error(&ctxt, NULL, "premature EOI");
+        return NULL;
+    }
+
+    result = parse_value(&ctxt, token);
     assert(ctxt.err || g_queue_is_empty(ctxt.buf));
 
     error_propagate(errp, ctxt.err);
-- 
2.52.0
Re: [PATCH 1/5] json-parser: pass around lookahead token, constify
Posted by Markus Armbruster 3 days, 11 hours ago
Subject smells like a patch splitting opportunity: a trivial patch
adding const, followed by the more interesting patch passing around the
lookahead.  Or the other way round.

In fact, I just did this to help me review the interesting patch: the
trivial patch adds const to JSONToken *.  I can split in my tree.

Paolo Bonzini <pbonzini@redhat.com> writes:

> Pass the lookahead token down to the various functions implementing the
> recursive descent, instead of first peeking and then getting the token
> again multiple times.
>
> The main purpose of this patch is to switch the argument passing style
> for parse_* functions to something more desirable for a push parser,
> which gets one and exactly one token at a time.

Peeking at the lookahead is just fine in a traditional parser, but
you're preparing the way for a push parser.  Got it.

>                                                  However, there are
> some minor improvement in code size and a bugfix as well.
>
> In particular, because parse_array() and parse_object() can assume
> that the opening bracket/brace is not anymore in the token stream, it
> is now apparent that the first entry of an array incorrectly used the
> '[' character (stored in "token") as its location.

For a value of "apparent".  Not sure I would've seen it without your
heads up.

I believe you're talking about

        obj = parse_value(ctxt);
        if (obj == NULL) {
            parse_error(ctxt, token, "expecting value");
            goto out;
        }

where @token indeed holds the array's '['.

This isn't the only spot where we pass a bad @token argument to
parse_error().  For instance:

        parse_error(ctxt, NULL, "premature EOI");

These bad arguments are all masked by parse_error() completely ignoring
its argument!  We tell users what's wrong with the input, but telling
them where it's wrong has been TODO since the initial commit in 2009[*].
I see your PATCH 5 finally takes care of it.

Losely related: how the parser recovers from syntax errors.  Consider
parse_pair():

    key_obj = parse_value(ctxt);

If @key_obj, parse_value() successfully parsed a value into @key_obj.

Else, it reported an error.

    key = qobject_to(QString, key_obj);

If @key, parse_value() recognized a string.

Else, it either recognized something else, or reported an error.

    if (!key) {
        parse_error(ctxt, peek, "key is not a string in object");

This error makes sense if it recognized something else.

It doesn't when it reported an error.  Works, sort of, because
parse_error() keeps only the first error, and silently ignores
subsequent ones.

Robust parsers handle errors explicitly, they don't blindly continue,
throwing away any subsequent errors.  Moreover, I find these misleading
errors grating when I read the code.

But I digress.

        goto out;
    }

>                                                     parse_pair, for
> comparison, handled it correctly:
>
>     if (!key) {
>         parse_error(ctxt, peek, "key is not a string in object");
>         goto out;
>     }
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  qobject/json-parser.c | 125 +++++++++++++++++++-----------------------
>  1 file changed, 55 insertions(+), 70 deletions(-)
>
> diff --git a/qobject/json-parser.c b/qobject/json-parser.c
> index 7483e582fea..eabc9f8358c 100644
> --- a/qobject/json-parser.c
> +++ b/qobject/json-parser.c
> @@ -49,13 +49,13 @@ typedef struct JSONParserContext {
>   * 4) deal with premature EOI
>   */
>  
> -static QObject *parse_value(JSONParserContext *ctxt);
> +static QObject *parse_value(JSONParserContext *ctxt, const JSONToken *token);
>  
>  /**
>   * Error handler
>   */
>  static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
> -                                           JSONToken *token, const char *msg, ...)
> +                                           const JSONToken *token, const char *msg, ...)

checkpatch points out

    0001-json-parser-pass-around-lookahead-token-constify.patch:48: WARNING: line over 80 characters

Easy enough to avoid.  Both

   static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
                                               const JSONToken *token,
                                               const char *msg, ...)

and

   static void G_GNUC_PRINTF(3, 4)
   parse_error(JSONParserContext *ctxt, const JSONToken *token,
               const char *msg, ...)

would be easier on my eyes.

More of the same below, not flagging again.

>  {
>      va_list ap;
>      char message[1024];
> @@ -126,7 +126,7 @@ static int cvt4hex(const char *s)
>   * - Invalid Unicode characters are rejected.
>   * - Control characters \x00..\x1F are rejected by the lexer.
>   */
> -static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
> +static QString *parse_string(JSONParserContext *ctxt, const JSONToken *token)
>  {
>      const char *ptr = token->str;
>      GString *str;
> @@ -235,42 +235,29 @@ out:
>      return NULL;
>  }
>  
> -/* Note: the token object returned by parser_context_peek_token or
> - * parser_context_pop_token is deleted as soon as parser_context_pop_token
> - * is called again.
> +/* Note: the token object returned by parser_context_pop_token is
> + * deleted as soon as parser_context_pop_token is called again.
>   */
> -static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
> +static const JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
>  {
>      g_free(ctxt->current);
>      ctxt->current = g_queue_pop_head(ctxt->buf);
>      return ctxt->current;
>  }
>  
> -static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
> -{
> -    return g_queue_peek_head(ctxt->buf);
> -}
> -
>  /**
>   * Parsing rules
>   */
> -static int parse_pair(JSONParserContext *ctxt, QDict *dict)
> +static int parse_pair(JSONParserContext *ctxt, const JSONToken *token, QDict *dict)
>  {
>      QObject *key_obj = NULL;
>      QString *key;
>      QObject *value;
> -    JSONToken *peek, *token;
>  
> -    peek = parser_context_peek_token(ctxt);
> -    if (peek == NULL) {
> -        parse_error(ctxt, NULL, "premature EOI");
> -        goto out;
> -    }
> -
> -    key_obj = parse_value(ctxt);
> +    key_obj = parse_value(ctxt, token);
>      key = qobject_to(QString, key_obj);
>      if (!key) {
> -        parse_error(ctxt, peek, "key is not a string in object");
> +        parse_error(ctxt, token, "key is not a string in object");
>          goto out;
>      }
>  

@peek isn't exactly a great name, but @token tells me even less.
@first_token?

> @@ -285,7 +272,13 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict)
>          goto out;
>      }
>  
> -    value = parse_value(ctxt);
> +    token = parser_context_pop_token(ctxt);

Assignment to parameter.  Not a fan.

If we rename the parameter, we can keep the local variable @token for
use here.

> +    if (token == NULL) {
> +        parse_error(ctxt, NULL, "premature EOI");
> +        goto out;
> +    }
> +
> +    value = parse_value(ctxt, token);
>      if (value == NULL) {
>          parse_error(ctxt, token, "Missing value in dict");
>          goto out;
> @@ -309,21 +302,18 @@ out:
>  static QObject *parse_object(JSONParserContext *ctxt)
>  {
>      QDict *dict = NULL;
> -    JSONToken *token, *peek;
> -
> -    token = parser_context_pop_token(ctxt);
> -    assert(token && token->type == JSON_LCURLY);
> +    const JSONToken *token;

Before the patch, parse_object() parses the entire object, as its name
indicates, with the precondition that the lookahead is '{'.

After the patch, it parses the part of the object after the '{'.  Hmm.

We could pass it the first token, like we do for all the other parsing
functions except this one and parse_array(), and keep the assertion.
I'd prefer that, but it's a matter of taste.

>  
>      dict = qdict_new();
>  
> -    peek = parser_context_peek_token(ctxt);
> -    if (peek == NULL) {
> +    token = parser_context_pop_token(ctxt);
> +    if (token == NULL) {
>          parse_error(ctxt, NULL, "premature EOI");
>          goto out;
>      }
>  
> -    if (peek->type != JSON_RCURLY) {
> -        if (parse_pair(ctxt, dict) == -1) {
> +    if (token->type != JSON_RCURLY) {
> +        if (parse_pair(ctxt, token, dict) == -1) {
>              goto out;
>          }
>  
> @@ -339,7 +329,13 @@ static QObject *parse_object(JSONParserContext *ctxt)
>                  goto out;
>              }
>  
> -            if (parse_pair(ctxt, dict) == -1) {
> +            token = parser_context_pop_token(ctxt);
> +            if (token == NULL) {
> +                parse_error(ctxt, NULL, "premature EOI");
> +                goto out;
> +            }
> +
> +            if (parse_pair(ctxt, token, dict) == -1) {
>                  goto out;
>              }
>  
> @@ -349,8 +345,6 @@ static QObject *parse_object(JSONParserContext *ctxt)
>                  goto out;
>              }
>          }
> -    } else {
> -        (void)parser_context_pop_token(ctxt);
>      }
>  
>      return QOBJECT(dict);
> @@ -363,23 +357,20 @@ out:
>  static QObject *parse_array(JSONParserContext *ctxt)
>  {
>      QList *list = NULL;
> -    JSONToken *token, *peek;
> -
> -    token = parser_context_pop_token(ctxt);
> -    assert(token && token->type == JSON_LSQUARE);
> +    const JSONToken *token;

Likewise.

>  
>      list = qlist_new();
>  
> -    peek = parser_context_peek_token(ctxt);
> -    if (peek == NULL) {
> +    token = parser_context_pop_token(ctxt);
> +    if (token == NULL) {
>          parse_error(ctxt, NULL, "premature EOI");
>          goto out;
>      }
>  
> -    if (peek->type != JSON_RSQUARE) {
> +    if (token->type != JSON_RSQUARE) {
>          QObject *obj;
>  
> -        obj = parse_value(ctxt);
> +        obj = parse_value(ctxt, token);
>          if (obj == NULL) {
>              parse_error(ctxt, token, "expecting value");
>              goto out;
> @@ -399,7 +390,13 @@ static QObject *parse_array(JSONParserContext *ctxt)
>                  goto out;
>              }
>  
> -            obj = parse_value(ctxt);
> +            token = parser_context_pop_token(ctxt);
> +            if (token == NULL) {
> +                parse_error(ctxt, NULL, "premature EOI");
> +                goto out;
> +            }
> +
> +            obj = parse_value(ctxt, token);
>              if (obj == NULL) {
>                  parse_error(ctxt, token, "expecting value");
>                  goto out;
> @@ -413,8 +410,6 @@ static QObject *parse_array(JSONParserContext *ctxt)
>                  goto out;
>              }
>          }
> -    } else {
> -        (void)parser_context_pop_token(ctxt);
>      }
>  
>      return QOBJECT(list);
> @@ -424,11 +419,8 @@ out:
>      return NULL;
>  }
>  
> -static QObject *parse_keyword(JSONParserContext *ctxt)
> +static QObject *parse_keyword(JSONParserContext *ctxt, const JSONToken *token)
>  {
> -    JSONToken *token;
> -
> -    token = parser_context_pop_token(ctxt);
>      assert(token && token->type == JSON_KEYWORD);
>  
>      if (!strcmp(token->str, "true")) {
> @@ -442,11 +434,8 @@ static QObject *parse_keyword(JSONParserContext *ctxt)
>      return NULL;
>  }
>  
> -static QObject *parse_interpolation(JSONParserContext *ctxt)
> +static QObject *parse_interpolation(JSONParserContext *ctxt, const JSONToken *token)
>  {
> -    JSONToken *token;
> -
> -    token = parser_context_pop_token(ctxt);
>      assert(token && token->type == JSON_INTERP);
>  
>      if (!strcmp(token->str, "%p")) {
> @@ -478,11 +467,8 @@ static QObject *parse_interpolation(JSONParserContext *ctxt)
>      return NULL;
>  }
>  
> -static QObject *parse_literal(JSONParserContext *ctxt)
> +static QObject *parse_literal(JSONParserContext *ctxt, const JSONToken *token)
>  {
> -    JSONToken *token;
> -
> -    token = parser_context_pop_token(ctxt);
>      assert(token);
>  
>      switch (token->type) {
> @@ -530,29 +516,21 @@ static QObject *parse_literal(JSONParserContext *ctxt)
>      }
>  }
>  
> -static QObject *parse_value(JSONParserContext *ctxt)
> +static QObject *parse_value(JSONParserContext *ctxt, const JSONToken *token)

If we name parse_pair()'s new argument @first_token, we should name this
one the same.

>  {
> -    JSONToken *token;
> -
> -    token = parser_context_peek_token(ctxt);
> -    if (token == NULL) {
> -        parse_error(ctxt, NULL, "premature EOI");
> -        return NULL;
> -    }
> -
>      switch (token->type) {
>      case JSON_LCURLY:
>          return parse_object(ctxt);
>      case JSON_LSQUARE:
>          return parse_array(ctxt);
>      case JSON_INTERP:
> -        return parse_interpolation(ctxt);
> +        return parse_interpolation(ctxt, token);
>      case JSON_INTEGER:
>      case JSON_FLOAT:
>      case JSON_STRING:
> -        return parse_literal(ctxt);
> +        return parse_literal(ctxt, token);
>      case JSON_KEYWORD:
> -        return parse_keyword(ctxt);
> +        return parse_keyword(ctxt, token);
>      default:
>          parse_error(ctxt, token, "expecting value");
>          return NULL;
> @@ -575,8 +553,15 @@ QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
>  {
>      JSONParserContext ctxt = { .buf = tokens, .ap = ap };
>      QObject *result;
> +    const JSONToken *token;
>  
> -    result = parse_value(&ctxt);
> +    token = parser_context_pop_token(&ctxt);
> +    if (token == NULL) {
> +        parse_error(&ctxt, NULL, "premature EOI");
> +        return NULL;
> +    }
> +
> +    result = parse_value(&ctxt, token);
>      assert(ctxt.err || g_queue_is_empty(ctxt.buf));
>  
>      error_propagate(errp, ctxt.err);

       while (!g_queue_is_empty(ctxt.buf)) {
           parser_context_pop_token(&ctxt);

This is the only call of parser_context_pop_token() that isn't
immediately followed by "if it returned null, report "premature EOI".
I went "let's factor this out", then realized the next patch replaces
all this.  Nevermind!

       }
       g_free(ctxt.current);

       return result;
   }


Summary:

* parse_pair() and parse_value() now receive the first token as
  argument.  Popping the first token moves to their callers.

* parse_keyword(), parse_interpolation(), and parse_literal() now
  receive the token as argument.  Popping it moves to their callers.
  parse_string() works like this even before the patch, and is not
  changed.

* parse_object() and parse_array() now parse the part after the object's
  / array's first token.

Works.



[*] Reminds me of "Ed is generous enough to flag errors, yet prudent
enough not to overwhelm the novice with verbosity."
Re: [PATCH 1/5] json-parser: pass around lookahead token, constify
Posted by Paolo Bonzini 3 days, 11 hours ago
On Fri, Feb 6, 2026 at 11:46 AM Markus Armbruster <armbru@redhat.com> wrote:
> > In particular, because parse_array() and parse_object() can assume
> > that the opening bracket/brace is not anymore in the token stream, it
> > is now apparent that the first entry of an array incorrectly used the
> > '[' character (stored in "token") as its location.
>
> For a value of "apparent".  Not sure I would've seen it without your
> heads up.

Me neither - I found it only while reviewing my own changes. :)

> I believe you're talking about
>
>         obj = parse_value(ctxt);
>         if (obj == NULL) {
>             parse_error(ctxt, token, "expecting value");
>             goto out;
>         }
>
> where @token indeed holds the array's '['.

Correct.

>         parse_error(ctxt, NULL, "premature EOI");
>
> These bad arguments are all masked by parse_error() completely ignoring
> its argument!  We tell users what's wrong with the input, but telling
> them where it's wrong has been TODO since the initial commit in 2009[*].
> I see your PATCH 5 finally takes care of it.

Yep, the clearer distinction between the parser and streamer helps a
little in that respect.

> > -    key_obj = parse_value(ctxt);
> > +    key_obj = parse_value(ctxt, token);
> >      key = qobject_to(QString, key_obj);
> >      if (!key) {
> > -        parse_error(ctxt, peek, "key is not a string in object");
> > +        parse_error(ctxt, token, "key is not a string in object");
> >          goto out;
> >      }
> >
>
> @peek isn't exactly a great name, but @token tells me even less.
> @first_token?

Possibly - though it all goes away since the parser becomes a state
machine, and the only separate functions that survive are for
one-token literals; which is why I just went simply with "token"
everywhere (as in, "next token" or "lookahead that led the parser to
call this function").

> >  static QObject *parse_object(JSONParserContext *ctxt)
> >  {
> >      QDict *dict = NULL;
> > -    JSONToken *token, *peek;
> > -
> > -    token = parser_context_pop_token(ctxt);
> > -    assert(token && token->type == JSON_LCURLY);
> > +    const JSONToken *token;
>
> Before the patch, parse_object() parses the entire object, as its name
> indicates, with the precondition that the lookahead is '{'.
>
> After the patch, it parses the part of the object after the '{'.  Hmm.
>
> We could pass it the first token, like we do for all the other parsing
> functions except this one and parse_array(), and keep the assertion.
> I'd prefer that, but it's a matter of taste.

I see, yes we could. I can do this in v2, since I'm sure you'll
(rightly) find something in the other patches that warrants a resend.

> This is the only call of parser_context_pop_token() that isn't
> immediately followed by "if it returned null, report "premature EOI".
> I went "let's factor this out", then realized the next patch replaces
> all this.  Nevermind!

Yeah, not an excuse for being sloppy but perhaps an excuse for not
being too picky. :)

> * parse_pair() and parse_value() now receive the first token as
>   argument.  Popping the first token moves to their callers.
>
> * parse_keyword(), parse_interpolation(), and parse_literal() now
>   receive the token as argument.  Popping it moves to their callers.
>   parse_string() works like this even before the patch, and is not
>   changed.
>
> * parse_object() and parse_array() now parse the part after the object's
>   / array's first token.
>
> Works.

Good :)

Paolo