[PATCH v4 11/14] qapi/introspect.py: add type hint annotations

John Snow posted 14 patches 4 years, 9 months ago
Maintainers: Michael Roth <michael.roth@amd.com>, Markus Armbruster <armbru@redhat.com>
There is a newer version of this series
[PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by John Snow 4 years, 9 months ago
Signed-off-by: John Snow <jsnow@redhat.com>
---
 scripts/qapi/introspect.py | 115 ++++++++++++++++++++++++++-----------
 scripts/qapi/mypy.ini      |   5 --
 scripts/qapi/schema.py     |   2 +-
 3 files changed, 82 insertions(+), 40 deletions(-)

diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
index 60ec326d2c7..b7f2a6cf260 100644
--- a/scripts/qapi/introspect.py
+++ b/scripts/qapi/introspect.py
@@ -30,10 +30,19 @@
 )
 from .gen import QAPISchemaMonolithicCVisitor
 from .schema import (
+    QAPISchema,
     QAPISchemaArrayType,
     QAPISchemaBuiltinType,
+    QAPISchemaEntity,
+    QAPISchemaEnumMember,
+    QAPISchemaFeature,
+    QAPISchemaObjectType,
+    QAPISchemaObjectTypeMember,
     QAPISchemaType,
+    QAPISchemaVariant,
+    QAPISchemaVariants,
 )
+from .source import QAPISourceInfo
 
 
 # This module constructs a tree data structure that is used to
@@ -57,6 +66,8 @@
 _value = Union[_scalar, _nonscalar]
 TreeValue = Union[_value, 'Annotated[_value]']
 
+# This is a (strict) alias for an arbitrary object non-scalar, as above:
+_DObject = Dict[str, object]
 
 _NodeT = TypeVar('_NodeT', bound=TreeValue)
 
@@ -76,9 +87,11 @@ def __init__(self, value: _NodeT, ifcond: Iterable[str],
         self.ifcond: Tuple[str, ...] = tuple(ifcond)
 
 
-def _tree_to_qlit(obj, level=0, suppress_first_indent=False):
+def _tree_to_qlit(obj: TreeValue,
+                  level: int = 0,
+                  suppress_first_indent: bool = False) -> str:
 
-    def indent(level):
+    def indent(level: int) -> str:
         return level * 4 * ' '
 
     if isinstance(obj, Annotated):
@@ -135,21 +148,21 @@ def indent(level):
     return ret
 
 
-def to_c_string(string):
+def to_c_string(string: str) -> str:
     return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"'
 
 
 class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor):
 
-    def __init__(self, prefix, unmask):
+    def __init__(self, prefix: str, unmask: bool):
         super().__init__(
             prefix, 'qapi-introspect',
             ' * QAPI/QMP schema introspection', __doc__)
         self._unmask = unmask
-        self._schema = None
-        self._trees = []
-        self._used_types = []
-        self._name_map = {}
+        self._schema: Optional[QAPISchema] = None
+        self._trees: List[Annotated[_DObject]] = []
+        self._used_types: List[QAPISchemaType] = []
+        self._name_map: Dict[str, str] = {}
         self._genc.add(mcgen('''
 #include "qemu/osdep.h"
 #include "%(prefix)sqapi-introspect.h"
@@ -157,10 +170,10 @@ def __init__(self, prefix, unmask):
 ''',
                              prefix=prefix))
 
-    def visit_begin(self, schema):
+    def visit_begin(self, schema: QAPISchema) -> None:
         self._schema = schema
 
-    def visit_end(self):
+    def visit_end(self) -> None:
         # visit the types that are actually used
         for typ in self._used_types:
             typ.visit(self)
@@ -182,18 +195,18 @@ def visit_end(self):
         self._used_types = []
         self._name_map = {}
 
-    def visit_needed(self, entity):
+    def visit_needed(self, entity: QAPISchemaEntity) -> bool:
         # Ignore types on first pass; visit_end() will pick up used types
         return not isinstance(entity, QAPISchemaType)
 
-    def _name(self, name):
+    def _name(self, name: str) -> str:
         if self._unmask:
             return name
         if name not in self._name_map:
             self._name_map[name] = '%d' % len(self._name_map)
         return self._name_map[name]
 
-    def _use_type(self, typ):
+    def _use_type(self, typ: QAPISchemaType) -> str:
         assert self._schema is not None
 
         # Map the various integer types to plain int
@@ -215,10 +228,13 @@ def _use_type(self, typ):
         return self._name(typ.name)
 
     @staticmethod
-    def _gen_features(features):
+    def _gen_features(features: List[QAPISchemaFeature]
+                      ) -> List[Annotated[str]]:
         return [Annotated(f.name, f.ifcond) for f in features]
 
-    def _gen_tree(self, name, mtype, obj, ifcond, features):
+    def _gen_tree(self, name: str, mtype: str, obj: _DObject,
+                  ifcond: List[str],
+                  features: Optional[List[QAPISchemaFeature]]) -> None:
         comment: Optional[str] = None
         if mtype not in ('command', 'event', 'builtin', 'array'):
             if not self._unmask:
@@ -232,47 +248,67 @@ def _gen_tree(self, name, mtype, obj, ifcond, features):
             obj['features'] = self._gen_features(features)
         self._trees.append(Annotated(obj, ifcond, comment))
 
-    def _gen_member(self, member):
-        obj = {'name': member.name, 'type': self._use_type(member.type)}
+    def _gen_member(self, member: QAPISchemaObjectTypeMember
+                    ) -> Annotated[_DObject]:
+        obj: _DObject = {
+            'name': member.name,
+            'type': self._use_type(member.type)
+        }
         if member.optional:
             obj['default'] = None
         if member.features:
             obj['features'] = self._gen_features(member.features)
         return Annotated(obj, member.ifcond)
 
-    def _gen_variants(self, tag_name, variants):
+    def _gen_variants(self, tag_name: str,
+                      variants: List[QAPISchemaVariant]) -> _DObject:
         return {'tag': tag_name,
                 'variants': [self._gen_variant(v) for v in variants]}
 
-    def _gen_variant(self, variant):
-        obj = {'case': variant.name, 'type': self._use_type(variant.type)}
+    def _gen_variant(self, variant: QAPISchemaVariant) -> Annotated[_DObject]:
+        obj: _DObject = {
+            'case': variant.name,
+            'type': self._use_type(variant.type)
+        }
         return Annotated(obj, variant.ifcond)
 
-    def visit_builtin_type(self, name, info, json_type):
+    def visit_builtin_type(self, name: str, info: Optional[QAPISourceInfo],
+                           json_type: str) -> None:
         self._gen_tree(name, 'builtin', {'json-type': json_type}, [], None)
 
-    def visit_enum_type(self, name, info, ifcond, features, members, prefix):
+    def visit_enum_type(self, name: str, info: Optional[QAPISourceInfo],
+                        ifcond: List[str], features: List[QAPISchemaFeature],
+                        members: List[QAPISchemaEnumMember],
+                        prefix: Optional[str]) -> None:
         self._gen_tree(
             name, 'enum',
             {'values': [Annotated(m.name, m.ifcond) for m in members]},
             ifcond, features
         )
 
-    def visit_array_type(self, name, info, ifcond, element_type):
+    def visit_array_type(self, name: str, info: Optional[QAPISourceInfo],
+                         ifcond: List[str],
+                         element_type: QAPISchemaType) -> None:
         element = self._use_type(element_type)
         self._gen_tree('[' + element + ']', 'array', {'element-type': element},
                        ifcond, None)
 
-    def visit_object_type_flat(self, name, info, ifcond, features,
-                               members, variants):
-        obj = {'members': [self._gen_member(m) for m in members]}
+    def visit_object_type_flat(self, name: str, info: Optional[QAPISourceInfo],
+                               ifcond: List[str],
+                               features: List[QAPISchemaFeature],
+                               members: List[QAPISchemaObjectTypeMember],
+                               variants: Optional[QAPISchemaVariants]) -> None:
+        obj: _DObject = {'members': [self._gen_member(m) for m in members]}
         if variants:
             obj.update(self._gen_variants(variants.tag_member.name,
                                           variants.variants))
 
         self._gen_tree(name, 'object', obj, ifcond, features)
 
-    def visit_alternate_type(self, name, info, ifcond, features, variants):
+    def visit_alternate_type(self, name: str, info: Optional[QAPISourceInfo],
+                             ifcond: List[str],
+                             features: List[QAPISchemaFeature],
+                             variants: QAPISchemaVariants) -> None:
         self._gen_tree(name, 'alternate', {'members': [
                 Annotated({'type': self._use_type(m.type)}, m.ifcond)
                 for m in variants.variants
@@ -280,27 +316,38 @@ def visit_alternate_type(self, name, info, ifcond, features, variants):
             ifcond, features
         )
 
-    def visit_command(self, name, info, ifcond, features,
-                      arg_type, ret_type, gen, success_response, boxed,
-                      allow_oob, allow_preconfig, coroutine):
+    def visit_command(self, name: str, info: Optional[QAPISourceInfo],
+                      ifcond: List[str],
+                      features: List[QAPISchemaFeature],
+                      arg_type: Optional[QAPISchemaObjectType],
+                      ret_type: Optional[QAPISchemaType], gen: bool,
+                      success_response: bool, boxed: bool, allow_oob: bool,
+                      allow_preconfig: bool, coroutine: bool) -> None:
         assert self._schema is not None
 
         arg_type = arg_type or self._schema.the_empty_object_type
         ret_type = ret_type or self._schema.the_empty_object_type
-        obj = {'arg-type': self._use_type(arg_type),
-               'ret-type': self._use_type(ret_type)}
+        obj: _DObject = {
+            'arg-type': self._use_type(arg_type),
+            'ret-type': self._use_type(ret_type)
+        }
         if allow_oob:
             obj['allow-oob'] = allow_oob
         self._gen_tree(name, 'command', obj, ifcond, features)
 
-    def visit_event(self, name, info, ifcond, features, arg_type, boxed):
+    def visit_event(self, name: str, info: Optional[QAPISourceInfo],
+                    ifcond: List[str], features: List[QAPISchemaFeature],
+                    arg_type: Optional[QAPISchemaObjectType],
+                    boxed: bool) -> None:
         assert self._schema is not None
+
         arg_type = arg_type or self._schema.the_empty_object_type
         self._gen_tree(name, 'event', {'arg-type': self._use_type(arg_type)},
                        ifcond, features)
 
 
-def gen_introspect(schema, output_dir, prefix, opt_unmask):
+def gen_introspect(schema: QAPISchema, output_dir: str, prefix: str,
+                   opt_unmask: bool) -> None:
     vis = QAPISchemaGenIntrospectVisitor(prefix, opt_unmask)
     schema.visit(vis)
     vis.write(output_dir)
diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini
index 04bd5db5278..0a000d58b37 100644
--- a/scripts/qapi/mypy.ini
+++ b/scripts/qapi/mypy.ini
@@ -13,11 +13,6 @@ disallow_untyped_defs = False
 disallow_incomplete_defs = False
 check_untyped_defs = False
 
-[mypy-qapi.introspect]
-disallow_untyped_defs = False
-disallow_incomplete_defs = False
-check_untyped_defs = False
-
 [mypy-qapi.parser]
 disallow_untyped_defs = False
 disallow_incomplete_defs = False
diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
index 353e8020a27..ff16578f6de 100644
--- a/scripts/qapi/schema.py
+++ b/scripts/qapi/schema.py
@@ -28,7 +28,7 @@
 class QAPISchemaEntity:
     meta: Optional[str] = None
 
-    def __init__(self, name, info, doc, ifcond=None, features=None):
+    def __init__(self, name: str, info, doc, ifcond=None, features=None):
         assert name is None or isinstance(name, str)
         for f in features or []:
             assert isinstance(f, QAPISchemaFeature)
-- 
2.29.2


Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by Markus Armbruster 4 years, 9 months ago
John Snow <jsnow@redhat.com> writes:

> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>  scripts/qapi/introspect.py | 115 ++++++++++++++++++++++++++-----------
>  scripts/qapi/mypy.ini      |   5 --
>  scripts/qapi/schema.py     |   2 +-
>  3 files changed, 82 insertions(+), 40 deletions(-)
>
> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
> index 60ec326d2c7..b7f2a6cf260 100644
> --- a/scripts/qapi/introspect.py
> +++ b/scripts/qapi/introspect.py
> @@ -30,10 +30,19 @@
>  )
>  from .gen import QAPISchemaMonolithicCVisitor
>  from .schema import (
> +    QAPISchema,
>      QAPISchemaArrayType,
>      QAPISchemaBuiltinType,
> +    QAPISchemaEntity,
> +    QAPISchemaEnumMember,
> +    QAPISchemaFeature,
> +    QAPISchemaObjectType,
> +    QAPISchemaObjectTypeMember,
>      QAPISchemaType,
> +    QAPISchemaVariant,
> +    QAPISchemaVariants,
>  )
> +from .source import QAPISourceInfo
>  
>  
>  # This module constructs a tree data structure that is used to
> @@ -57,6 +66,8 @@
   # generate the introspection information for QEMU. It behaves similarly
   # to a JSON value.
   #
   # A complexity over JSON is that our values may or may not be annotated.
   #
   # Un-annotated values may be:
   #     Scalar: str, bool, None.
   #     Non-scalar: List, Dict
   # _value = Union[str, bool, None, Dict[str, TreeValue], List[TreeValue]]
   #
   # With optional annotations, the type of all values is:
   # TreeValue = Union[_value, Annotated[_value]]
   #
   # Sadly, mypy does not support recursive types, so we must approximate this.
   _stub = Any
   _scalar = Union[str, bool, None]
   _nonscalar = Union[Dict[str, _stub], List[_stub]]
>  _value = Union[_scalar, _nonscalar]
>  TreeValue = Union[_value, 'Annotated[_value]']
>  
> +# This is a (strict) alias for an arbitrary object non-scalar, as above:
> +_DObject = Dict[str, object]

Sounds greek :)

It's almost the Dict part of _nonscalar, but not quite: object vs. Any.

I naively expect something closer to

   _scalar = ...
   _object = Dict[str, _stub]
   _nonscalar = Union[_object, List[_stub]

and (still naively) expect _object to be good enough to serve as type
annotation for dicts representing JSON objects.

[...]


Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by John Snow 4 years, 9 months ago
On 2/3/21 10:15 AM, Markus Armbruster wrote:
> John Snow <jsnow@redhat.com> writes:
> 
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>   scripts/qapi/introspect.py | 115 ++++++++++++++++++++++++++-----------
>>   scripts/qapi/mypy.ini      |   5 --
>>   scripts/qapi/schema.py     |   2 +-
>>   3 files changed, 82 insertions(+), 40 deletions(-)
>>
>> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
>> index 60ec326d2c7..b7f2a6cf260 100644
>> --- a/scripts/qapi/introspect.py
>> +++ b/scripts/qapi/introspect.py
>> @@ -30,10 +30,19 @@
>>   )
>>   from .gen import QAPISchemaMonolithicCVisitor
>>   from .schema import (
>> +    QAPISchema,
>>       QAPISchemaArrayType,
>>       QAPISchemaBuiltinType,
>> +    QAPISchemaEntity,
>> +    QAPISchemaEnumMember,
>> +    QAPISchemaFeature,
>> +    QAPISchemaObjectType,
>> +    QAPISchemaObjectTypeMember,
>>       QAPISchemaType,
>> +    QAPISchemaVariant,
>> +    QAPISchemaVariants,
>>   )
>> +from .source import QAPISourceInfo
>>   
>>   
>>   # This module constructs a tree data structure that is used to
>> @@ -57,6 +66,8 @@
>     # generate the introspection information for QEMU. It behaves similarly
>     # to a JSON value.
>     #
>     # A complexity over JSON is that our values may or may not be annotated.
>     #
>     # Un-annotated values may be:
>     #     Scalar: str, bool, None.
>     #     Non-scalar: List, Dict
>     # _value = Union[str, bool, None, Dict[str, TreeValue], List[TreeValue]]
>     #
>     # With optional annotations, the type of all values is:
>     # TreeValue = Union[_value, Annotated[_value]]
>     #
>     # Sadly, mypy does not support recursive types, so we must approximate this.
>     _stub = Any
>     _scalar = Union[str, bool, None]
>     _nonscalar = Union[Dict[str, _stub], List[_stub]]
>>   _value = Union[_scalar, _nonscalar]
>>   TreeValue = Union[_value, 'Annotated[_value]']
>>   
>> +# This is a (strict) alias for an arbitrary object non-scalar, as above:
>> +_DObject = Dict[str, object]
> 
> Sounds greek :)
> 

Admittedly it is still not explained well ... until the next patch. I'm 
going to leave it alone for now until you have a chance to respond to 
these walls of text.

> It's almost the Dict part of _nonscalar, but not quite: object vs. Any.
> 
> I naively expect something closer to
> 
>     _scalar = ...
>     _object = Dict[str, _stub]
>     _nonscalar = Union[_object, List[_stub]
> 
> and (still naively) expect _object to be good enough to serve as type
> annotation for dicts representing JSON objects.

"_object" would be good, except ... I am trying to avoid using that word 
because what does it mean? Python object? JSON object? Here at the 
boundary between two worlds, nothing makes sense.

(See patch 12/14 for A More Betterer Understanding of what _DObject is 
used for. It will contribute to A Greater Understanding.)

Anyway, to your questions;

(1) _DObject was my shorthand garbage way of saying "This is a Python 
Dict that represents a JSON object". Hence Dict-Object, "DObject". I 
have also derisively called this a "dictly-typed" object at times.

(2) Dict[str, Any] and Dict[str, object] are similar, but do have a 
semantic difference. I alluded to it by calling this a "(strict) alias"; 
which does not help you understand any of the following points:

Whenever you use "Any", it basically turns off type-checking at that 
boundary; it is the gradually typed boundary type. Avoid it whenever 
reasonably possible.

Example time:


def foo(thing: Any) -> None:
     print(thing.value)  # Sure, I guess! We'll believe you.


def foo(thing: object) -> None:
     print(thing.value)  # BZZT, Python object has no value prop.


Use "Any" when you really just cannot constrain the type, because you're 
out of bourbon or you've decided to give in to the darkness inside your 
heart.

Use "object" when the type of the value actually doesn't matter, because 
you are only passing it on to something else later that will do its own 
type analysis or introspection on the object.

For introspect.py, 'object' is actually a really good type when we can 
use it, because we interrogate the type exhaustively upon receipt in 
_tree_to_qlit.


That leaves one question you would almost assuredly ask as a followup:

"Why didn't you use object for the stub type to begin with?"

Let's say we define _stub as `object` instead, the Python object. When 
_tree_to_qlit recurses on non-scalar structures, the held value there is 
only known as "object" and not as str/bool/None, which causes a typing 
error at that point.

Moving the stub to "Any" tells mypy to ... not worry about what type we 
actually passed here. I gave in to the darkness in my heart. It's just 
too annoying without real recursion.

--js


Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by Markus Armbruster 4 years, 9 months ago
John Snow <jsnow@redhat.com> writes:

> On 2/3/21 10:15 AM, Markus Armbruster wrote:
>> John Snow <jsnow@redhat.com> writes:
>> 
>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>> ---
>>>   scripts/qapi/introspect.py | 115 ++++++++++++++++++++++++++-----------
>>>   scripts/qapi/mypy.ini      |   5 --
>>>   scripts/qapi/schema.py     |   2 +-
>>>   3 files changed, 82 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
>>> index 60ec326d2c7..b7f2a6cf260 100644
>>> --- a/scripts/qapi/introspect.py
>>> +++ b/scripts/qapi/introspect.py
>>> @@ -30,10 +30,19 @@
>>>   )
>>>   from .gen import QAPISchemaMonolithicCVisitor
>>>   from .schema import (
>>> +    QAPISchema,
>>>       QAPISchemaArrayType,
>>>       QAPISchemaBuiltinType,
>>> +    QAPISchemaEntity,
>>> +    QAPISchemaEnumMember,
>>> +    QAPISchemaFeature,
>>> +    QAPISchemaObjectType,
>>> +    QAPISchemaObjectTypeMember,
>>>       QAPISchemaType,
>>> +    QAPISchemaVariant,
>>> +    QAPISchemaVariants,
>>>   )
>>> +from .source import QAPISourceInfo
>>>   
>>>   
>>>   # This module constructs a tree data structure that is used to
>>> @@ -57,6 +66,8 @@
>>     # generate the introspection information for QEMU. It behaves similarly
>>     # to a JSON value.
>>     #
>>     # A complexity over JSON is that our values may or may not be annotated.
>>     #
>>     # Un-annotated values may be:
>>     #     Scalar: str, bool, None.
>>     #     Non-scalar: List, Dict
>>     # _value = Union[str, bool, None, Dict[str, TreeValue], List[TreeValue]]
>>     #
>>     # With optional annotations, the type of all values is:
>>     # TreeValue = Union[_value, Annotated[_value]]
>>     #
>>     # Sadly, mypy does not support recursive types, so we must approximate this.
>>     _stub = Any
>>     _scalar = Union[str, bool, None]
>>     _nonscalar = Union[Dict[str, _stub], List[_stub]]
>>>   _value = Union[_scalar, _nonscalar]
>>>   TreeValue = Union[_value, 'Annotated[_value]']

I'm once again terminally confused about when to use _lower_case and
when to use CamelCase for such variables.

The reader has to connect _stub = Any back "we must approximate this".
Hmm... "we approximate with Any"?

>>>   
>>> +# This is a (strict) alias for an arbitrary object non-scalar, as above:
>>> +_DObject = Dict[str, object]
>> 
>> Sounds greek :)
>> 
>
> Admittedly it is still not explained well ... until the next patch. I'm 
> going to leave it alone for now until you have a chance to respond to 
> these walls of text.

You explain it some futher down.

>> It's almost the Dict part of _nonscalar, but not quite: object vs. Any.
>> 
>> I naively expect something closer to
>> 
>>     _scalar = ...
>>     _object = Dict[str, _stub]
>>     _nonscalar = Union[_object, List[_stub]
>> 
>> and (still naively) expect _object to be good enough to serve as type
>> annotation for dicts representing JSON objects.
>
> "_object" would be good, except ... I am trying to avoid using that word 
> because what does it mean? Python object? JSON object? Here at the 
> boundary between two worlds, nothing makes sense.

Naming is hard.

We talked about these names in review of v2.  Let me try again.

introspect.py needs to generate (a suitable C representation of) an
instance of QAPI type '[SchemaInfo]'.

Its current choice of "suitable C representation" is "a QLitQObject
initializer with #if and comments".  This is a "lose" representation:
QLitQObject can encode pretty much anything, not just instances of
'[SchemaInfo]'.

C code converts this QLitQObject to a SchemaInfoList object[*].
SchemaInfoList is the C type for QAPI type '[SchemaInfo]'.  Automated
tests ensure this conversion cannot fail, i.e. the "lose" QLitQObject
actually encodes a '[SchemaInfo]'.

introspect.py separates concerns: it first builds an abstract
representation of "set of QObject with #if and comments", then generates
C code from that.

Why "QObject with #if and comments", and not "QLitQObject with #if and
comments"?  Because QLitQObject is *one* way to represent QObject, and
we don't care which way outside C code generation.

A QObject represents a JSON value.  We could just as well say "JSON
value with #if and comments".

So, the abstract representation of "JSON value with #if and comments" is
what we're trying to type.  If you'd rather say "QObject with #if and
comments", that's fine.

Our abstract representation is a tree, where

* JSON null / QNull is represented as Python None

* JSON string / QString as str

* JSON true and false / QBool as bool

* JSON number / QNum is not implemented

* JSON object / QDict is dict mapping string keys to sub-trees

* JSON array / QList is list of sub-trees

* #if and comment tacked to a sub-tree is represented by wrapping the
  subtree in Annotated

  Wrapping a sub-tree that is already wrapped seems mostly useless, but
  the code doesn't care.

  Wrapping dictionary values makes no sense.  The code doesn't care, and
  gives you GIGO.

  Making the code reject these two feels out of scope.  If you want to
  anyway, I won't object unless it gets in the way of "in scope" stuff
  (right now it doesn't seem to).

Let me stress once again: this is *not* an abstract representation of a
'SchemaInfo'.  Such a representation would also work, and you might like
it better, but it's simply not what we have.  Evidence: _tree_to_qlit()
works fine for *any* tree, not just for trees that encode instances of
'SchemaInfo'.

Since each (sub-)tree represents a JSON value / QObject, possibly with
annotations, I'd like to propose a few "obvious" (hahaha) names:

* an unannotated QObject: QObject

* an annotated QObject: AnnotatedQObject

* a possibly annotated QObject: PossiblyAnnotatedQObject

  Too long.  Rename QObject to BareQObject, then call this one QObject.

This gives us:

    _BareQObject = Union[None, str, bool, Dict[str, Any], List[Any]]
    _AnnotatedQObject = Annotated[_QObject]
    _QObject = Union[_BareQObject, _AnnotatedQObject]

Feel free to replace QObject by JsonValue in these names if you like
that better.  I think I'd slightly prefer JsonValue right now.

Now back to _DObject:

> (See patch 12/14 for A More Betterer Understanding of what _DObject is 
> used for. It will contribute to A Greater Understanding.)
>
> Anyway, to your questions;
>
> (1) _DObject was my shorthand garbage way of saying "This is a Python 
> Dict that represents a JSON object". Hence Dict-Object, "DObject". I 
> have also derisively called this a "dictly-typed" object at times.

In the naming system I proposed, this is BareQDict, with an additional
complication: we actually have two different types for the same thing,
an anonymous one within _BareQObject, and a named one.

> (2) Dict[str, Any] and Dict[str, object] are similar, but do have a 

The former is the anonymous one, the latter the named one.

> semantic difference. I alluded to it by calling this a "(strict) alias"; 
> which does not help you understand any of the following points:
>
> Whenever you use "Any", it basically turns off type-checking at that 
> boundary; it is the gradually typed boundary type. Avoid it whenever 
> reasonably possible.
>
> Example time:
>
>
> def foo(thing: Any) -> None:
>      print(thing.value)  # Sure, I guess! We'll believe you.
>
>
> def foo(thing: object) -> None:
>      print(thing.value)  # BZZT, Python object has no value prop.
>
>
> Use "Any" when you really just cannot constrain the type, because you're 
> out of bourbon or you've decided to give in to the darkness inside your 
> heart.
>
> Use "object" when the type of the value actually doesn't matter, because 
> you are only passing it on to something else later that will do its own 
> type analysis or introspection on the object.
>
> For introspect.py, 'object' is actually a really good type when we can 
> use it, because we interrogate the type exhaustively upon receipt in 
> _tree_to_qlit.
>
>
> That leaves one question you would almost assuredly ask as a followup:
>
> "Why didn't you use object for the stub type to begin with?"
>
> Let's say we define _stub as `object` instead, the Python object. When 
> _tree_to_qlit recurses on non-scalar structures, the held value there is 
> only known as "object" and not as str/bool/None, which causes a typing 
> error at that point.
>
> Moving the stub to "Any" tells mypy to ... not worry about what type we 
> actually passed here. I gave in to the darkness in my heart. It's just 
> too annoying without real recursion.

May I have an abridged version of this in the comments?  It might look
quaint in ten years, when we're all fluent in Python type annotations.
But right now, at least some readers aren't, and they can use a bit of
help.


[*] Actually, we take a shortcut and convert straight to QObject, but
that's just laziness.  See qmp_query_qmp_schema()'s "Minor hack:"
comment.


Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by John Snow 4 years, 9 months ago
On 2/5/21 8:42 AM, Markus Armbruster wrote:
> John Snow <jsnow@redhat.com> writes:
> 
>> On 2/3/21 10:15 AM, Markus Armbruster wrote:
>>> John Snow <jsnow@redhat.com> writes:
>>>
>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>> ---
>>>>    scripts/qapi/introspect.py | 115 ++++++++++++++++++++++++++-----------
>>>>    scripts/qapi/mypy.ini      |   5 --
>>>>    scripts/qapi/schema.py     |   2 +-
>>>>    3 files changed, 82 insertions(+), 40 deletions(-)
>>>>
>>>> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
>>>> index 60ec326d2c7..b7f2a6cf260 100644
>>>> --- a/scripts/qapi/introspect.py
>>>> +++ b/scripts/qapi/introspect.py
>>>> @@ -30,10 +30,19 @@
>>>>    )
>>>>    from .gen import QAPISchemaMonolithicCVisitor
>>>>    from .schema import (
>>>> +    QAPISchema,
>>>>        QAPISchemaArrayType,
>>>>        QAPISchemaBuiltinType,
>>>> +    QAPISchemaEntity,
>>>> +    QAPISchemaEnumMember,
>>>> +    QAPISchemaFeature,
>>>> +    QAPISchemaObjectType,
>>>> +    QAPISchemaObjectTypeMember,
>>>>        QAPISchemaType,
>>>> +    QAPISchemaVariant,
>>>> +    QAPISchemaVariants,
>>>>    )
>>>> +from .source import QAPISourceInfo
>>>>    
>>>>    
>>>>    # This module constructs a tree data structure that is used to
>>>> @@ -57,6 +66,8 @@
>>>      # generate the introspection information for QEMU. It behaves similarly
>>>      # to a JSON value.
>>>      #
>>>      # A complexity over JSON is that our values may or may not be annotated.
>>>      #
>>>      # Un-annotated values may be:
>>>      #     Scalar: str, bool, None.
>>>      #     Non-scalar: List, Dict
>>>      # _value = Union[str, bool, None, Dict[str, TreeValue], List[TreeValue]]
>>>      #
>>>      # With optional annotations, the type of all values is:
>>>      # TreeValue = Union[_value, Annotated[_value]]
>>>      #
>>>      # Sadly, mypy does not support recursive types, so we must approximate this.
>>>      _stub = Any
>>>      _scalar = Union[str, bool, None]
>>>      _nonscalar = Union[Dict[str, _stub], List[_stub]]
>>>>    _value = Union[_scalar, _nonscalar]
>>>>    TreeValue = Union[_value, 'Annotated[_value]']
> 
> I'm once again terminally confused about when to use _lower_case and
> when to use CamelCase for such variables.
> 

That's my fault for not using them consistently.

Generally:

TitleCase: Classes, Real Type Names :tm:
lowercase: instance names (and certain built-in types like str/bool/int)
UPPERCASE: "Constants". This is an extremely loose idea in Python.

I use the "_" prefix for any of the above categories to indicate 
something not intended to be used outside of the current scope. These 
types won't be accessible outside the module by default.

TypeVars I use "T", "U", "V", etc unless I bind them to another type; 
then I use e.g. NodeT instead.

When it comes to things like type aliases, I believe I instinctively 
used lowercase because I am not creating a new Real Type and wanted some 
visual distinction from a real class name. (aliases created in this way 
cannot be used with isinstance and hold no significance to mypy.)

That's why I used _stub, _scalar, _nonscalar, and _value for those types 
there. Then I disregarded my own convention and used TreeValue; perhaps 
that ought to be tree_value for consistency as it's not a Real Type :tm:

...but then we have the SchemaInfo type aliases, which I named using the 
same type name as they use in QAPI to help paint the association (and 
pick up 'git grep' searchers.)

Not fantastically consistent, sorry. Feel free to express a preference, 
I clearly don't have a universally applied one.

(Current leaning: rename TreeValue to tree_value, but leave everything 
else as it is.)

> The reader has to connect _stub = Any back "we must approximate this".
> Hmm... "we approximate with Any"?
> 

I can try to be more explicit about it.

>>>>    
>>>> +# This is a (strict) alias for an arbitrary object non-scalar, as above:
>>>> +_DObject = Dict[str, object]
>>>
>>> Sounds greek :)
>>>
>>
>> Admittedly it is still not explained well ... until the next patch. I'm
>> going to leave it alone for now until you have a chance to respond to
>> these walls of text.
> 
> You explain it some futher down.
> 
>>> It's almost the Dict part of _nonscalar, but not quite: object vs. Any.
>>>
>>> I naively expect something closer to
>>>
>>>      _scalar = ...
>>>      _object = Dict[str, _stub]
>>>      _nonscalar = Union[_object, List[_stub]
>>>
>>> and (still naively) expect _object to be good enough to serve as type
>>> annotation for dicts representing JSON objects.
>>
>> "_object" would be good, except ... I am trying to avoid using that word
>> because what does it mean? Python object? JSON object? Here at the
>> boundary between two worlds, nothing makes sense.
> 
> Naming is hard.
> 

Yep. We can skip this debate by just naming the incoming types 
SchemaInfo and similar... (cont'd below)

> We talked about these names in review of v2.  Let me try again.
> 
> introspect.py needs to generate (a suitable C representation of) an
> instance of QAPI type '[SchemaInfo]'.
> 
> Its current choice of "suitable C representation" is "a QLitQObject
> initializer with #if and comments".  This is a "lose" representation:
> QLitQObject can encode pretty much anything, not just instances of
> '[SchemaInfo]'.
> 
> C code converts this QLitQObject to a SchemaInfoList object[*].
> SchemaInfoList is the C type for QAPI type '[SchemaInfo]'.  Automated
> tests ensure this conversion cannot fail, i.e. the "lose" QLitQObject
> actually encodes a '[SchemaInfo]'.
> 
> introspect.py separates concerns: it first builds an abstract
> representation of "set of QObject with #if and comments", then generates
> C code from that.
> 
> Why "QObject with #if and comments", and not "QLitQObject with #if and
> comments"?  Because QLitQObject is *one* way to represent QObject, and
> we don't care which way outside C code generation.
> 
> A QObject represents a JSON value.  We could just as well say "JSON
> value with #if and comments".
> 
> So, the abstract representation of "JSON value with #if and comments" is
> what we're trying to type.  If you'd rather say "QObject with #if and
> comments", that's fine.
> 
> Our abstract representation is a tree, where
> 
> * JSON null / QNull is represented as Python None
> 
> * JSON string / QString as str
> 
> * JSON true and false / QBool as bool
> 
> * JSON number / QNum is not implemented
> 
> * JSON object / QDict is dict mapping string keys to sub-trees
> 
> * JSON array / QList is list of sub-trees
> 
> * #if and comment tacked to a sub-tree is represented by wrapping the
>    subtree in Annotated
> 
>    Wrapping a sub-tree that is already wrapped seems mostly useless, but
>    the code doesn't care.
> 
>    Wrapping dictionary values makes no sense.  The code doesn't care, and
>    gives you GIGO.
> 
>    Making the code reject these two feels out of scope.  If you want to
>    anyway, I won't object unless it gets in the way of "in scope" stuff
>    (right now it doesn't seem to).
> 
> Let me stress once again: this is *not* an abstract representation of a
> 'SchemaInfo'.  Such a representation would also work, and you might like
> it better, but it's simply not what we have.  Evidence: _tree_to_qlit()
> works fine for *any* tree, not just for trees that encode instances of
> 'SchemaInfo'.
> 

... as long as you don't feel that's incorrect to do. We are free to 
name those structures SchemaInfo but type _tree_to_qlit() in terms of 
generic Dict objects, leaving us without a middle-abstract thing to name 
at all.

Based on your review of the "dummy types" patch, I'm going to assume 
that's fine.

> Since each (sub-)tree represents a JSON value / QObject, possibly with
> annotations, I'd like to propose a few "obvious" (hahaha) names:
> 
> * an unannotated QObject: QObject
> 
> * an annotated QObject: AnnotatedQObject
> 
> * a possibly annotated QObject: PossiblyAnnotatedQObject
> 
>    Too long.  Rename QObject to BareQObject, then call this one QObject.
> 
> This gives us:
> 
>      _BareQObject = Union[None, str, bool, Dict[str, Any], List[Any]]
>      _AnnotatedQObject = Annotated[_QObject]
>      _QObject = Union[_BareQObject, _AnnotatedQObject]
> 
> Feel free to replace QObject by JsonValue in these names if you like
> that better.  I think I'd slightly prefer JsonValue right now.
> 
> Now back to _DObject:
> 
>> (See patch 12/14 for A More Betterer Understanding of what _DObject is
>> used for. It will contribute to A Greater Understanding.)
>>
>> Anyway, to your questions;
>>
>> (1) _DObject was my shorthand garbage way of saying "This is a Python
>> Dict that represents a JSON object". Hence Dict-Object, "DObject". I
>> have also derisively called this a "dictly-typed" object at times.
> 
> In the naming system I proposed, this is BareQDict, with an additional
> complication: we actually have two different types for the same thing,
> an anonymous one within _BareQObject, and a named one.
> 
>> (2) Dict[str, Any] and Dict[str, object] are similar, but do have a
> 
> The former is the anonymous one, the latter the named one.
> 

Kinda-sorta. I am talking about pure mypy here, and the differences 
between typing two things this way.

Though I think you're right: I used the "Any" form for the anonymous 
type (inherent to the structure of a JSON compound type) and the 
"object" form for the named forms (The SchemaInfo objects we build in 
the visitors to pass to the generator later).

>> semantic difference. I alluded to it by calling this a "(strict) alias";
>> which does not help you understand any of the following points:
>>
>> Whenever you use "Any", it basically turns off type-checking at that
>> boundary; it is the gradually typed boundary type. Avoid it whenever
>> reasonably possible.
>>
>> Example time:
>>
>>
>> def foo(thing: Any) -> None:
>>       print(thing.value)  # Sure, I guess! We'll believe you.
>>
>>
>> def foo(thing: object) -> None:
>>       print(thing.value)  # BZZT, Python object has no value prop.
>>
>>
>> Use "Any" when you really just cannot constrain the type, because you're
>> out of bourbon or you've decided to give in to the darkness inside your
>> heart.
>>
>> Use "object" when the type of the value actually doesn't matter, because
>> you are only passing it on to something else later that will do its own
>> type analysis or introspection on the object.
>>
>> For introspect.py, 'object' is actually a really good type when we can
>> use it, because we interrogate the type exhaustively upon receipt in
>> _tree_to_qlit.
>>
>>
>> That leaves one question you would almost assuredly ask as a followup:
>>
>> "Why didn't you use object for the stub type to begin with?"
>>
>> Let's say we define _stub as `object` instead, the Python object. When
>> _tree_to_qlit recurses on non-scalar structures, the held value there is
>> only known as "object" and not as str/bool/None, which causes a typing
>> error at that point.
>>
>> Moving the stub to "Any" tells mypy to ... not worry about what type we
>> actually passed here. I gave in to the darkness in my heart. It's just
>> too annoying without real recursion.
> 
> May I have an abridged version of this in the comments?  It might look
> quaint in ten years, when we're all fluent in Python type annotations.
> But right now, at least some readers aren't, and they can use a bit of
> help.
> 

Yeah, I'm sympathetic to that.... though I'm not sure what to write or 
where. I can add some reference points in the commit message, like this one:

https://mypy.readthedocs.io/en/stable/dynamic_typing.html#any-vs-object

maybe in conjunction with the named type aliases patch this is actually 
sufficient?

> 
> [*] Actually, we take a shortcut and convert straight to QObject, but
> that's just laziness.  See qmp_query_qmp_schema()'s "Minor hack:"
> comment.
> 

:)


Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by Markus Armbruster 4 years, 9 months ago
John Snow <jsnow@redhat.com> writes:

> On 2/5/21 8:42 AM, Markus Armbruster wrote:
>> John Snow <jsnow@redhat.com> writes:
>> 
>>> On 2/3/21 10:15 AM, Markus Armbruster wrote:
>>>> John Snow <jsnow@redhat.com> writes:
>>>>
>>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>>> ---
>>>>>    scripts/qapi/introspect.py | 115 ++++++++++++++++++++++++++-----------
>>>>>    scripts/qapi/mypy.ini      |   5 --
>>>>>    scripts/qapi/schema.py     |   2 +-
>>>>>    3 files changed, 82 insertions(+), 40 deletions(-)
>>>>>
>>>>> diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
>>>>> index 60ec326d2c7..b7f2a6cf260 100644
>>>>> --- a/scripts/qapi/introspect.py
>>>>> +++ b/scripts/qapi/introspect.py
>>>>> @@ -30,10 +30,19 @@
>>>>>    )
>>>>>    from .gen import QAPISchemaMonolithicCVisitor
>>>>>    from .schema import (
>>>>> +    QAPISchema,
>>>>>        QAPISchemaArrayType,
>>>>>        QAPISchemaBuiltinType,
>>>>> +    QAPISchemaEntity,
>>>>> +    QAPISchemaEnumMember,
>>>>> +    QAPISchemaFeature,
>>>>> +    QAPISchemaObjectType,
>>>>> +    QAPISchemaObjectTypeMember,
>>>>>        QAPISchemaType,
>>>>> +    QAPISchemaVariant,
>>>>> +    QAPISchemaVariants,
>>>>>    )
>>>>> +from .source import QAPISourceInfo
>>>>>    
>>>>>    
>>>>>    # This module constructs a tree data structure that is used to
>>>>> @@ -57,6 +66,8 @@
>>>>      # generate the introspection information for QEMU. It behaves similarly
>>>>      # to a JSON value.
>>>>      #
>>>>      # A complexity over JSON is that our values may or may not be annotated.
>>>>      #
>>>>      # Un-annotated values may be:
>>>>      #     Scalar: str, bool, None.
>>>>      #     Non-scalar: List, Dict
>>>>      # _value = Union[str, bool, None, Dict[str, TreeValue], List[TreeValue]]
>>>>      #
>>>>      # With optional annotations, the type of all values is:
>>>>      # TreeValue = Union[_value, Annotated[_value]]
>>>>      #
>>>>      # Sadly, mypy does not support recursive types, so we must approximate this.
>>>>      _stub = Any
>>>>      _scalar = Union[str, bool, None]
>>>>      _nonscalar = Union[Dict[str, _stub], List[_stub]]
>>>>>    _value = Union[_scalar, _nonscalar]
>>>>>    TreeValue = Union[_value, 'Annotated[_value]']
>> 
>> I'm once again terminally confused about when to use _lower_case and
>> when to use CamelCase for such variables.
>> 
>
> That's my fault for not using them consistently.
>
> Generally:
>
> TitleCase: Classes, Real Type Names :tm:
> lowercase: instance names (and certain built-in types like str/bool/int)
> UPPERCASE: "Constants". This is an extremely loose idea in Python.
>
> I use the "_" prefix for any of the above categories to indicate 
> something not intended to be used outside of the current scope. These 
> types won't be accessible outside the module by default.
>
> TypeVars I use "T", "U", "V", etc unless I bind them to another type; 
> then I use e.g. NodeT instead.
>
> When it comes to things like type aliases, I believe I instinctively 
> used lowercase because I am not creating a new Real Type and wanted some 
> visual distinction from a real class name. (aliases created in this way 
> cannot be used with isinstance and hold no significance to mypy.)
>
> That's why I used _stub, _scalar, _nonscalar, and _value for those types 
> there. Then I disregarded my own convention and used TreeValue; perhaps 
> that ought to be tree_value for consistency as it's not a Real Type :tm:
>
> ...but then we have the SchemaInfo type aliases, which I named using the 
> same type name as they use in QAPI to help paint the association (and 
> pick up 'git grep' searchers.)
>
> Not fantastically consistent, sorry. Feel free to express a preference, 
> I clearly don't have a universally applied one.
>
> (Current leaning: rename TreeValue to tree_value, but leave everything 
> else as it is.)

https://www.python.org/dev/peps/pep-0484/#type-aliases

    Note that we recommend capitalizing alias names, since they
    represent user-defined types, which (like user-defined classes) are
    typically spelled that way.

I think this wants names like _Scalar, _NonScalar, _Value, TreeValue.

>> The reader has to connect _stub = Any back "we must approximate this".
>> Hmm... "we approximate with Any"?
>> 
>
> I can try to be more explicit about it.
>
>>>>>    
>>>>> +# This is a (strict) alias for an arbitrary object non-scalar, as above:
>>>>> +_DObject = Dict[str, object]
>>>>
>>>> Sounds greek :)
>>>>
>>>
>>> Admittedly it is still not explained well ... until the next patch. I'm
>>> going to leave it alone for now until you have a chance to respond to
>>> these walls of text.
>> 
>> You explain it some futher down.
>> 
>>>> It's almost the Dict part of _nonscalar, but not quite: object vs. Any.
>>>>
>>>> I naively expect something closer to
>>>>
>>>>      _scalar = ...
>>>>      _object = Dict[str, _stub]
>>>>      _nonscalar = Union[_object, List[_stub]
>>>>
>>>> and (still naively) expect _object to be good enough to serve as type
>>>> annotation for dicts representing JSON objects.
>>>
>>> "_object" would be good, except ... I am trying to avoid using that word
>>> because what does it mean? Python object? JSON object? Here at the
>>> boundary between two worlds, nothing makes sense.
>> 
>> Naming is hard.
>> 
>
> Yep. We can skip this debate by just naming the incoming types 
> SchemaInfo and similar... (cont'd below)
>
>> We talked about these names in review of v2.  Let me try again.
>> 
>> introspect.py needs to generate (a suitable C representation of) an
>> instance of QAPI type '[SchemaInfo]'.
>> 
>> Its current choice of "suitable C representation" is "a QLitQObject
>> initializer with #if and comments".  This is a "lose" representation:
>> QLitQObject can encode pretty much anything, not just instances of
>> '[SchemaInfo]'.
>> 
>> C code converts this QLitQObject to a SchemaInfoList object[*].
>> SchemaInfoList is the C type for QAPI type '[SchemaInfo]'.  Automated
>> tests ensure this conversion cannot fail, i.e. the "lose" QLitQObject
>> actually encodes a '[SchemaInfo]'.
>> 
>> introspect.py separates concerns: it first builds an abstract
>> representation of "set of QObject with #if and comments", then generates
>> C code from that.
>> 
>> Why "QObject with #if and comments", and not "QLitQObject with #if and
>> comments"?  Because QLitQObject is *one* way to represent QObject, and
>> we don't care which way outside C code generation.
>> 
>> A QObject represents a JSON value.  We could just as well say "JSON
>> value with #if and comments".
>> 
>> So, the abstract representation of "JSON value with #if and comments" is
>> what we're trying to type.  If you'd rather say "QObject with #if and
>> comments", that's fine.
>> 
>> Our abstract representation is a tree, where
>> 
>> * JSON null / QNull is represented as Python None
>> 
>> * JSON string / QString as str
>> 
>> * JSON true and false / QBool as bool
>> 
>> * JSON number / QNum is not implemented
>> 
>> * JSON object / QDict is dict mapping string keys to sub-trees
>> 
>> * JSON array / QList is list of sub-trees
>> 
>> * #if and comment tacked to a sub-tree is represented by wrapping the
>>    subtree in Annotated
>> 
>>    Wrapping a sub-tree that is already wrapped seems mostly useless, but
>>    the code doesn't care.
>> 
>>    Wrapping dictionary values makes no sense.  The code doesn't care, and
>>    gives you GIGO.
>> 
>>    Making the code reject these two feels out of scope.  If you want to
>>    anyway, I won't object unless it gets in the way of "in scope" stuff
>>    (right now it doesn't seem to).
>> 
>> Let me stress once again: this is *not* an abstract representation of a
>> 'SchemaInfo'.  Such a representation would also work, and you might like
>> it better, but it's simply not what we have.  Evidence: _tree_to_qlit()
>> works fine for *any* tree, not just for trees that encode instances of
>> 'SchemaInfo'.
>> 
>
> ... as long as you don't feel that's incorrect to do. We are free to 
> name those structures SchemaInfo but type _tree_to_qlit() in terms of 
> generic Dict objects, leaving us without a middle-abstract thing to name 
> at all.
>
> Based on your review of the "dummy types" patch, I'm going to assume 
> that's fine.

I guess it's okayish enough.  It still feels more complicated to me than
it needs to be.

QAPISchemaGenIntrospectVisitor an abstract representation of "QObject
with #if and comments" for each SchemaInfo.

This is not really a representation of SchemaInfo.  We can choose to
name it that way regardless, if it helps, and we explain it properly.

Once we hand off the data to _tree_to_qlit(), we can't name it that way
anymore, simply because _tree_to_qlit() treats it as the stupid
recursive data structure it is, and doesn't need or want to know about
SchemaInfo.

I think I'd dispense with _DObject entirely, and use TreeValue
throughout.  Yes, we'd use Any a bit more.  I doubt the additional
complexity to *sometimes* use object instead is worthwhile.  This data
structure is used only within this file.  It pretty much never changes
(because JSON doesn't).  It's basically write-only in
QAPISchemaGenIntrospectVisitor.  This means all the extra typing work
buys us is use of object instead of Any where it doesn't actually
matter.

I would use a more telling name than TreeValue, though.  One that
actually hints at the kind of value "representation of QObject with #if
and comment".

>> Since each (sub-)tree represents a JSON value / QObject, possibly with
>> annotations, I'd like to propose a few "obvious" (hahaha) names:
>> 
>> * an unannotated QObject: QObject
>> 
>> * an annotated QObject: AnnotatedQObject
>> 
>> * a possibly annotated QObject: PossiblyAnnotatedQObject
>> 
>>    Too long.  Rename QObject to BareQObject, then call this one QObject.
>> 
>> This gives us:
>> 
>>      _BareQObject = Union[None, str, bool, Dict[str, Any], List[Any]]
>>      _AnnotatedQObject = Annotated[_QObject]
>>      _QObject = Union[_BareQObject, _AnnotatedQObject]
>> 
>> Feel free to replace QObject by JsonValue in these names if you like
>> that better.  I think I'd slightly prefer JsonValue right now.
>> 
>> Now back to _DObject:
>> 
>>> (See patch 12/14 for A More Betterer Understanding of what _DObject is
>>> used for. It will contribute to A Greater Understanding.)
>>>
>>> Anyway, to your questions;
>>>
>>> (1) _DObject was my shorthand garbage way of saying "This is a Python
>>> Dict that represents a JSON object". Hence Dict-Object, "DObject". I
>>> have also derisively called this a "dictly-typed" object at times.
>> 
>> In the naming system I proposed, this is BareQDict, with an additional
>> complication: we actually have two different types for the same thing,
>> an anonymous one within _BareQObject, and a named one.
>> 
>>> (2) Dict[str, Any] and Dict[str, object] are similar, but do have a
>> 
>> The former is the anonymous one, the latter the named one.
>> 
>
> Kinda-sorta. I am talking about pure mypy here, and the differences 
> between typing two things this way.
>
> Though I think you're right: I used the "Any" form for the anonymous 
> type (inherent to the structure of a JSON compound type) and the 
> "object" form for the named forms (The SchemaInfo objects we build in 
> the visitors to pass to the generator later).
>
>>> semantic difference. I alluded to it by calling this a "(strict) alias";
>>> which does not help you understand any of the following points:
>>>
>>> Whenever you use "Any", it basically turns off type-checking at that
>>> boundary; it is the gradually typed boundary type. Avoid it whenever
>>> reasonably possible.
>>>
>>> Example time:
>>>
>>>
>>> def foo(thing: Any) -> None:
>>>       print(thing.value)  # Sure, I guess! We'll believe you.
>>>
>>>
>>> def foo(thing: object) -> None:
>>>       print(thing.value)  # BZZT, Python object has no value prop.
>>>
>>>
>>> Use "Any" when you really just cannot constrain the type, because you're
>>> out of bourbon or you've decided to give in to the darkness inside your
>>> heart.
>>>
>>> Use "object" when the type of the value actually doesn't matter, because
>>> you are only passing it on to something else later that will do its own
>>> type analysis or introspection on the object.
>>>
>>> For introspect.py, 'object' is actually a really good type when we can
>>> use it, because we interrogate the type exhaustively upon receipt in
>>> _tree_to_qlit.
>>>
>>>
>>> That leaves one question you would almost assuredly ask as a followup:
>>>
>>> "Why didn't you use object for the stub type to begin with?"
>>>
>>> Let's say we define _stub as `object` instead, the Python object. When
>>> _tree_to_qlit recurses on non-scalar structures, the held value there is
>>> only known as "object" and not as str/bool/None, which causes a typing
>>> error at that point.
>>>
>>> Moving the stub to "Any" tells mypy to ... not worry about what type we
>>> actually passed here. I gave in to the darkness in my heart. It's just
>>> too annoying without real recursion.
>> 
>> May I have an abridged version of this in the comments?  It might look
>> quaint in ten years, when we're all fluent in Python type annotations.
>> But right now, at least some readers aren't, and they can use a bit of
>> help.
>> 
>
> Yeah, I'm sympathetic to that.... though I'm not sure what to write or 
> where. I can add some reference points in the commit message, like this one:
>
> https://mypy.readthedocs.io/en/stable/dynamic_typing.html#any-vs-object
>
> maybe in conjunction with the named type aliases patch this is actually 
> sufficient?

I can see two solutions right now:

1. Use Dict[str, Any] throughout

   All we need to explain is

   * What the data structure is about (JSON annotated with ifconds and
     comments; got that, could use improvement perhaps)

   * Your work-around for the lack of recursive types (got that
     already)

   * That the use of Any bypasses type static checking on use (shouldn't
     be hard)

   * Where such uses are (I believe only in _tree_to_qlit(), were Any
     can't be avoided anyway).

2. Use Dict[str, object] where we can

   Now we get to explain a few more things:

   * Why we bother (to get stricter static type checks on use)

   * Where such uses are (I can't see any offhand)

   * Maybe also where we go from one static type to the other.

In either case, we also need to pick names that need no explanation, or
explain them.

>> [*] Actually, we take a shortcut and convert straight to QObject, but
>> that's just laziness.  See qmp_query_qmp_schema()'s "Minor hack:"
>> comment.
>> 
>
> :)


Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by John Snow 4 years, 9 months ago
On 2/9/21 4:06 AM, Markus Armbruster wrote:
> John Snow <jsnow@redhat.com> writes:
>> On 2/5/21 8:42 AM, Markus Armbruster wrote:
>>> John Snow <jsnow@redhat.com> writes:

> https://www.python.org/dev/peps/pep-0484/#type-aliases
> 
>      Note that we recommend capitalizing alias names, since they
>      represent user-defined types, which (like user-defined classes) are
>      typically spelled that way.
> 
> I think this wants names like _Scalar, _NonScalar, _Value, TreeValue.
> 

Yeah, that seems like the only way to make it consistent and not have 
pylint yell at me. I will try to adhere to this in the future, but maybe 
pylint needs a bug report to make it complain in these cases, too.

[...]

>>
>> ... as long as you don't feel that's incorrect to do. We are free to
>> name those structures SchemaInfo but type _tree_to_qlit() in terms of
>> generic Dict objects, leaving us without a middle-abstract thing to name
>> at all.
>>
>> Based on your review of the "dummy types" patch, I'm going to assume
>> that's fine.
> 
> I guess it's okayish enough.  It still feels more complicated to me than
> it needs to be.
> 
> QAPISchemaGenIntrospectVisitor an abstract representation of "QObject
> with #if and comments" for each SchemaInfo.
> 
> This is not really a representation of SchemaInfo.  We can choose to
> name it that way regardless, if it helps, and we explain it properly.

In that: SchemaInfo do not have annotations, but we do. Our SchemaInfo 
objects here are in a kind of superposition in that we have not yet 
applied the if conditionals.

Still, I do think it is *very* helpful to name those instances after the 
SchemaInfo types, because that is absolutely the interface we are 
matching. The keys are not arbitrary. The types of the values associated 
with those keys are not arbitrary.

So, I am not sure how useful it is to make such a careful distinction. 
My instinct is "not very, especially for passers-by to this module."

> 
> Once we hand off the data to _tree_to_qlit(), we can't name it that way
> anymore, simply because _tree_to_qlit() treats it as the stupid
> recursive data structure it is, and doesn't need or want to know about
> SchemaInfo.
> 

Yes, this is fine: the data is being interpreted in a new semantic 
context. It keeps the mechanical type but loses the semantic 
information. That sounds normal to me.

"Why bother, then?"

Mostly for the notational benefit in the code BUILDING the objects. 
_tree_to_qlit is so generic you can barely describe it, but the objects 
we build to feed it are quite concrete and have names and definitions 
that can be referenced.

> I think I'd dispense with _DObject entirely, and use TreeValue
> throughout.  Yes, we'd use Any a bit more.  I doubt the additional
> complexity to *sometimes* use object instead is worthwhile.  This data

I have gotten rid of _DObject entirely in v5; though using "Any" 
everywhere doesn't seem like an obvious win to me, because I'd need to 
turn this:

_NonScalar = Union[Dict[str, _Stub], List[_Stub]]
[...]
SchemaInfo = Dict[str, object]
[...]

into this:

_JSONObject = Dict[str, _Stub]
_JSONArray = List[_Stub]
_NonScalar = Union[_JSONObject, _JSONArray]
[...]
SchemaInfo = _JSONObject
[...]

Which doesn't really strike me as any simpler or nicer on either the 
semantic or mechanical axes.

> structure is used only within this file.  It pretty much never changes
> (because JSON doesn't).  It's basically write-only in
> QAPISchemaGenIntrospectVisitor.  This means all the extra typing work

Write-only variables need typing! mypy will assume these objects are 
Dict[str, str] otherwise. We have to type them as something.

And the way I typed them ... is correct, and avoided having to name two 
more intermediary types.

> buys us is use of object instead of Any where it doesn't actually
> matter.
> 

Maybe so. Comes down to habits. My current belief is "Do not use Any if 
you do not have to." I did not have to, so I didn't.

> I would use a more telling name than TreeValue, though.  One that
> actually hints at the kind of value "representation of QObject with #if
> and comment".
> 

We discussed this on IRC; ultimately I wasn't convinced of the utility 
of naming the tree type "QObject" on the logic that if QLit is a 
QObject, a function named "QObject to QLit" didn't make sense to me anymore.

>>> Since each (sub-)tree represents a JSON value / QObject, possibly with
>>> annotations, I'd like to propose a few "obvious" (hahaha) names:
>>>
>>> * an unannotated QObject: QObject
>>>
>>> * an annotated QObject: AnnotatedQObject
>>>
>>> * a possibly annotated QObject: PossiblyAnnotatedQObject
>>>
>>>     Too long.  Rename QObject to BareQObject, then call this one QObject.
>>>
>>> This gives us:
>>>
>>>       _BareQObject = Union[None, str, bool, Dict[str, Any], List[Any]]
>>>       _AnnotatedQObject = Annotated[_QObject]
>>>       _QObject = Union[_BareQObject, _AnnotatedQObject]
>>>
>>> Feel free to replace QObject by JsonValue in these names if you like
>>> that better.  I think I'd slightly prefer JsonValue right now.
>>>

On IRC, We agreed to disagree on the semantic name and use the more 
mechanically suggestive JsonValue instead. I'll give that a spin.

(It's also kinda-sorta wrong, but everything has felt kinda-sorta wrong 
to me so far. Guess it's no better or worse.)

>>> Now back to _DObject:
>>>
>>>> (See patch 12/14 for A More Betterer Understanding of what _DObject is
>>>> used for. It will contribute to A Greater Understanding.)
>>>>
>>>> Anyway, to your questions;
>>>>
>>>> (1) _DObject was my shorthand garbage way of saying "This is a Python
>>>> Dict that represents a JSON object". Hence Dict-Object, "DObject". I
>>>> have also derisively called this a "dictly-typed" object at times.
>>>
>>> In the naming system I proposed, this is BareQDict, with an additional
>>> complication: we actually have two different types for the same thing,
>>> an anonymous one within _BareQObject, and a named one.
>>>
>>>> (2) Dict[str, Any] and Dict[str, object] are similar, but do have a
>>>
>>> The former is the anonymous one, the latter the named one.
>>>
>>
>> Kinda-sorta. I am talking about pure mypy here, and the differences
>> between typing two things this way.
>>
>> Though I think you're right: I used the "Any" form for the anonymous
>> type (inherent to the structure of a JSON compound type) and the
>> "object" form for the named forms (The SchemaInfo objects we build in
>> the visitors to pass to the generator later).
>>
>>>> semantic difference. I alluded to it by calling this a "(strict) alias";
>>>> which does not help you understand any of the following points:
>>>>
>>>> Whenever you use "Any", it basically turns off type-checking at that
>>>> boundary; it is the gradually typed boundary type. Avoid it whenever
>>>> reasonably possible.
>>>>
>>>> Example time:
>>>>
>>>>
>>>> def foo(thing: Any) -> None:
>>>>        print(thing.value)  # Sure, I guess! We'll believe you.
>>>>
>>>>
>>>> def foo(thing: object) -> None:
>>>>        print(thing.value)  # BZZT, Python object has no value prop.
>>>>
>>>>
>>>> Use "Any" when you really just cannot constrain the type, because you're
>>>> out of bourbon or you've decided to give in to the darkness inside your
>>>> heart.
>>>>
>>>> Use "object" when the type of the value actually doesn't matter, because
>>>> you are only passing it on to something else later that will do its own
>>>> type analysis or introspection on the object.
>>>>
>>>> For introspect.py, 'object' is actually a really good type when we can
>>>> use it, because we interrogate the type exhaustively upon receipt in
>>>> _tree_to_qlit.
>>>>
>>>>
>>>> That leaves one question you would almost assuredly ask as a followup:
>>>>
>>>> "Why didn't you use object for the stub type to begin with?"
>>>>
>>>> Let's say we define _stub as `object` instead, the Python object. When
>>>> _tree_to_qlit recurses on non-scalar structures, the held value there is
>>>> only known as "object" and not as str/bool/None, which causes a typing
>>>> error at that point.
>>>>
>>>> Moving the stub to "Any" tells mypy to ... not worry about what type we
>>>> actually passed here. I gave in to the darkness in my heart. It's just
>>>> too annoying without real recursion.
>>>
>>> May I have an abridged version of this in the comments?  It might look
>>> quaint in ten years, when we're all fluent in Python type annotations.
>>> But right now, at least some readers aren't, and they can use a bit of
>>> help.
>>>
>>
>> Yeah, I'm sympathetic to that.... though I'm not sure what to write or
>> where. I can add some reference points in the commit message, like this one:
>>
>> https://mypy.readthedocs.io/en/stable/dynamic_typing.html#any-vs-object
>>
>> maybe in conjunction with the named type aliases patch this is actually
>> sufficient?
> 
> I can see two solutions right now:
> 
> 1. Use Dict[str, Any] throughout
> 
>     All we need to explain is
> 
>     * What the data structure is about (JSON annotated with ifconds and
>       comments; got that, could use improvement perhaps)
> 
>     * Your work-around for the lack of recursive types (got that
>       already)
> 
>     * That the use of Any bypasses type static checking on use (shouldn't
>       be hard)
> 
>     * Where such uses are (I believe only in _tree_to_qlit(), were Any
>       can't be avoided anyway).
> 
> 2. Use Dict[str, object] where we can
> 
>     Now we get to explain a few more things:
> 
>     * Why we bother (to get stricter static type checks on use)
> 
>     * Where such uses are (I can't see any offhand)
> 
>     * Maybe also where we go from one static type to the other.
> 
> In either case, we also need to pick names that need no explanation, or
> explain them.
> 

"that need no explanation" (to whom?) Suspect this is impossible; 
there's gonna be explanations anyway.

--js


Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations
Posted by John Snow 4 years, 9 months ago
On 2/8/21 4:39 PM, John Snow wrote:
>>
>> I'm once again terminally confused about when to use _lower_case and
>> when to use CamelCase for such variables.
>>
> 
> That's my fault for not using them consistently.
> 
> Generally:
> 
> TitleCase: Classes, Real Type Names :tm:
> lowercase: instance names (and certain built-in types like str/bool/int)
> UPPERCASE: "Constants". This is an extremely loose idea in Python.
> 
> I use the "_" prefix for any of the above categories to indicate 
> something not intended to be used outside of the current scope. These 
> types won't be accessible outside the module by default.
> 
> TypeVars I use "T", "U", "V", etc unless I bind them to another type; 
> then I use e.g. NodeT instead.
> 
> When it comes to things like type aliases, I believe I instinctively 
> used lowercase because I am not creating a new Real Type and wanted some 
> visual distinction from a real class name. (aliases created in this way 
> cannot be used with isinstance and hold no significance to mypy.)
> 
> That's why I used _stub, _scalar, _nonscalar, and _value for those types 
> there. Then I disregarded my own convention and used TreeValue; perhaps 
> that ought to be tree_value for consistency as it's not a Real Type :tm:
> 
> ...but then we have the SchemaInfo type aliases, which I named using the 
> same type name as they use in QAPI to help paint the association (and 
> pick up 'git grep' searchers.)
> 
> Not fantastically consistent, sorry. Feel free to express a preference, 
> I clearly don't have a universally applied one.
> 
> (Current leaning: rename TreeValue to tree_value, but leave everything 
> else as it is.)

Addendum: pylint wants any non-underscored type alias to be treated like 
a class name, as CamelCase.

I guess it just exempts underscore prefixed things. So, it does have to 
stay "TreeValue".

--js