[Patchew-devel] [PATCH] models: use subqueries and IN to find message for a project and its parent

Paolo Bonzini posted 1 patch 2 years ago
Failed in applying to current master (apply log)
api/models.py | 12 +++++++++++-
api/search.py |  6 +++---
2 files changed, 14 insertions(+), 4 deletions(-)
[Patchew-devel] [PATCH] models: use subqueries and IN to find message for a project and its parent
Posted by Paolo Bonzini 2 years ago
The database likes this a bit more.  Before:

  SELECT COUNT(*) AS "__count"
    FROM "api_message"
    INNER JOIN "api_project" ON ("api_message"."project_id" = "api_project"."id")
    WHERE (("api_message"."project_id" = 1 OR "api_project"."parent_project_id" = 1)
      AND "api_message"."topic_id" IS NOT NULL);

with a cache-hot runtime of ~1 second and the following plan:

 Aggregate  (cost=64143.99..64144.00 rows=1 width=8)
   ->  Hash Join  (cost=6.65..64030.37 rows=45449 width=0)
         Hash Cond: (api_message.project_id = api_project.id)
         Join Filter: ((api_message.project_id = 1) OR (api_project.parent_project_id = 1))
         ->  Index Only Scan using api_message_topic_id_project_id_last_reply_date_871c0af8_idx on api_message  (cost=0.43..63676.05 rows=104690 width=4)
               Index Cond: (topic_id IS NOT NULL)
         ->  Hash  (cost=6.10..6.10 rows=10 width=8)
               ->  Seq Scan on api_project  (cost=0.00..6.10 rows=10 width=8)

After, the subquery is indeed executed first, giving a cache-hot time of
~250 ms:

  SELECT COUNT(*) AS "__count" FROM "api_message"
    WHERE ("api_message"."project_id" IN (SELECT U0."id" FROM "api_project" U0 LEFT OUTER JOIN "api_project" U1 ON (U0."parent_project_id" = U1."id") WHERE (U0."name" = 'QEMU' OR U1."name" = 'QEMU')))
    AND "api_message"."topic_id" IS NOT NULL;

 Aggregate  (cost=19272.83..19272.84 rows=1 width=8)
   ->  Nested Loop  (cost=12.79..19220.47 rows=20945 width=0)
         ->  Unique  (cost=12.36..12.37 rows=2 width=4)
               ->  Sort  (cost=12.36..12.37 rows=2 width=4)
                     Sort Key: u0.id
                     ->  Hash Left Join  (cost=6.22..12.35 rows=2 width=4)
                           Hash Cond: (u0.parent_project_id = u1.id)
                           Filter: (((u0.name)::text = 'QEMU'::text) OR ((u1.name)::text = 'QEMU'::text))
                           ->  Seq Scan on api_project u0  (cost=0.00..6.10 rows=10 width=14)
                           ->  Hash  (cost=6.10..6.10 rows=10 width=10)
                                 ->  Seq Scan on api_project u1  (cost=0.00..6.10 rows=10 width=10)
         ->  Index Only Scan using api_message_topic_id_project_id_last_reply_date_871c0af8_idx on api_message  (cost=0.43..9499.33 rows=10472 width=4)
               Index Cond: ((topic_id IS NOT NULL) AND (project_id = u0.id))

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 api/models.py | 12 +++++++++++-
 api/search.py |  6 +++---
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/api/models.py b/api/models.py
index b1b42c1..6bb9867 100644
--- a/api/models.py
+++ b/api/models.py
@@ -14,6 +14,7 @@ import re
 
 from django.core import validators
 from django.db import models
+from django.db.models import Q
 from django.contrib.auth.models import User
 from django.urls import reverse
 import jsonfield
@@ -300,6 +301,14 @@ class Project(models.Model):
     def get_subprojects(self):
         return Project.objects.filter(parent_project=self)
 
+    @classmethod
+    def get_project_ids_by_id(cls, id):
+        return cls.objects.filter(Q(id=id) | Q(parent_project__id=id)).values_list('id')
+
+    @classmethod
+    def get_project_ids_by_name(cls, name):
+        return cls.objects.filter(Q(name=name) | Q(parent_project__name=name)).values_list('id')
+
     def get_project_head(self):
         return self.get_property("git.head")
 
@@ -411,8 +420,9 @@ class MessageManager(models.Manager):
         if po is None:
             return None
 
+        ids = po.get_project_ids_by_id(po.id)
         q = self.get_queryset()
-        q = q.filter(project=po) | q.filter(project__parent_project=po)
+        q = q.filter(project__pk__in=ids)
         return q
 
     def series_heads(self, project=None):
diff --git a/api/search.py b/api/search.py
index 573b1a2..cfd963b 100644
--- a/api/search.py
+++ b/api/search.py
@@ -8,7 +8,7 @@
 # This work is licensed under the MIT License.  Please see the LICENSE file or
 # http://opensource.org/licenses/MIT.
 
-from .models import Message, MessageResult, Result, QueuedSeries
+from .models import Message, MessageResult, Project, Result, QueuedSeries
 from collections import namedtuple
 from functools import reduce
 import operator
@@ -269,10 +269,10 @@ def __parser(_Q):
         return q
 
     def _make_filter_project(cond):
+        ids = Project.get_project_ids_by_name(cond)
         return SearchTerm(
             project=cond,
-            query=_Q(project__name=cond) | _Q(project__parent_project__name=cond),
-        )
+            query=_Q(project__pk__in=ids))
 
     def _make_filter_is(cond):
         if cond == "complete":
-- 
2.37.2

_______________________________________________
Patchew-devel mailing list
Patchew-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/patchew-devel