[Patchew-devel] [PATCH v3] add more indices to Message

Paolo Bonzini posted 1 patch 6 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/patchew-ci tags/patchew/20181031183212.11345-1-pbonzini@redhat.com
api/migrations/0037_auto_20181031_1439.py | 19 +++++++++++++++++++
api/models.py                             |  4 ++++
tests/test_import.py                      |  6 ++++--
3 files changed, 27 insertions(+), 2 deletions(-)
create mode 100644 api/migrations/0037_auto_20181031_1439.py

[Patchew-devel] [PATCH v3] add more indices to Message

Posted by Paolo Bonzini 6 weeks ago
Add indices to help the series list page.  The project is almost always
used as a search key, either directly or through a parent project, so
include both a variant with the project and one without.  The sorting
keys are left last, while filter keys should come first.

This reduces the SELECT COUNT(*) query in the series list from 180 ms
to 5 ms on a full dump from a few weeks ago.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 api/migrations/0037_auto_20181031_1439.py | 19 +++++++++++++++++++
 api/models.py                             |  4 ++++
 tests/test_import.py                      |  6 ++++--
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 api/migrations/0037_auto_20181031_1439.py

diff --git a/api/migrations/0037_auto_20181031_1439.py b/api/migrations/0037_auto_20181031_1439.py
new file mode 100644
index 0000000..ca75e37
--- /dev/null
+++ b/api/migrations/0037_auto_20181031_1439.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.15 on 2018-10-31 14:39
+from __future__ import unicode_literals
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('api', '0036_populate_message_tags'),
+    ]
+
+    operations = [
+        migrations.AlterIndexTogether(
+            name='message',
+            index_together=set([('is_series_head', 'project', 'date'), ('is_series_head', 'date'), ('is_series_head', 'last_reply_date'), ('is_series_head', 'project', 'last_reply_date')]),
+        ),
+    ]
diff --git a/api/models.py b/api/models.py
index ab0bd06..09758f6 100644
--- a/api/models.py
+++ b/api/models.py
@@ -714,6 +714,10 @@ class Message(models.Model):
 
     class Meta:
         unique_together = ('project', 'message_id',)
+        index_together = [('is_series_head', 'project', 'last_reply_date'),
+                          ('is_series_head', 'project', 'date'),
+                          ('is_series_head', 'last_reply_date'),
+                          ('is_series_head', 'date')]
 
 class MessageResult(Result):
     message = models.ForeignKey(Message, related_name='results')
diff --git a/tests/test_import.py b/tests/test_import.py
index 5693d7e..64d8c66 100755
--- a/tests/test_import.py
+++ b/tests/test_import.py
@@ -91,8 +91,10 @@ class ImportTest(PatchewTestCase):
         self.assertTrue(s.project.name, sp.name)
 
         self.cli_import("0020-libvirt.mbox.gz")
-        subj2 = subj + '\n[libvirt]  [PATCH v2] vcpupin: add clear feature'
-        self.check_cli(["search", "project:Libvirt"], stdout=subj2)
+        # the order of the search results may change, so we cannot use stdout=...
+        a, b = self.check_cli(["search", "project:Libvirt"])
+        self.assertEqual(sorted(a.split('\n')),
+                         ['[libvirt]  [PATCH v2] vcpupin: add clear feature', subj])
         self.check_cli(["search", "project:Libvirt-python"], stdout=subj)
 
 class UnprivilegedImportTest(ImportTest):
-- 
2.17.1

_______________________________________________
Patchew-devel mailing list
Patchew-devel@redhat.com
https://www.redhat.com/mailman/listinfo/patchew-devel

Re: [Patchew-devel] [PATCH v3] add more indices to Message

Posted by Fam Zheng 6 weeks ago
On Wed, 10/31 19:32, Paolo Bonzini wrote:
> Add indices to help the series list page.  The project is almost always
> used as a search key, either directly or through a parent project, so
> include both a variant with the project and one without.  The sorting
> keys are left last, while filter keys should come first.

I don't understand when an index without project is helpful, can you elaborate?

> 
> This reduces the SELECT COUNT(*) query in the series list from 180 ms
> to 5 ms on a full dump from a few weeks ago.

Excellent! Thanks!

Fam

> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  api/migrations/0037_auto_20181031_1439.py | 19 +++++++++++++++++++
>  api/models.py                             |  4 ++++
>  tests/test_import.py                      |  6 ++++--
>  3 files changed, 27 insertions(+), 2 deletions(-)
>  create mode 100644 api/migrations/0037_auto_20181031_1439.py
> 
> diff --git a/api/migrations/0037_auto_20181031_1439.py b/api/migrations/0037_auto_20181031_1439.py
> new file mode 100644
> index 0000000..ca75e37
> --- /dev/null
> +++ b/api/migrations/0037_auto_20181031_1439.py
> @@ -0,0 +1,19 @@
> +# -*- coding: utf-8 -*-
> +# Generated by Django 1.11.15 on 2018-10-31 14:39
> +from __future__ import unicode_literals
> +
> +from django.db import migrations
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('api', '0036_populate_message_tags'),
> +    ]
> +
> +    operations = [
> +        migrations.AlterIndexTogether(
> +            name='message',
> +            index_together=set([('is_series_head', 'project', 'date'), ('is_series_head', 'date'), ('is_series_head', 'last_reply_date'), ('is_series_head', 'project', 'last_reply_date')]),
> +        ),
> +    ]
> diff --git a/api/models.py b/api/models.py
> index ab0bd06..09758f6 100644
> --- a/api/models.py
> +++ b/api/models.py
> @@ -714,6 +714,10 @@ class Message(models.Model):
>  
>      class Meta:
>          unique_together = ('project', 'message_id',)
> +        index_together = [('is_series_head', 'project', 'last_reply_date'),
> +                          ('is_series_head', 'project', 'date'),
> +                          ('is_series_head', 'last_reply_date'),
> +                          ('is_series_head', 'date')]
>  
>  class MessageResult(Result):
>      message = models.ForeignKey(Message, related_name='results')
> diff --git a/tests/test_import.py b/tests/test_import.py
> index 5693d7e..64d8c66 100755
> --- a/tests/test_import.py
> +++ b/tests/test_import.py
> @@ -91,8 +91,10 @@ class ImportTest(PatchewTestCase):
>          self.assertTrue(s.project.name, sp.name)
>  
>          self.cli_import("0020-libvirt.mbox.gz")
> -        subj2 = subj + '\n[libvirt]  [PATCH v2] vcpupin: add clear feature'
> -        self.check_cli(["search", "project:Libvirt"], stdout=subj2)
> +        # the order of the search results may change, so we cannot use stdout=...
> +        a, b = self.check_cli(["search", "project:Libvirt"])
> +        self.assertEqual(sorted(a.split('\n')),
> +                         ['[libvirt]  [PATCH v2] vcpupin: add clear feature', subj])
>          self.check_cli(["search", "project:Libvirt-python"], stdout=subj)
>  
>  class UnprivilegedImportTest(ImportTest):
> -- 
> 2.17.1
> 
> _______________________________________________
> Patchew-devel mailing list
> Patchew-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/patchew-devel

_______________________________________________
Patchew-devel mailing list
Patchew-devel@redhat.com
https://www.redhat.com/mailman/listinfo/patchew-devel

Re: [Patchew-devel] [PATCH v3] add more indices to Message

Posted by Paolo Bonzini 6 weeks ago
On 01/11/2018 07:41, Fam Zheng wrote:
> On Wed, 10/31 19:32, Paolo Bonzini wrote:
>> Add indices to help the series list page.  The project is almost always
>> used as a search key, either directly or through a parent project, so
>> include both a variant with the project and one without.  The sorting
>> keys are left last, while filter keys should come first.
> 
> I don't understand when an index without project is helpful, can you elaborate?

It helps when searching, for example, by "from:pbonzini".  Having
is_series_head as part of the key helps cutting the initial SELECT
COUNT(*) query cost, and the new index can be used to quickly do both
WHERE and ORDER BY when patchew actually fetches the contents of the
list page.

The date and last_reply_date indices probably can be removed, in fact,
since they are always used together with is_series_head.

Paolo

>>
>> This reduces the SELECT COUNT(*) query in the series list from 180 ms
>> to 5 ms on a full dump from a few weeks ago.
> 
> Excellent! Thanks!
> 
> Fam
> 
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  api/migrations/0037_auto_20181031_1439.py | 19 +++++++++++++++++++
>>  api/models.py                             |  4 ++++
>>  tests/test_import.py                      |  6 ++++--
>>  3 files changed, 27 insertions(+), 2 deletions(-)
>>  create mode 100644 api/migrations/0037_auto_20181031_1439.py
>>
>> diff --git a/api/migrations/0037_auto_20181031_1439.py b/api/migrations/0037_auto_20181031_1439.py
>> new file mode 100644
>> index 0000000..ca75e37
>> --- /dev/null
>> +++ b/api/migrations/0037_auto_20181031_1439.py
>> @@ -0,0 +1,19 @@
>> +# -*- coding: utf-8 -*-
>> +# Generated by Django 1.11.15 on 2018-10-31 14:39
>> +from __future__ import unicode_literals
>> +
>> +from django.db import migrations
>> +
>> +
>> +class Migration(migrations.Migration):
>> +
>> +    dependencies = [
>> +        ('api', '0036_populate_message_tags'),
>> +    ]
>> +
>> +    operations = [
>> +        migrations.AlterIndexTogether(
>> +            name='message',
>> +            index_together=set([('is_series_head', 'project', 'date'), ('is_series_head', 'date'), ('is_series_head', 'last_reply_date'), ('is_series_head', 'project', 'last_reply_date')]),
>> +        ),
>> +    ]
>> diff --git a/api/models.py b/api/models.py
>> index ab0bd06..09758f6 100644
>> --- a/api/models.py
>> +++ b/api/models.py
>> @@ -714,6 +714,10 @@ class Message(models.Model):
>>  
>>      class Meta:
>>          unique_together = ('project', 'message_id',)
>> +        index_together = [('is_series_head', 'project', 'last_reply_date'),
>> +                          ('is_series_head', 'project', 'date'),
>> +                          ('is_series_head', 'last_reply_date'),
>> +                          ('is_series_head', 'date')]
>>  
>>  class MessageResult(Result):
>>      message = models.ForeignKey(Message, related_name='results')
>> diff --git a/tests/test_import.py b/tests/test_import.py
>> index 5693d7e..64d8c66 100755
>> --- a/tests/test_import.py
>> +++ b/tests/test_import.py
>> @@ -91,8 +91,10 @@ class ImportTest(PatchewTestCase):
>>          self.assertTrue(s.project.name, sp.name)
>>  
>>          self.cli_import("0020-libvirt.mbox.gz")
>> -        subj2 = subj + '\n[libvirt]  [PATCH v2] vcpupin: add clear feature'
>> -        self.check_cli(["search", "project:Libvirt"], stdout=subj2)
>> +        # the order of the search results may change, so we cannot use stdout=...
>> +        a, b = self.check_cli(["search", "project:Libvirt"])
>> +        self.assertEqual(sorted(a.split('\n')),
>> +                         ['[libvirt]  [PATCH v2] vcpupin: add clear feature', subj])
>>          self.check_cli(["search", "project:Libvirt-python"], stdout=subj)
>>  
>>  class UnprivilegedImportTest(ImportTest):
>> -- 
>> 2.17.1
>>
>> _______________________________________________
>> Patchew-devel mailing list
>> Patchew-devel@redhat.com
>> https://www.redhat.com/mailman/listinfo/patchew-devel

_______________________________________________
Patchew-devel mailing list
Patchew-devel@redhat.com
https://www.redhat.com/mailman/listinfo/patchew-devel

Re: [Patchew-devel] [PATCH v3] add more indices to Message

Posted by Fam Zheng 6 weeks ago
On Thu, Nov 1, 2018 at 4:18 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 01/11/2018 07:41, Fam Zheng wrote:
> > On Wed, 10/31 19:32, Paolo Bonzini wrote:
> >> Add indices to help the series list page.  The project is almost always
> >> used as a search key, either directly or through a parent project, so
> >> include both a variant with the project and one without.  The sorting
> >> keys are left last, while filter keys should come first.
> >
> > I don't understand when an index without project is helpful, can you elaborate?
>
> It helps when searching, for example, by "from:pbonzini".  Having
> is_series_head as part of the key helps cutting the initial SELECT
> COUNT(*) query cost, and the new index can be used to quickly do both
> WHERE and ORDER BY when patchew actually fetches the contents of the
> list page.
>
> The date and last_reply_date indices probably can be removed, in fact,
> since they are always used together with is_series_head.

OK! I'll queue this in next branch, thanks!

Fam

>
> Paolo
>
> >>
> >> This reduces the SELECT COUNT(*) query in the series list from 180 ms
> >> to 5 ms on a full dump from a few weeks ago.
> >
> > Excellent! Thanks!
> >
> > Fam
> >
> >>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> >> ---
> >>  api/migrations/0037_auto_20181031_1439.py | 19 +++++++++++++++++++
> >>  api/models.py                             |  4 ++++
> >>  tests/test_import.py                      |  6 ++++--
> >>  3 files changed, 27 insertions(+), 2 deletions(-)
> >>  create mode 100644 api/migrations/0037_auto_20181031_1439.py
> >>
> >> diff --git a/api/migrations/0037_auto_20181031_1439.py b/api/migrations/0037_auto_20181031_1439.py
> >> new file mode 100644
> >> index 0000000..ca75e37
> >> --- /dev/null
> >> +++ b/api/migrations/0037_auto_20181031_1439.py
> >> @@ -0,0 +1,19 @@
> >> +# -*- coding: utf-8 -*-
> >> +# Generated by Django 1.11.15 on 2018-10-31 14:39
> >> +from __future__ import unicode_literals
> >> +
> >> +from django.db import migrations
> >> +
> >> +
> >> +class Migration(migrations.Migration):
> >> +
> >> +    dependencies = [
> >> +        ('api', '0036_populate_message_tags'),
> >> +    ]
> >> +
> >> +    operations = [
> >> +        migrations.AlterIndexTogether(
> >> +            name='message',
> >> +            index_together=set([('is_series_head', 'project', 'date'), ('is_series_head', 'date'), ('is_series_head', 'last_reply_date'), ('is_series_head', 'project', 'last_reply_date')]),
> >> +        ),
> >> +    ]
> >> diff --git a/api/models.py b/api/models.py
> >> index ab0bd06..09758f6 100644
> >> --- a/api/models.py
> >> +++ b/api/models.py
> >> @@ -714,6 +714,10 @@ class Message(models.Model):
> >>
> >>      class Meta:
> >>          unique_together = ('project', 'message_id',)
> >> +        index_together = [('is_series_head', 'project', 'last_reply_date'),
> >> +                          ('is_series_head', 'project', 'date'),
> >> +                          ('is_series_head', 'last_reply_date'),
> >> +                          ('is_series_head', 'date')]
> >>
> >>  class MessageResult(Result):
> >>      message = models.ForeignKey(Message, related_name='results')
> >> diff --git a/tests/test_import.py b/tests/test_import.py
> >> index 5693d7e..64d8c66 100755
> >> --- a/tests/test_import.py
> >> +++ b/tests/test_import.py
> >> @@ -91,8 +91,10 @@ class ImportTest(PatchewTestCase):
> >>          self.assertTrue(s.project.name, sp.name)
> >>
> >>          self.cli_import("0020-libvirt.mbox.gz")
> >> -        subj2 = subj + '\n[libvirt]  [PATCH v2] vcpupin: add clear feature'
> >> -        self.check_cli(["search", "project:Libvirt"], stdout=subj2)
> >> +        # the order of the search results may change, so we cannot use stdout=...
> >> +        a, b = self.check_cli(["search", "project:Libvirt"])
> >> +        self.assertEqual(sorted(a.split('\n')),
> >> +                         ['[libvirt]  [PATCH v2] vcpupin: add clear feature', subj])
> >>          self.check_cli(["search", "project:Libvirt-python"], stdout=subj)
> >>
> >>  class UnprivilegedImportTest(ImportTest):
> >> --
> >> 2.17.1
> >>
> >> _______________________________________________
> >> Patchew-devel mailing list
> >> Patchew-devel@redhat.com
> >> https://www.redhat.com/mailman/listinfo/patchew-devel
>

_______________________________________________
Patchew-devel mailing list
Patchew-devel@redhat.com
https://www.redhat.com/mailman/listinfo/patchew-devel