From nobody Thu Apr 25 10:20:46 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of redhat.com designates 209.132.183.28 as permitted sender) client-ip=209.132.183.28; envelope-from=patchew-devel-bounces@redhat.com; helo=mx1.redhat.com; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of redhat.com designates 209.132.183.28 as permitted sender) smtp.mailfrom=patchew-devel-bounces@redhat.com; dmarc=pass(p=none dis=none) header.from=redhat.com Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by mx.zohomail.com with SMTPS id 1542636611246738.213570413779; Mon, 19 Nov 2018 06:10:11 -0800 (PST) Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id B8FB688306; Mon, 19 Nov 2018 14:10:07 +0000 (UTC) Received: from colo-mx.corp.redhat.com (colo-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.21]) by smtp.corp.redhat.com (Postfix) with ESMTPS id A997D4C48E; Mon, 19 Nov 2018 14:10:07 +0000 (UTC) Received: from lists01.pubmisc.prod.ext.phx2.redhat.com (lists01.pubmisc.prod.ext.phx2.redhat.com [10.5.19.33]) by colo-mx.corp.redhat.com (Postfix) with ESMTP id 960B2246E3; Mon, 19 Nov 2018 14:10:07 +0000 (UTC) Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16]) by lists01.pubmisc.prod.ext.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id wAJDghGC026466 for ; Mon, 19 Nov 2018 08:42:43 -0500 Received: by smtp.corp.redhat.com (Postfix) id 96EFC5C223; Mon, 19 Nov 2018 13:42:43 +0000 (UTC) Received: from donizetti.redhat.com (ovpn-112-56.ams2.redhat.com [10.36.112.56]) by smtp.corp.redhat.com (Postfix) with ESMTP id A48D05C57B for ; Mon, 19 Nov 2018 13:42:42 +0000 (UTC) From: Paolo Bonzini To: patchew-devel@redhat.com Date: Mon, 19 Nov 2018 14:42:35 +0100 Message-Id: <20181119134236.11260-2-pbonzini@redhat.com> In-Reply-To: <20181119134236.11260-1-pbonzini@redhat.com> References: <20181119134236.11260-1-pbonzini@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.16 X-loop: patchew-devel@redhat.com Subject: [Patchew-devel] [PATCH 1/2] use postgresql full text search X-BeenThere: patchew-devel@redhat.com X-Mailman-Version: 2.1.12 Precedence: junk List-Id: Patchew development and discussion list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Content-Transfer-Encoding: quoted-printable Sender: patchew-devel-bounces@redhat.com Errors-To: patchew-devel-bounces@redhat.com X-Scanned-By: MIMEDefang 2.79 on 10.5.11.16 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.28]); Mon, 19 Nov 2018 14:10:07 +0000 (UTC) Content-Type: text/plain; charset="utf-8" Signed-off-by: Paolo Bonzini --- api/migrations/0040_postgres_fts.py | 26 +++++++++++++++++++++++++ api/migrations/__init__.py | 15 +++++++++++++++ api/search.py | 30 +++++++++++++++++++++-------- patchew/settings.py | 3 +++ 4 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 api/migrations/0040_postgres_fts.py diff --git a/api/migrations/0040_postgres_fts.py b/api/migrations/0040_post= gres_fts.py new file mode 100644 index 0000000..8d8ab95 --- /dev/null +++ b/api/migrations/0040_postgres_fts.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion +from django.contrib.postgres.operations import TrigramExtension + +from api.migrations import PostgresOnlyMigration + + +class Migration(PostgresOnlyMigration): + + dependencies =3D [ + ('api', '0039_fix_project_attr_help_texts'), + ] + + operations =3D [ + TrigramExtension(), + migrations.RunSQL("create index api_message_subject_gin on api_mes= sage using gin(to_tsvector('english', subject::text));", + "drop index api_message_subject_gin"), + migrations.RunSQL("create index api_message_sender_gin on api_mess= age using gin(upper(sender) gin_trgm_ops);", + "drop index api_message_sender_gin"), + migrations.RunSQL("create index api_message_recipients_gin on api_= message using gin(upper(recipients) gin_trgm_ops);", + "drop index api_message_recipients_gin"), + ] diff --git a/api/migrations/__init__.py b/api/migrations/__init__.py index 510b26a..a273050 100644 --- a/api/migrations/__init__.py +++ b/api/migrations/__init__.py @@ -8,6 +8,7 @@ # This work is licensed under the MIT License. Please see the LICENSE fil= e or # http://opensource.org/licenses/MIT. =20 +from django.db import migrations =20 import json from api import blobs @@ -48,3 +49,17 @@ def set_property(model, name, value, **kwargs): if hasattr(mp, 'blob'): mp.blob =3D False mp.save() + + +class PostgresOnlyMigration(migrations.Migration): + def apply(self, project_state, schema_editor, collect_sql=3DFalse): + if schema_editor.connection.vendor =3D=3D "postgresql": + return super().apply(project_state, schema_editor, collect_sql= =3Dcollect_sql) + else: + return project_state + + def unapply(self, project_state, schema_editor, collect_sql=3DFalse): + if schema_editor.connection.vendor =3D=3D "postgresql": + return super().unapply(project_state, schema_editor, collect_s= ql=3Dcollect_sql) + else: + return project_state diff --git a/api/search.py b/api/search.py index 9b41641..488afa1 100644 --- a/api/search.py +++ b/api/search.py @@ -10,9 +10,12 @@ =20 from .models import Message, MessageProperty, MessageResult, Result, Review from functools import reduce + +from django.db import connection from django.db.models import Q =20 -from django.db.models import Lookup +from django.contrib.postgres.search import SearchQuery, SearchVector +from django.db.models import Lookup, lookups from django.db.models.fields import Field =20 =20 @@ -215,14 +218,15 @@ Search text keyword in the email message. Example: q =3D Q(date__lte=3Dp) return q =20 - def _make_filter_keywords(self, t): + def _add_to_keywords(self, t): self._last_keywords.append(t) - return Q(subject__icontains=3Dt) + return Q() =20 def _make_filter_is(self, cond): if cond =3D=3D "complete": return Q(is_complete=3DTrue) elif cond =3D=3D "pull": + self._add_to_keywords('PULL') return Q(subject__contains=3D'[PULL') | Q(subject__contains=3D= '[GIT PULL') elif cond =3D=3D "reviewed": return self._make_filter_subquery(MessageProperty, Q(name=3D"r= eviewed", value=3DTrue)) @@ -263,16 +267,16 @@ Search text keyword in the email message. Example: return Q(recipients__icontains=3Dcond) elif term.startswith("subject:"): cond =3D term[term.find(":") + 1:] - return Q(subject__icontains=3Dcond) + return self._add_to_keywords(cond) elif term.startswith("id:"): cond =3D term[term.find(":") + 1:] if cond[0] =3D=3D "<" and cond[-1] =3D=3D ">": cond =3D cond[1:-1] return Q(message_id=3Dcond) elif term.startswith("is:"): - return self._make_filter_is(term[3:]) or self._make_filter_key= words(term) + return self._make_filter_is(term[3:]) or self._add_to_keywords= (term) elif term.startswith("not:"): - return ~self._make_filter_is(term[4:]) or self._make_filter_ke= ywords(term) + return ~self._make_filter_is(term[4:]) or self._add_to_keyword= s(term) elif term.startswith("has:"): cond =3D term[term.find(":") + 1:] if cond =3D=3D "replies": @@ -305,7 +309,7 @@ Search text keyword in the email message. Example: return Q(project__name=3Dcond) | Q(project__parent_project__na= me=3Dcond) =20 # Keyword in subject is the default - return self._make_filter_keywords(term) + return self._add_to_keywords(term) =20 def _process_term(self, term, user): """ Return a Q object that will be applied to the query """ @@ -316,7 +320,7 @@ Search text keyword in the email message. Example: term =3D term[1:] =20 if is_plusminus and ":" not in term: - q =3D self._make_filter_is(term) or self._make_filter_keywords= (term) + q =3D self._make_filter_is(term) or self._add_to_keywords(term) else: q =3D self._make_filter(term, user) if neg: @@ -340,4 +344,14 @@ Search text keyword in the email message. Example: ) if queryset is None: queryset =3D Message.objects.series_heads() + if self._last_keywords: + if connection.vendor =3D=3D 'postgresql': + queryset =3D queryset.annotate(subjsearch=3DSearchVector('= subject', config=3D'english')) + searchq =3D reduce(lambda x, y: x & SearchQuery(y,config= =3D'english'), + self._last_keywords) + q =3D q & Q(subjsearch=3Dsearchq) + else: + q =3D reduce(lambda x, y: x & Q(subject__icontains=3Dy), + self._last_keywords, q) + return queryset.filter(q) diff --git a/patchew/settings.py b/patchew/settings.py index e433a9b..b09bba4 100644 --- a/patchew/settings.py +++ b/patchew/settings.py @@ -128,6 +128,9 @@ def env_detect(): =20 DEBUG, DATA_DIR, DATABASES =3D env_detect() =20 +if DATABASES['default']['ENGINE'] =3D=3D 'django.db.backends.postgresql': + INSTALLED_APPS +=3D ['django.contrib.postgres'] + # In production environments, we run in a container, behind nginx, which s= hould # filter the allowed host names. So be a little flexible here ALLOWED_HOSTS =3D ["*"] --=20 2.19.1 _______________________________________________ Patchew-devel mailing list Patchew-devel@redhat.com https://www.redhat.com/mailman/listinfo/patchew-devel From nobody Thu Apr 25 10:20:46 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of redhat.com designates 209.132.183.28 as permitted sender) client-ip=209.132.183.28; envelope-from=patchew-devel-bounces@redhat.com; helo=mx1.redhat.com; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of redhat.com designates 209.132.183.28 as permitted sender) smtp.mailfrom=patchew-devel-bounces@redhat.com; dmarc=pass(p=none dis=none) header.from=redhat.com Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by mx.zohomail.com with SMTPS id 1542637536400180.28005323012246; Mon, 19 Nov 2018 06:25:36 -0800 (PST) Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 9F4643082B71; Mon, 19 Nov 2018 14:25:34 +0000 (UTC) Received: from colo-mx.corp.redhat.com (colo-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.20]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 9041F5D75D; Mon, 19 Nov 2018 14:25:34 +0000 (UTC) Received: from lists01.pubmisc.prod.ext.phx2.redhat.com (lists01.pubmisc.prod.ext.phx2.redhat.com [10.5.19.33]) by colo-mx.corp.redhat.com (Postfix) with ESMTP id 82999181B9EA; Mon, 19 Nov 2018 14:25:34 +0000 (UTC) Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16]) by lists01.pubmisc.prod.ext.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id wAJDgjrI026472 for ; Mon, 19 Nov 2018 08:42:45 -0500 Received: by smtp.corp.redhat.com (Postfix) id 00C9E5C223; Mon, 19 Nov 2018 13:42:45 +0000 (UTC) Received: from donizetti.redhat.com (ovpn-112-56.ams2.redhat.com [10.36.112.56]) by smtp.corp.redhat.com (Postfix) with ESMTP id 0D5B85C8A1 for ; Mon, 19 Nov 2018 13:42:43 +0000 (UTC) From: Paolo Bonzini To: patchew-devel@redhat.com Date: Mon, 19 Nov 2018 14:42:36 +0100 Message-Id: <20181119134236.11260-3-pbonzini@redhat.com> In-Reply-To: <20181119134236.11260-1-pbonzini@redhat.com> References: <20181119134236.11260-1-pbonzini@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.79 on 10.5.11.16 X-loop: patchew-devel@redhat.com Subject: [Patchew-devel] [PATCH 2/2] work around bug in postgresql full text search X-BeenThere: patchew-devel@redhat.com X-Mailman-Version: 2.1.12 Precedence: junk List-Id: Patchew development and discussion list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Content-Transfer-Encoding: quoted-printable Sender: patchew-devel-bounces@redhat.com Errors-To: patchew-devel-bounces@redhat.com X-Scanned-By: MIMEDefang 2.79 on 10.5.11.15 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.45]); Mon, 19 Nov 2018 14:25:35 +0000 (UTC) Content-Type: text/plain; charset="utf-8" The GIN index is not used when searching for keywords. Hack around this bug in Postgres. Signed-off-by: Paolo Bonzini --- api/search.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/api/search.py b/api/search.py index 488afa1..66f78b9 100644 --- a/api/search.py +++ b/api/search.py @@ -14,7 +14,7 @@ from functools import reduce from django.db import connection from django.db.models import Q =20 -from django.contrib.postgres.search import SearchQuery, SearchVector +from django.contrib.postgres.search import SearchQuery, SearchVector, Sear= chVectorField from django.db.models import Lookup, lookups from django.db.models.fields import Field =20 @@ -33,6 +33,20 @@ class NotEqual(Lookup): class InvalidSearchTerm(Exception): pass =20 +# Hack alert: Django wraps each argument to to_tsvector with a COALESCE fu= nction, +# and that causes postgres not to use the index. Monkeypatch the construc= tor +# to skip that step, which we do not need since the subject field is not n= ullable. +class NonNullSearchVector(SearchVector): + function =3D 'to_tsvector' + arg_joiner =3D " || ' ' || " + _output_field =3D SearchVectorField() + config =3D None + + def __init__(self, *expressions, **extra): + super(SearchVector, self).__init__(*expressions, **extra) + self.config =3D self.extra.get('config', self.config) + self.weight =3D None + =20 class SearchEngine(object): """ @@ -346,7 +360,7 @@ Search text keyword in the email message. Example: queryset =3D Message.objects.series_heads() if self._last_keywords: if connection.vendor =3D=3D 'postgresql': - queryset =3D queryset.annotate(subjsearch=3DSearchVector('= subject', config=3D'english')) + queryset =3D queryset.annotate(subjsearch=3DNonNullSearchV= ector('subject', config=3D'english')) searchq =3D reduce(lambda x, y: x & SearchQuery(y,config= =3D'english'), self._last_keywords) q =3D q & Q(subjsearch=3Dsearchq) --=20 2.19.1 _______________________________________________ Patchew-devel mailing list Patchew-devel@redhat.com https://www.redhat.com/mailman/listinfo/patchew-devel