From 7384fda61afc20154e8c03a2e45c3ae3ebd01b17 Mon Sep 17 00:00:00 2001 From: Yohan Boniface Date: Wed, 10 May 2023 11:47:29 +0200 Subject: [PATCH 1/4] Allow to create search index without changing unaccent mutability cf #519 --- docs/install.md | 13 +++---------- umap/views.py | 4 ++-- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/docs/install.md b/docs/install.md index 45c62c2a..02f13236 100644 --- a/docs/install.md +++ b/docs/install.md @@ -84,13 +84,6 @@ may want to add an index. For that, you should do so: CREATE EXTENSION unaccent; CREATE EXTENSION btree_gin; - ALTER FUNCTION unaccent(text) IMMUTABLE; - ALTER FUNCTION to_tsvector(text) IMMUTABLE; - CREATE INDEX search_idx ON umap_map USING gin(to_tsvector(unaccent(name)), share_status); - - -## Optimisations - -To speed up uMap homepage rendering on a large instance, the following index can be added as well (make sure you set the center to your default instance map center): - - CREATE INDEX umap_map_optim ON umap_map (modified_at) WHERE ("umap_map"."share_status" = 1 AND ST_Distance("umap_map"."center", ST_GeomFromEWKT('SRID=4326;POINT(2 51)')) > 1000.0); + CREATE TEXT SEARCH CONFIGURATION umapdict (COPY=simple); + ALTER TEXT SEARCH CONFIGURATION umapdict ALTER MAPPING FOR hword, hword_part, word WITH unaccent, simple; + CREATE INDEX IF NOT EXISTS search_idx ON umap_map USING GIN(to_tsvector('umapdict', name), share_status); diff --git a/umap/views.py b/umap/views.py index 55c0c74a..2c193fa9 100644 --- a/umap/views.py +++ b/umap/views.py @@ -190,9 +190,9 @@ class Search(TemplateView, PaginatorMixin): q = self.request.GET.get("q") results = [] if q: - where = "to_tsvector(name) @@ plainto_tsquery(%s)" + where = "to_tsvector(name) @@ websearch_to_tsquery(%s)" if getattr(settings, "UMAP_USE_UNACCENT", False): - where = "to_tsvector(unaccent(name)) @@ plainto_tsquery(unaccent(%s))" # noqa + where = "to_tsvector(unaccent(name)) @@ websearch_to_tsquery(unaccent(%s))" # noqa results = 
Map.objects.filter(share_status=Map.PUBLIC) results = results.extra(where=[where], params=[q]) results = results.order_by("-modified_at") From 005a759b816b0b28d1a2920f6b84ab239b98e091 Mon Sep 17 00:00:00 2001 From: Yohan Boniface Date: Wed, 10 May 2023 19:24:33 +0200 Subject: [PATCH 2/4] Update umap/views.py Co-authored-by: Adrien nayrat --- umap/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umap/views.py b/umap/views.py index 2c193fa9..df17c18b 100644 --- a/umap/views.py +++ b/umap/views.py @@ -192,7 +192,7 @@ class Search(TemplateView, PaginatorMixin): if q: where = "to_tsvector(name) @@ websearch_to_tsquery(%s)" if getattr(settings, "UMAP_USE_UNACCENT", False): - where = "to_tsvector(unaccent(name)) @@ websearch_to_tsquery(unaccent(%s))" # noqa + where = "to_tsvector('umapdict',name) @@ websearch_to_tsquery('umapdict',%s)" # noqa results = Map.objects.filter(share_status=Map.PUBLIC) results = results.extra(where=[where], params=[q]) results = results.order_by("-modified_at") From 1038836a725ec19026cb49a15a22a7e6c107dc24 Mon Sep 17 00:00:00 2001 From: Yohan Boniface Date: Thu, 11 May 2023 11:33:08 +0200 Subject: [PATCH 3/4] Use Django full text instead of custom SQL --- docs/install.md | 9 ++++++++- umap/settings/base.py | 2 +- umap/settings/local.py.sample | 5 ----- umap/tests/test_map_views.py | 10 ++++++++++ umap/views.py | 15 ++++++++------- 5 files changed, 27 insertions(+), 14 deletions(-) diff --git a/docs/install.md b/docs/install.md index 02f13236..1aa9c8e3 100644 --- a/docs/install.md +++ b/docs/install.md @@ -82,8 +82,15 @@ Start the server UMap uses PostgreSQL tsvector for searching. In case your database is big, you may want to add an index. 
For that, you should do so: + # Create a basic search configuration + CREATE TEXT SEARCH CONFIGURATION umapdict (COPY=simple); + + # If you also want to deal with accents and case, add this before creating the index CREATE EXTENSION unaccent; CREATE EXTENSION btree_gin; - CREATE TEXT SEARCH CONFIGURATION umapdict (COPY=simple); ALTER TEXT SEARCH CONFIGURATION umapdict ALTER MAPPING FOR hword, hword_part, word WITH unaccent, simple; + + # Now create the index CREATE INDEX IF NOT EXISTS search_idx ON umap_map USING GIN(to_tsvector('umapdict', name), share_status); + +Then set `UMAP_SEARCH_CONFIGURATION = "umapdict"` in your settings. diff --git a/umap/settings/base.py b/umap/settings/base.py index 07bb2270..23881b68 100644 --- a/umap/settings/base.py +++ b/umap/settings/base.py @@ -215,7 +215,7 @@ UMAP_DEMO_SITE = False UMAP_EXCLUDE_DEFAULT_MAPS = False UMAP_MAPS_PER_PAGE = 5 UMAP_MAPS_PER_PAGE_OWNER = 10 -UMAP_USE_UNACCENT = False +UMAP_SEARCH_CONFIGURATION = "simple" UMAP_FEEDBACK_LINK = "https://wiki.openstreetmap.org/wiki/UMap#Feedback_and_help" # noqa USER_MAPS_URL = 'user_maps' DATABASES = { diff --git a/umap/settings/local.py.sample b/umap/settings/local.py.sample index f50c3d1a..a22a7dff 100644 --- a/umap/settings/local.py.sample +++ b/umap/settings/local.py.sample @@ -94,11 +94,6 @@ SHORT_SITE_URL = "http://s.hort" # POSTGIS_VERSION = (2, 1, 0) EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' -# You need to unable accent extension before using UMAP_USE_UNACCENT -# python manage.py dbshell -# CREATE EXTENSION unaccent; -UMAP_USE_UNACCENT = False - # Put the site in readonly mode (useful for migration or any maintenance) UMAP_READONLY = False diff --git a/umap/tests/test_map_views.py b/umap/tests/test_map_views.py index 8c2b2279..aa237517 100644 --- a/umap/tests/test_map_views.py +++ b/umap/tests/test_map_views.py @@ -529,3 +529,13 @@ def test_create_readonly(client, user, post_data, settings): response = client.post(url, post_data) assert 
response.status_code == 403 assert response.content == b'Site is readonly for maintenance' + + +def test_search(client, map): + # Very basic search that does not deal with accents or case. + # See install.md for how to have a smarter dict + index. + map.name = "Blé dur" + map.save() + url = reverse("search") + response = client.get(url + "?q=Blé") + assert "Blé dur" in response.content.decode() diff --git a/umap/views.py b/umap/views.py index df17c18b..1228853e 100644 --- a/umap/views.py +++ b/umap/views.py @@ -10,6 +10,7 @@ from django.contrib import messages from django.contrib.auth import logout as do_logout from django.contrib.auth import get_user_model from django.contrib.gis.measure import D +from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator from django.core.signing import BadSignature, Signer from django.core.validators import URLValidator, ValidationError @@ -190,13 +191,13 @@ class Search(TemplateView, PaginatorMixin): q = self.request.GET.get("q") results = [] if q: - where = "to_tsvector(name) @@ websearch_to_tsquery(%s)" - if getattr(settings, "UMAP_USE_UNACCENT", False): - where = "to_tsvector('umapdict',name) @@ websearch_to_tsquery('umapdict',%s)" # noqa - results = Map.objects.filter(share_status=Map.PUBLIC) - results = results.extra(where=[where], params=[q]) - results = results.order_by("-modified_at") - results = self.paginate(results) + vector = SearchVector("name", config=settings.UMAP_SEARCH_CONFIGURATION) + query = SearchQuery( + q, config=settings.UMAP_SEARCH_CONFIGURATION, search_type="websearch" + ) + qs = Map.objects.annotate(search=vector).filter(search=query) + qs = qs.filter(share_status=Map.PUBLIC).order_by('-modified_at') + results = self.paginate(qs) kwargs.update({"maps": results, "q": q}) return kwargs From e3d5bd794f3bbad8b7d78c3a610456c077183767 Mon Sep 17 00:00:00 2001 From: Yohan Boniface Date: Thu, 11 May 2023 11:33:30 +0200 
Subject: [PATCH 4/4] black is a colour --- umap/views.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/umap/views.py b/umap/views.py index 1228853e..c2e7f269 100644 --- a/umap/views.py +++ b/umap/views.py @@ -141,7 +141,6 @@ home = Home.as_view() class About(Home): - template_name = "umap/about.html" @@ -353,7 +352,6 @@ class FormLessEditMixin: class MapDetailMixin: - model = Map def get_context_data(self, **kwargs): @@ -650,7 +648,6 @@ class MapShortUrl(RedirectView): class MapAnonymousEditUrl(RedirectView): - permanent = False def get(self, request, *args, **kwargs): @@ -658,7 +655,7 @@ class MapAnonymousEditUrl(RedirectView): try: pk = signer.unsign(self.kwargs["signature"]) except BadSignature: - signer = Signer(algorithm='sha1') + signer = Signer(algorithm="sha1") try: pk = signer.unsign(self.kwargs["signature"]) except BadSignature: @@ -681,7 +678,6 @@ class MapAnonymousEditUrl(RedirectView): class GZipMixin(object): - EXT = ".gz" @property