Merge pull request #1082 from umap-project/mutable-search

Allow to create search index without changing unaccent mutability
This commit is contained in:
Yohan Boniface 2023-05-12 16:42:14 +02:00 committed by GitHub
commit 7dda4a6da0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 26 deletions

View file

@ -82,15 +82,15 @@ Start the server
UMap uses PostgreSQL tsvector for searching. If your database is big, you
may want to add an index. To do so, run the following statements:
# Create a basic search configuration
CREATE TEXT SEARCH CONFIGURATION umapdict (COPY=simple);
# If you also want to deal with accents and case, add this before creating the index
CREATE EXTENSION unaccent; CREATE EXTENSION unaccent;
CREATE EXTENSION btree_gin; CREATE EXTENSION btree_gin;
ALTER FUNCTION unaccent(text) IMMUTABLE; ALTER TEXT SEARCH CONFIGURATION umapdict ALTER MAPPING FOR hword, hword_part, word WITH unaccent, simple;
ALTER FUNCTION to_tsvector(text) IMMUTABLE;
CREATE INDEX search_idx ON umap_map USING gin(to_tsvector(unaccent(name)), share_status);
# Now create the index
CREATE INDEX IF NOT EXISTS search_idx ON umap_map USING GIN(to_tsvector('umapdict', name), share_status);
## Optimisations And change `UMAP_SEARCH_CONFIGURATION = "umapdict"` in your settings.
To speed up uMap homepage rendering on a large instance, the following index can be added as well (make sure you set the center to your default instance map center):
CREATE INDEX umap_map_optim ON umap_map (modified_at) WHERE ("umap_map"."share_status" = 1 AND ST_Distance("umap_map"."center", ST_GeomFromEWKT('SRID=4326;POINT(2 51)')) > 1000.0);

View file

@ -215,7 +215,7 @@ UMAP_DEMO_SITE = False
UMAP_EXCLUDE_DEFAULT_MAPS = False UMAP_EXCLUDE_DEFAULT_MAPS = False
UMAP_MAPS_PER_PAGE = 5 UMAP_MAPS_PER_PAGE = 5
UMAP_MAPS_PER_PAGE_OWNER = 10 UMAP_MAPS_PER_PAGE_OWNER = 10
UMAP_USE_UNACCENT = False UMAP_SEARCH_CONFIGURATION = "simple"
UMAP_FEEDBACK_LINK = "https://wiki.openstreetmap.org/wiki/UMap#Feedback_and_help" # noqa UMAP_FEEDBACK_LINK = "https://wiki.openstreetmap.org/wiki/UMap#Feedback_and_help" # noqa
USER_MAPS_URL = 'user_maps' USER_MAPS_URL = 'user_maps'
DATABASES = { DATABASES = {

View file

@ -94,11 +94,6 @@ SHORT_SITE_URL = "http://s.hort"
# POSTGIS_VERSION = (2, 1, 0) # POSTGIS_VERSION = (2, 1, 0)
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
# You need to enable the unaccent extension before using UMAP_USE_UNACCENT
# python manage.py dbshell
# CREATE EXTENSION unaccent;
UMAP_USE_UNACCENT = False
# Put the site in readonly mode (useful for migration or any maintenance) # Put the site in readonly mode (useful for migration or any maintenance)
UMAP_READONLY = False UMAP_READONLY = False

View file

@ -529,3 +529,13 @@ def test_create_readonly(client, user, post_data, settings):
response = client.post(url, post_data) response = client.post(url, post_data)
assert response.status_code == 403 assert response.status_code == 403
assert response.content == b'Site is readonly for maintenance' assert response.content == b'Site is readonly for maintenance'
def test_search(client, map):
    """Check that searching for a word of a map's name lists that map."""
    # Very basic search that does not deal with accents or case.
    # See install.md for how to set up a smarter dictionary and index.
    map.name = "Blé dur"
    map.save()
    search_url = reverse("search")
    page = client.get(search_url + "?q=Blé").content.decode()
    assert "Blé dur" in page

View file

@ -10,6 +10,7 @@ from django.contrib import messages
from django.contrib.auth import logout as do_logout from django.contrib.auth import logout as do_logout
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.contrib.gis.measure import D from django.contrib.gis.measure import D
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator
from django.core.signing import BadSignature, Signer from django.core.signing import BadSignature, Signer
from django.core.validators import URLValidator, ValidationError from django.core.validators import URLValidator, ValidationError
@ -140,7 +141,6 @@ home = Home.as_view()
class About(Home): class About(Home):
template_name = "umap/about.html" template_name = "umap/about.html"
@ -190,13 +190,13 @@ class Search(TemplateView, PaginatorMixin):
q = self.request.GET.get("q") q = self.request.GET.get("q")
results = [] results = []
if q: if q:
where = "to_tsvector(name) @@ plainto_tsquery(%s)" vector = SearchVector("name", config=settings.UMAP_SEARCH_CONFIGURATION)
if getattr(settings, "UMAP_USE_UNACCENT", False): query = SearchQuery(
where = "to_tsvector(unaccent(name)) @@ plainto_tsquery(unaccent(%s))" # noqa q, config=settings.UMAP_SEARCH_CONFIGURATION, search_type="websearch"
results = Map.objects.filter(share_status=Map.PUBLIC) )
results = results.extra(where=[where], params=[q]) qs = Map.objects.annotate(search=vector).filter(search=query)
results = results.order_by("-modified_at") qs = qs.filter(share_status=Map.PUBLIC).order_by('-modified_at')
results = self.paginate(results) results = self.paginate(qs)
kwargs.update({"maps": results, "q": q}) kwargs.update({"maps": results, "q": q})
return kwargs return kwargs
@ -352,7 +352,6 @@ class FormLessEditMixin:
class MapDetailMixin: class MapDetailMixin:
model = Map model = Map
def get_context_data(self, **kwargs): def get_context_data(self, **kwargs):
@ -649,7 +648,6 @@ class MapShortUrl(RedirectView):
class MapAnonymousEditUrl(RedirectView): class MapAnonymousEditUrl(RedirectView):
permanent = False permanent = False
def get(self, request, *args, **kwargs): def get(self, request, *args, **kwargs):
@ -657,7 +655,7 @@ class MapAnonymousEditUrl(RedirectView):
try: try:
pk = signer.unsign(self.kwargs["signature"]) pk = signer.unsign(self.kwargs["signature"])
except BadSignature: except BadSignature:
signer = Signer(algorithm='sha1') signer = Signer(algorithm="sha1")
try: try:
pk = signer.unsign(self.kwargs["signature"]) pk = signer.unsign(self.kwargs["signature"])
except BadSignature: except BadSignature:
@ -680,7 +678,6 @@ class MapAnonymousEditUrl(RedirectView):
class GZipMixin(object): class GZipMixin(object):
EXT = ".gz" EXT = ".gz"
@property @property