Skip to content

Commit e6468f6

Browse files
committed
✨(backend) improve search indexer service configuration
New SEARCH_INDEXER_CLASS setting to define the indexer service class. Raise ImpoperlyConfigured errors instead of RuntimeError in index service. Signed-off-by: Fabre Florian <[email protected]>
1 parent a932808 commit e6468f6

File tree

11 files changed

+193
-99
lines changed

11 files changed

+193
-99
lines changed

env.d/development/common

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ OIDC_AUTH_REQUEST_EXTRA_PARAMS={"acr_values": "eidas1"}
5252
# Store OIDC tokens in the session
5353
OIDC_STORE_ACCESS_TOKEN = True # Store the access token in the session
5454
OIDC_STORE_REFRESH_TOKEN = True # Store the encrypted refresh token in the session
55-
OIDC_STORE_REFRESH_TOKEN_KEY = AnExampleKeyForDevPurposeOnly
55+
OIDC_STORE_REFRESH_TOKEN_KEY = ThisIsAnExampleKeyForDevPurposeOnly
5656

5757
# AI
5858
AI_FEATURE_ENABLED=true

src/backend/core/api/serializers.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -826,12 +826,4 @@ class MoveDocumentSerializer(serializers.Serializer):
826826
class FindDocumentSerializer(serializers.Serializer):
827827
"""Serializer for Find search requests"""
828828

829-
q = serializers.CharField(required=True)
830-
831-
def validate_q(self, value):
832-
"""Ensure the text field is not empty."""
833-
834-
if len(value.strip()) == 0:
835-
raise serializers.ValidationError("Text field cannot be empty.")
836-
837-
return value
829+
q = serializers.CharField(required=True, allow_blank=False, trim_whitespace=True)

src/backend/core/api/viewsets.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from django.contrib.postgres.aggregates import ArrayAgg
1313
from django.contrib.postgres.search import TrigramSimilarity
1414
from django.core.cache import cache
15-
from django.core.exceptions import ValidationError
15+
from django.core.exceptions import ImproperlyConfigured, ValidationError
1616
from django.core.files.storage import default_storage
1717
from django.core.validators import URLValidator
1818
from django.db import connection, transaction
@@ -49,7 +49,7 @@
4949
from core.services.converter_services import (
5050
YdocConverter,
5151
)
52-
from core.services.search_indexers import FindDocumentIndexer
52+
from core.services.search_indexers import get_document_indexer_class
5353
from core.tasks.mail import send_ask_for_access_mail
5454
from core.utils import extract_attachments, filter_descendants
5555

@@ -1047,15 +1047,15 @@ def search(self, request, *args, **kwargs):
10471047
serializer.is_valid(raise_exception=True)
10481048

10491049
try:
1050-
indexer = FindDocumentIndexer()
1050+
indexer = get_document_indexer_class()()
10511051
queryset = indexer.search(
10521052
text=serializer.validated_data.get("q", ""),
10531053
user=request.user,
10541054
token=access_token,
10551055
)
1056-
except RuntimeError:
1056+
except ImproperlyConfigured:
10571057
return drf.response.Response(
1058-
{"detail": "The service is not configured properly."},
1058+
{"detail": "The service is not properly configured."},
10591059
status=status.HTTP_401_UNAUTHORIZED,
10601060
)
10611061

src/backend/core/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,7 @@ def restore(self):
951951

952952

953953
@receiver(signals.post_save, sender=Document)
954-
def document_post_save(sender, instance, **kwargs):
954+
def document_post_save(sender, instance, **kwargs): # pylint: disable=unused-argument
955955
"""
956956
Asynchronous call to the document indexer at the end of the transaction.
957957
Note : Within the transaction we can have an empty content and a serialization
@@ -1186,7 +1186,7 @@ def get_abilities(self, user):
11861186

11871187

11881188
@receiver(signals.post_save, sender=DocumentAccess)
1189-
def document_access_post_save(sender, instance, created, **kwargs):
1189+
def document_access_post_save(sender, instance, created, **kwargs): # pylint: disable=unused-argument
11901190
"""
11911191
Asynchronous call to the document indexer at the end of the transaction.
11921192
"""

src/backend/core/services/search_indexers.py

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
import logging
44
from abc import ABC, abstractmethod
55
from collections import defaultdict
6+
from functools import cache
67

78
from django.conf import settings
89
from django.contrib.auth.models import AnonymousUser
10+
from django.core.exceptions import ImproperlyConfigured
11+
from django.utils.module_loading import import_string
912

1013
import requests
1114

@@ -14,18 +17,33 @@
1417
logger = logging.getLogger(__name__)
1518

1619

20+
@cache
21+
def get_document_indexer_class() -> "BaseDocumentIndexer":
22+
"""Return the indexer backend class based on the settings."""
23+
classpath = settings.SEARCH_INDEXER_CLASS
24+
25+
if not classpath:
26+
raise ImproperlyConfigured(
27+
"SEARCH_INDEXER_CLASS must be set in Django settings."
28+
)
29+
30+
try:
31+
return import_string(settings.SEARCH_INDEXER_CLASS)
32+
except ImportError as err:
33+
raise ImproperlyConfigured(
34+
f"SEARCH_INDEXER_CLASS setting is not valid : {err}"
35+
) from err
36+
37+
1738
def get_batch_accesses_by_users_and_teams(paths):
1839
"""
1940
Get accesses related to a list of document paths,
2041
grouped by users and teams, including all ancestor paths.
2142
"""
22-
# print("paths: ", paths)
2343
ancestor_map = utils.get_ancestor_to_descendants_map(
2444
paths, steplen=models.Document.steplen
2545
)
2646
ancestor_paths = list(ancestor_map.keys())
27-
# print("ancestor map: ", ancestor_map)
28-
# print("ancestor paths: ", ancestor_paths)
2947

3048
access_qs = models.DocumentAccess.objects.filter(
3149
document__path__in=ancestor_paths
@@ -80,6 +98,24 @@ def __init__(self, batch_size=None):
8098
Defaults to settings.SEARCH_INDEXER_BATCH_SIZE.
8199
"""
82100
self.batch_size = batch_size or settings.SEARCH_INDEXER_BATCH_SIZE
101+
self.indexer_url = settings.SEARCH_INDEXER_URL
102+
self.indexer_secret = settings.SEARCH_INDEXER_SECRET
103+
self.search_url = settings.SEARCH_INDEXER_QUERY_URL
104+
105+
if not self.indexer_url:
106+
raise ImproperlyConfigured(
107+
"SEARCH_INDEXER_URL must be set in Django settings."
108+
)
109+
110+
if not self.indexer_secret:
111+
raise ImproperlyConfigured(
112+
"SEARCH_INDEXER_SECRET must be set in Django settings."
113+
)
114+
115+
if not self.search_url:
116+
raise ImproperlyConfigured(
117+
"SEARCH_INDEXER_QUERY_URL must be set in Django settings."
118+
)
83119

84120
def index(self):
85121
"""
@@ -143,7 +179,7 @@ def search(self, text, user, token):
143179
@abstractmethod
144180
def search_query(self, data, token) -> dict:
145181
"""
146-
Retreive documents from the Find app API.
182+
Retrieve documents from the Find app API.
147183
148184
Must be implemented by subclasses.
149185
"""
@@ -204,16 +240,9 @@ def search_query(self, data, token) -> requests.Response:
204240
Returns:
205241
dict: A JSON-serializable dictionary.
206242
"""
207-
url = getattr(settings, "SEARCH_INDEXER_QUERY_URL", None)
208-
209-
if not url:
210-
raise RuntimeError(
211-
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
212-
)
213-
214243
try:
215244
response = requests.post(
216-
url,
245+
self.search_url,
217246
json=data,
218247
headers={"Authorization": f"Bearer {token}"},
219248
timeout=10,
@@ -222,7 +251,6 @@ def search_query(self, data, token) -> requests.Response:
222251
return response.json()
223252
except requests.exceptions.HTTPError as e:
224253
logger.error("HTTPError: %s", e)
225-
logger.error("Response content: %s", response.text) # type: ignore
226254
raise
227255

228256
def format_response(self, data: dict):
@@ -238,27 +266,15 @@ def push(self, data):
238266
Args:
239267
data (list): List of document dictionaries.
240268
"""
241-
url = getattr(settings, "SEARCH_INDEXER_URL", None)
242-
if not url:
243-
raise RuntimeError(
244-
"SEARCH_INDEXER_URL must be set in Django settings before indexing."
245-
)
246-
247-
secret = getattr(settings, "SEARCH_INDEXER_SECRET", None)
248-
if not secret:
249-
raise RuntimeError(
250-
"SEARCH_INDEXER_SECRET must be set in Django settings before indexing."
251-
)
252269

253270
try:
254271
response = requests.post(
255-
url,
272+
self.indexer_url,
256273
json=data,
257-
headers={"Authorization": f"Bearer {secret}"},
274+
headers={"Authorization": f"Bearer {self.indexer_secret}"},
258275
timeout=10,
259276
)
260277
response.raise_for_status()
261278
except requests.exceptions.HTTPError as e:
262279
logger.error("HTTPError: %s", e)
263-
logger.error("Response content: %s", response.text)
264280
raise

src/backend/core/tasks/find.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Trigger document indexation using celery task."""
22

3+
from functools import partial
34
from logging import getLogger
45

56
from django.conf import settings
@@ -8,8 +9,8 @@
89

910
from core import models
1011
from core.services.search_indexers import (
11-
FindDocumentIndexer,
1212
get_batch_accesses_by_users_and_teams,
13+
get_document_indexer_class,
1314
)
1415

1516
from impress.celery_app import app
@@ -52,7 +53,7 @@ def document_indexer_task(document_id):
5253
return
5354

5455
doc = models.Document.objects.get(pk=document_id)
55-
indexer = FindDocumentIndexer()
56+
indexer = get_document_indexer_class()()
5657
accesses = get_batch_accesses_by_users_and_teams((doc.path,))
5758

5859
data = indexer.serialize_document(document=doc, accesses=accesses)
@@ -75,11 +76,9 @@ def trigger_document_indexer(document, on_commit=False):
7576
pass
7677

7778
if on_commit:
78-
79-
def _aux():
80-
trigger_document_indexer(document, on_commit=False)
81-
82-
transaction.on_commit(_aux)
79+
transaction.on_commit(
80+
partial(trigger_document_indexer, document, on_commit=False)
81+
)
8382
else:
8483
key = document_indexer_debounce_key(document.pk)
8584
countdown = getattr(settings, "SEARCH_INDEXER_COUNTDOWN", 1)

src/backend/core/tests/commands/test_index.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Unit test for `index` command.
33
"""
44

5+
from operator import itemgetter
56
from unittest import mock
67

78
from django.core.management import call_command
@@ -34,19 +35,18 @@ def test_index():
3435
str(no_title_doc.path): {"users": [user.sub]},
3536
}
3637

37-
def sortkey(d):
38-
return d["id"]
39-
4038
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
4139
call_command("index")
4240

4341
push_call_args = [call.args[0] for call in mock_push.call_args_list]
4442

45-
assert len(push_call_args) == 1 # called once but with a batch of docs
46-
assert sorted(push_call_args[0], key=sortkey) == sorted(
43+
# called once but with a batch of docs
44+
mock_push.assert_called_once()
45+
46+
assert sorted(push_call_args[0], key=itemgetter("id")) == sorted(
4747
[
4848
indexer.serialize_document(doc, accesses),
4949
indexer.serialize_document(no_title_doc, accesses),
5050
],
51-
key=sortkey,
51+
key=itemgetter("id"),
5252
)

src/backend/core/tests/documents/test_api_documents_search.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def test_api_documents_search_endpoint_is_none(settings):
5757
response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"})
5858

5959
assert response.status_code == 401
60-
assert response.json() == {"detail": "The service is not configured properly."}
60+
assert response.json() == {"detail": "The service is not properly configured."}
6161

6262

6363
@responses.activate
@@ -75,6 +75,11 @@ def test_api_documents_search_invalid_params(settings):
7575
assert response.status_code == 400
7676
assert response.json() == {"q": ["This field is required."]}
7777

78+
response = APIClient().get("/api/v1.0/documents/search/", data={"q": " "})
79+
80+
assert response.status_code == 400
81+
assert response.json() == {"q": ["This field may not be blank."]}
82+
7883

7984
@responses.activate
8085
def test_api_documents_search_format(settings):

src/backend/core/tests/test_models_documents.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44
# pylint: disable=too-many-lines
55

6+
from operator import itemgetter
67
import random
78
import smtplib
89
import time
@@ -1432,7 +1433,7 @@ def test_models_documents_post_save_indexer(mock_push, settings):
14321433
factories.UserDocumentAccessFactory(document=doc2, user=user)
14331434
factories.UserDocumentAccessFactory(document=doc3, user=user)
14341435

1435-
time.sleep(0.1) # waits for the end of the tasks
1436+
time.sleep(0.2) # waits for the end of the tasks
14361437

14371438
accesses = {
14381439
str(doc1.path): {"users": [user.sub]},
@@ -1444,16 +1445,13 @@ def test_models_documents_post_save_indexer(mock_push, settings):
14441445

14451446
indexer = FindDocumentIndexer()
14461447

1447-
def sortkey(d):
1448-
return d["id"]
1449-
1450-
assert sorted(data, key=sortkey) == sorted(
1448+
assert sorted(data, key=itemgetter('id')) == sorted(
14511449
[
14521450
indexer.serialize_document(doc1, accesses),
14531451
indexer.serialize_document(doc2, accesses),
14541452
indexer.serialize_document(doc3, accesses),
14551453
],
1456-
key=sortkey,
1454+
key=itemgetter('id'),
14571455
)
14581456

14591457
# The debounce counters should be reset

0 commit comments

Comments
 (0)