From 4ac14cc1de4d2a1f4fad875c2ec3746c6cf9572d Mon Sep 17 00:00:00 2001 From: Hugo Rodger-Brown Date: Thu, 17 Jan 2019 14:25:14 +0000 Subject: [PATCH] Update master branch to ES6 only (#36) This commit will update the master branch to require ES6 and above. It formally deprecates ES5 and ES2, which will now live on in named branches only. Aside from documentation, and updating the dependency versions, this commit also create a separate code path for partial document updates, so that instead of a single as_search_document method that takes an optional update_fields kwarg, we have two separate methods, as_search_document and as_search_document_update. Only the later has the update_fields arg, which is mandatory. There are examples in the README, so recommendation is to start there. One other addition is that the repo has been "Blackened" - so the formatting of app *.py files should now be consistent. --- Pipfile | 4 + README.rst | 65 +++++++- elasticsearch_django/__init__.py | 4 +- elasticsearch_django/admin.py | 69 ++++---- elasticsearch_django/apps.py | 40 +++-- elasticsearch_django/index.py | 36 ++-- elasticsearch_django/models.py | 190 +++++++++++----------- elasticsearch_django/settings.py | 30 ++-- elasticsearch_django/tests/__init__.py | 5 +- elasticsearch_django/tests/test_apps.py | 86 +++++----- elasticsearch_django/tests/test_models.py | 7 +- elasticsearch_django/utils.py | 6 +- setup.py | 4 +- 13 files changed, 306 insertions(+), 240 deletions(-) diff --git a/Pipfile b/Pipfile index f053e6f..23f6bfd 100644 --- a/Pipfile +++ b/Pipfile @@ -6,9 +6,13 @@ verify_ssl = true [dev-packages] tox = "*" coverage = "*" +black = "*" [packages] elasticsearch-django = {path = ".",editable = true} [requires] python_version = "3.7" + +[pipenv] +allow_prereleases = true diff --git a/README.rst b/README.rst index 6f2a7c9..77c0fb3 100644 --- a/README.rst +++ b/README.rst @@ -11,9 +11,7 @@ Elasticsearch for Django This is a lightweight Django app for people who are using 
Elasticsearch with Django, and want to manage their indexes. -**NB the master branch is now based on ES5.x. If you are using ES2.x, please switch to the ES2 branch (released on PyPI as 2.x)** - -Status +**NB the master branch is now based on ES6. If you are using ES2/ES5, please switch to the relevant branch (released on PyPI as 2.x, 5.x)** ---- @@ -104,13 +102,70 @@ So far we have configured Django to know the names of the indexes we want, and t **SearchDocumentMixin** -This mixin must be implemented by the model itself, and it requires a single method implementation - ``as_search_document()``. This should return a dict that is the index representation of the object; the ``index`` kwarg can be used to provide different representations for different indexes. By default this is ``_all`` which means that all indexes receive the same document for a given object. +This mixin is responsible for the seaerch index document format. We are indexing JSON representations of each object, and we have two methods on the mixin responsible for outputting the correct format - ``as_search_document`` and ``as_search_document_update``. + +An aside on the mechanics of the ``auto_sync`` process, which is hooked up using Django's ``post_save`` and ``post_delete`` model signals. ES supports partial updates to documents that already exist, and we make a fundamental assumption about indexing models - that **if you pass the ``update_fields`` kwarg to a ``model.save`` method call, then you are performing a partial update**, and this will be propagated to ES as a partial update only. + +To this end, we have two methods for generating the model's JSON representation - ``as_search_document``, which should return a dict that represents the entire object; and ``as_search_document_update``, which takes the ``update_fields`` kwarg, and should return a dict that contains just the fields that are being updated. 
(NB this is not automated as the field being updated may itself be an object, which needs specific formatting - see below for an example). + +To better understand this, let us say that we have a model (``MyModel``) that is configured to be included in an index called ``myindex``. If we save an object, without passing ``update_fields``, then this is considered a full document update, which triggers the object's ``index_search_document`` method: + +.. code:: python + + obj = MyModel.objects.first() + obj.save() + ... + # AUTO_SYNC=true will trigger a re-index of the complete object document: + obj.index_search_document(index='myindex') + +However, if we only want to update a single field (say the ``timestamp``), and we pass this in to the save method, then this will trigger the ``update_search_document`` method, passing in the names of the fields that we want updated. + +.. code:: python + + # save a single field on the object + obj.save(update_fields=['timestamp']) + ... + # AUTO_SYNC=true will trigger a partial update of the object document + obj.update_search_document(index, update_fields=['timestamp']) + +We pass the name of the index being updated as the first arg, as objects may have different representations in different indexes: .. code:: python - def as_search_document(self, index='_all'): + def as_search_document(self, index): return {'name': "foo"} if index == 'foo' else {'name': "bar"} +In the case of the second method, the simplest possible implementation would be a dictionary containing the names of the fields being updated and their new values. However, if the value is itself an object, it will have to serialized properly. As an example: + +.. 
code:: python + + def as_search_document_update(self, index, update_fields): + # create a basic attr: value dict from the fields that were updated + doc = {f: getattr(self, f) for f in update_fields} + # if the 'user' attr was updated, we need to convert this from + # a User object to something ES can index - in this case just the full name + # (NB GPDR klaxon sounding at this point - do you have permission to do this?) + if 'user' in doc: + doc['user'] = self.user.get_full_name() + return doc + +The reason we have split out the update from the full-document index comes from a real problem that we ourselves suffered. The full object representation that we were using was quite DB intensive - we were storing properties of the model that required walking the ORM tree. However, because we were also touching the objects (see below) to record activity timestamps, we ended up flooding the database with queries simply to update a single field in the output document. Partial updates solves this issue: + +.. code:: python + + def touch(self): + self.timestamp = now() + self.save(update_fields=['timestamp']) + + def as_search_document_update(self, index, update_fields): + if update_fields == ['timestamp']: + # only propagate changes if it's +1hr since the last timestamp change + if now() - self.timestamp < timedelta(hours=1): + return {} + else: + return {'timestamp': self.timestamp} + .... + **SearchDocumentManagerMixin** This mixin must be implemented by the model's default manager (``objects``). It also requires a single method implementation - ``get_search_queryset()`` - which returns a queryset of objects that are to be indexed. This can also use the ``index`` kwarg to provide different sets of objects to different indexes. 
diff --git a/elasticsearch_django/__init__.py b/elasticsearch_django/__init__.py index 3591b63..74b07a2 100644 --- a/elasticsearch_django/__init__.py +++ b/elasticsearch_django/__init__.py @@ -1,3 +1,3 @@ -default_app_config = 'elasticsearch_django.apps.ElasticAppConfig' +default_app_config = "elasticsearch_django.apps.ElasticAppConfig" -__version__ = '5.3' \ No newline at end of file +__version__ = "6.0" diff --git a/elasticsearch_django/admin.py b/elasticsearch_django/admin.py index 08a6184..31b7c3e 100644 --- a/elasticsearch_django/admin.py +++ b/elasticsearch_django/admin.py @@ -19,12 +19,7 @@ def pprint(data): until someone builds a custom syntax function. """ - pretty = json.dumps( - data, - sort_keys=True, - indent=4, - separators=(',', ': ') - ) + pretty = json.dumps(data, sort_keys=True, indent=4, separators=(",", ": ")) html = pretty.replace(" ", " ").replace("\n", "
") return mark_safe("%s" % html) @@ -32,39 +27,32 @@ def pprint(data): class SearchQueryAdmin(admin.ModelAdmin): list_display = ( - 'id', - 'user', - 'search_terms_', - 'total_hits', - 'returned_', - 'min_', - 'max_', - 'reference', - 'executed_at', - ) - list_filter = ( - 'index', - ) - search_fields = ( - 'search_terms', - 'user__first_name', - 'user__last_name', - 'reference' + "id", + "user", + "search_terms_", + "total_hits", + "returned_", + "min_", + "max_", + "reference", + "executed_at", ) + list_filter = ("index",) + search_fields = ("search_terms", "user__first_name", "user__last_name", "reference") # excluding because we are using a pretty version instead - exclude = ('hits', 'query', 'page') + exclude = ("hits", "query", "page") readonly_fields = ( - 'user', - 'index', - 'search_terms', - 'total_hits', - 'returned_', - 'min_', - 'max_', - 'duration', - 'query_', - 'hits_', - 'executed_at', + "user", + "index", + "search_terms", + "total_hits", + "returned_", + "min_", + "max_", + "duration", + "query_", + "hits_", + "executed_at", ) def search_terms_(self, instance): @@ -79,18 +67,20 @@ def query_(self, instance): def max_(self, instance): """Pretty version of max_score.""" - return '-' if instance.page_size == 0 else instance.max_score + return "-" if instance.page_size == 0 else instance.max_score + max_.short_description = "Max score" def min_(self, instance): """Pretty version of min_score.""" - return '-' if instance.page_size == 0 else instance.min_score + return "-" if instance.page_size == 0 else instance.min_score + min_.short_description = "Min score" def returned_(self, instance): """Number of hits returned in the page.""" if instance.page_size == 0: - return '-' + return "-" else: return "%i - %i" % (instance.page_from, instance.page_to) @@ -100,4 +90,5 @@ def hits_(self, instance): """Pretty version of hits JSON.""" return pprint(instance.hits) + admin.site.register(SearchQuery, SearchQueryAdmin) diff --git a/elasticsearch_django/apps.py 
b/elasticsearch_django/apps.py index baa0760..2c29d54 100644 --- a/elasticsearch_django/apps.py +++ b/elasticsearch_django/apps.py @@ -13,15 +13,15 @@ class ElasticAppConfig(AppConfig): """AppConfig for Search3.""" - name = 'elasticsearch_django' + name = "elasticsearch_django" verbose_name = "Elasticsearch" configs = [] def ready(self): """Validate config and connect signals.""" super(ElasticAppConfig, self).ready() - _validate_config(settings.get_setting('strict_validation')) - if settings.get_setting('auto_sync'): + _validate_config(settings.get_setting("strict_validation")) + if settings.get_setting("auto_sync"): _connect_signals() else: logger.debug("SEARCH_AUTO_SYNC has been disabled.") @@ -48,11 +48,9 @@ def _validate_mapping(index, strict=False): def _validate_model(model): """Check that a model configured for an index subclasses the required classes.""" - if not hasattr(model, 'as_search_document'): - raise ImproperlyConfigured( - "'%s' must implement `as_search_document`." % model - ) - if not hasattr(model.objects, 'get_search_queryset'): + if not hasattr(model, "as_search_document"): + raise ImproperlyConfigured("'%s' must implement `as_search_document`." % model) + if not hasattr(model.objects, "get_search_queryset"): raise ImproperlyConfigured( "'%s.objects must implement `get_search_queryset`." 
% model ) @@ -68,22 +66,30 @@ def _connect_signals(): def _connect_model_signals(model): """Connect signals for a single model.""" if settings.auto_sync(model): - dispatch_uid = '%s.post_save' % model._meta.model_name + dispatch_uid = "%s.post_save" % model._meta.model_name logger.debug("Connecting search index model sync signal: %s", dispatch_uid) - signals.post_save.connect(_on_model_save, sender=model, dispatch_uid=dispatch_uid) - dispatch_uid = '%s.post_delete' % model._meta.model_name + signals.post_save.connect( + _on_model_save, sender=model, dispatch_uid=dispatch_uid + ) + dispatch_uid = "%s.post_delete" % model._meta.model_name logger.debug("Connecting search index model sync signal: %s", dispatch_uid) - signals.post_delete.connect(_on_model_delete, sender=model, dispatch_uid=dispatch_uid) + signals.post_delete.connect( + _on_model_delete, sender=model, dispatch_uid=dispatch_uid + ) else: - logger.debug("Auto sync disabled for '%s', ignoring update.", model._meta.model_name) + logger.debug( + "Auto sync disabled for '%s', ignoring update.", model._meta.model_name + ) def _on_model_save(sender, **kwargs): """Update document in search index post_save.""" - update_fields = kwargs['update_fields'] - instance = kwargs['instance'] + update_fields = kwargs["update_fields"] + instance = kwargs["instance"] for index in instance.search_indexes: - _update_search_index(instance=instance, index=index, update_fields=update_fields) + _update_search_index( + instance=instance, index=index, update_fields=update_fields + ) def _update_search_index(*, instance, index, update_fields): @@ -106,7 +112,7 @@ def _on_model_delete(sender, **kwargs): the object will no longer in exist in the database. 
""" - instance = kwargs['instance'] + instance = kwargs["instance"] for index in instance.search_indexes: try: instance.delete_search_document(index=index) diff --git a/elasticsearch_django/index.py b/elasticsearch_django/index.py index b0c6ec7..0ffe661 100644 --- a/elasticsearch_django/index.py +++ b/elasticsearch_django/index.py @@ -2,12 +2,7 @@ from elasticsearch import helpers -from .settings import ( - get_setting, - get_index_mapping, - get_index_models, - get_client -) +from .settings import get_setting, get_index_mapping, get_index_models, get_client logger = logging.getLogger(__name__) @@ -27,8 +22,8 @@ def update_index(index): for model in get_index_models(index): logger.info("Updating search index model: '%s'", model.search_doc_type) objects = model.objects.get_search_queryset(index).iterator() - actions = bulk_actions(objects, index=index, action='index') - response = helpers.bulk(client, actions, chunk_size=get_setting('chunk_size')) + actions = bulk_actions(objects, index=index, action="index") + response = helpers.bulk(client, actions, chunk_size=get_setting("chunk_size")) responses.append(response) return responses @@ -68,11 +63,15 @@ def prune_index(index): prunes.append(obj) logger.info( "Found %s objects of type '%s' for deletion from '%s'.", - len(prunes), model, index + len(prunes), + model, + index, ) if len(prunes) > 0: - actions = bulk_actions(prunes, index, 'delete') - response = helpers.bulk(client, actions, chunk_size=get_setting('chunk_size')) + actions = bulk_actions(prunes, index, "delete") + response = helpers.bulk( + client, actions, chunk_size=get_setting("chunk_size") + ) responses.append(response) return responses @@ -98,18 +97,19 @@ def _prune_hit(hit, model): a 'delete' action. 
""" - hit_id = hit['_id'] - hit_index = hit['_index'] + hit_id = hit["_id"] + hit_index = hit["_index"] if model.objects.in_search_queryset(hit_id, index=hit_index): logger.debug( - "%s with id=%s exists in the '%s' index queryset.", - model, hit_id, hit_index + "%s with id=%s exists in the '%s' index queryset.", model, hit_id, hit_index ) return None else: logger.debug( "%s with id=%s does not exist in the '%s' index queryset and will be pruned.", - model, hit_id, hit_index + model, + hit_id, + hit_index, ) # we don't need the full obj for a delete action, just the id. # (the object itself may not even exist.) @@ -158,7 +158,9 @@ def bulk_actions(objects, index, action): how the final document is formatted. """ - assert index != '_all', "index arg must be a valid index name. '_all' is a reserved term." + assert ( + index != "_all" + ), "index arg must be a valid index name. '_all' is a reserved term." logger.info("Creating bulk '%s' actions for '%s'", action, index) for obj in objects: try: diff --git a/elasticsearch_django/models.py b/elasticsearch_django/models.py index 2e7035e..520309e 100644 --- a/elasticsearch_django/models.py +++ b/elasticsearch_django/models.py @@ -11,11 +11,7 @@ from django.db.models.fields import CharField from django.utils.timezone import now as tz_now -from .settings import ( - get_client, - get_setting, - get_model_indexes, -) +from .settings import get_client, get_setting, get_model_indexes logger = logging.getLogger(__name__) @@ -32,7 +28,7 @@ class SearchDocumentManagerMixin(object): """ - def get_search_queryset(self, index='_all'): + def get_search_queryset(self, index="_all"): """ Return the dataset used to populate the search index. @@ -51,7 +47,7 @@ def get_search_queryset(self, index='_all'): ) ) - def in_search_queryset(self, instance_id, index='_all'): + def in_search_queryset(self, instance_id, index="_all"): """ Return True if an object is part of the search index queryset. 
@@ -115,16 +111,16 @@ def from_search_query(self, search_query): """ hits = search_query.hits - score_sql = self._raw_sql([(h['id'], h['score'] or 0) for h in hits]) - rank_sql = self._raw_sql([(hits[i]['id'], i) for i in range(len(hits))]) + score_sql = self._raw_sql([(h["id"], h["score"] or 0) for h in hits]) + rank_sql = self._raw_sql([(hits[i]["id"], i) for i in range(len(hits))]) return ( self.get_queryset() - .filter(pk__in=[h['id'] for h in hits]) + .filter(pk__in=[h["id"] for h in hits]) # add the query relevance score .annotate(search_score=RawSQL(score_sql, ())) # add the ordering number (0-based) .annotate(search_rank=RawSQL(rank_sql, ())) - .order_by('search_rank') + .order_by("search_rank") ) def _when(self, x, y): @@ -133,12 +129,16 @@ def _when(self, x, y): def _raw_sql(self, values): """Prepare SQL statement consisting of a sequence of WHEN .. THEN statements.""" if isinstance(self.model._meta.pk, CharField): - when_clauses = ' '.join([self._when("'{}'".format(x), y) for (x, y) in values]) + when_clauses = " ".join( + [self._when("'{}'".format(x), y) for (x, y) in values] + ) else: - when_clauses = ' '.join([self._when(x, y) for (x, y) in values]) + when_clauses = " ".join([self._when(x, y) for (x, y) in values]) table_name = self.model._meta.db_table primary_key = self.model._meta.pk.column - return 'SELECT CASE {}."{}" {} ELSE 0 END'.format(table_name, primary_key, when_clauses) + return 'SELECT CASE {}."{}" {} ELSE 0 END'.format( + table_name, primary_key, when_clauses + ) class SearchDocumentMixin(object): @@ -160,10 +160,8 @@ def search_indexes(self): @property def search_document_cache_key(self): """Key used for storing search docs in local cache.""" - return 'elasticsearch_django:{}.{}.{}'.format( - self._meta.app_label, - self._meta.model_name, - self.pk + return "elasticsearch_django:{}.{}.{}".format( + self._meta.app_label, self._meta.model_name, self.pk ) @property @@ -171,7 +169,7 @@ def search_doc_type(self): """Return the doc_type used 
for the model.""" return self._meta.model_name - def as_search_document(self, index='_all', update_fields=None): + def as_search_document(self, *, index): """ Return the object as represented in a named index. @@ -185,18 +183,33 @@ def as_search_document(self, index='_all', update_fields=None): appear - this allows different representations in different indexes. Defaults to '_all', in which case all indexes use the same search document structure. - update_fields: List of strings, list of fields to be updated in - the case of a partial update - this allows a much faster - update when it is known only 1 field has changed for example. Returns a dictionary. """ raise NotImplementedError( - "{} does not implement 'get_search_document'.".format(self.__class__.__name__) + "{} does not implement 'as_search_document'.".format( + self.__class__.__name__ + ) + ) + + def as_search_document_update(self, *, index, update_fields): + """ + Return a partial update document based on which fields have been updated. + + If an object is saved with the `update_fields` argument passed + through, then it is assumed that this is a 'partial update'. In + this scenario we need a {property: value} dictionary containing + just the fields we want to update. + + """ + raise NotImplementedError( + "{} does not implement 'as_search_document_update'.".format( + self.__class__.__name__ + ) ) - def as_search_action(self, index, action): + def as_search_action(self, *, index, action): """ Return an object as represented in a bulk api operation. @@ -216,31 +229,27 @@ def as_search_action(self, index, action): Returns a dictionary. 
""" - if action not in ('index', 'update', 'delete'): + if action not in ("index", "update", "delete"): raise ValueError("Action must be 'index', 'update' or 'delete'.") document = { - '_index': index, - '_type': self.search_doc_type, - '_op_type': action, - '_id': self.pk, + "_index": index, + "_type": self.search_doc_type, + "_op_type": action, + "_id": self.pk, } - if action == 'index': - document['_source'] = self.as_search_document(index) - elif action == 'update': - document['doc'] = self.as_search_document(index) + if action == "index": + document["_source"] = self.as_search_document(index=index) + elif action == "update": + document["doc"] = self.as_search_document(index=index) return document - def fetch_search_document(self, index): + def fetch_search_document(self, *, index): """Fetch the object's document from a search index by id.""" assert self.pk, "Object must have a primary key before being indexed." client = get_client() - return client.get( - index=index, - doc_type=self.search_doc_type, - id=self.pk - ) + return client.get(index=index, doc_type=self.search_doc_type, id=self.pk) def index_search_document(self, *, index): """ @@ -253,17 +262,14 @@ def index_search_document(self, *, index): """ cache_key = self.search_document_cache_key - new_doc = self.as_search_document(index) + new_doc = self.as_search_document(index=index) cached_doc = cache.get(cache_key) if new_doc == cached_doc: logger.debug("Search document for %r is unchanged, ignoring update.", self) return [] - cache.set(cache_key, new_doc, timeout=get_setting('CACHE_EXPIRY', 60)) + cache.set(cache_key, new_doc, timeout=get_setting("CACHE_EXPIRY", 60)) get_client().index( - index=index, - doc_type=self.search_doc_type, - body=new_doc, - id=self.pk + index=index, doc_type=self.search_doc_type, body=new_doc, id=self.pk ) def update_search_document(self, *, index, update_fields): @@ -289,18 +295,18 @@ def update_search_document(self, *, index, update_fields): get_client().update( 
index=index, doc_type=self.search_doc_type, - body=dict(doc=self.as_search_document(index, update_fields=update_fields)), - id=self.pk + body={ + "doc": self.as_search_document_update( + index=index, update_fields=update_fields + ) + }, + id=self.pk, ) def delete_search_document(self, *, index): """Delete document from named index.""" cache.delete(self.search_document_cache_key) - get_client().delete( - index=index, - doc_type=self.search_doc_type, - id=self.pk - ) + get_client().delete(index=index, doc_type=self.search_doc_type, id=self.pk) class SearchQuery(models.Model): @@ -321,15 +327,16 @@ class SearchQuery(models.Model): user = models.ForeignKey( settings.AUTH_USER_MODEL, - related_name='search_queries', - blank=True, null=True, + related_name="search_queries", + blank=True, + null=True, help_text="The user who made the search query (nullable).", - on_delete=models.SET_NULL + on_delete=models.SET_NULL, ) index = models.CharField( max_length=100, - default='_all', - help_text="The name of the ElasticSearch index(es) being queried." + default="_all", + help_text="The name of the ElasticSearch index(es) being queried.", ) # The query property contains the raw DSL query, which can be arbitrarily complex - there # is no one way of mapping input text to the query itself. However, it's often helpful to @@ -337,27 +344,26 @@ class SearchQuery(models.Model): # JSON. search_terms = models.CharField( max_length=400, - default='', + default="", blank=True, - help_text="Free text search terms used in the query, stored for easy reference." 
+ help_text="Free text search terms used in the query, stored for easy reference.", ) query = JSONField( - help_text="The raw ElasticSearch DSL query.", - encoder=DjangoJSONEncoder + help_text="The raw ElasticSearch DSL query.", encoder=DjangoJSONEncoder ) hits = JSONField( help_text="The list of meta info for each of the query matches returned.", - encoder=DjangoJSONEncoder + encoder=DjangoJSONEncoder, ) total_hits = models.IntegerField( default=0, - help_text="Total number of matches found for the query (!= the hits returned)." + help_text="Total number of matches found for the query (!= the hits returned).", ) reference = models.CharField( max_length=100, - default='', + default="", blank=True, - help_text="Custom reference used to identify and group related searches." + help_text="Custom reference used to identify and group related searches.", ) executed_at = models.DateTimeField( help_text="When the search was executed - set via execute() method." @@ -367,78 +373,68 @@ class SearchQuery(models.Model): ) class Meta: - app_label = 'elasticsearch_django' + app_label = "elasticsearch_django" verbose_name = "Search query" verbose_name_plural = "Search queries" def __str__(self): - return ( - "Query (id={}) run against index '{}'".format(self.pk, self.index) - ) + return "Query (id={}) run against index '{}'".format(self.pk, self.index) def __repr__(self): - return ( - "".format( - self.pk, - self.user, - self.index, - self.total_hits - ) + return "".format( + self.pk, self.user, self.index, self.total_hits ) @classmethod - def execute(cls, search, search_terms='', user=None, reference=None, save=True): + def execute(cls, search, search_terms="", user=None, reference=None, save=True): """Create a new SearchQuery instance and execute a search against ES.""" warnings.warn( "Pending deprecation - please use `execute_search` function instead.", - PendingDeprecationWarning + PendingDeprecationWarning, ) return execute_search( - search, - search_terms=search_terms, - 
user=user, - reference=reference, - save=save + search, search_terms=search_terms, user=user, reference=reference, save=save ) def save(self, **kwargs): """Save and return the object (for chaining).""" if self.search_terms is None: - self.search_terms = '' + self.search_terms = "" super().save(**kwargs) return self def _extract_set(self, _property): - return [] if self.hits is None else ( - list(set([h[_property] for h in self.hits])) + return ( + [] if self.hits is None else (list(set([h[_property] for h in self.hits]))) ) @property def doc_types(self): """List of doc_types extracted from hits.""" - return self._extract_set('doc_type') + return self._extract_set("doc_type") @property def max_score(self): """The max relevance score in the returned page.""" - return max(self._extract_set('score') or [0]) + return max(self._extract_set("score") or [0]) @property def min_score(self): """The min relevance score in the returned page.""" - return min(self._extract_set('score') or [0]) + return min(self._extract_set("score") or [0]) @property def object_ids(self): """List of model ids extracted from hits.""" - return self._extract_set('id') + return self._extract_set("id") @property def page_slice(self): """Return the query from:size tuple (0-based).""" - return None if self.query is None else ( - self.query.get('from', 0), - self.query.get('size', 10) + return ( + None + if self.query is None + else (self.query.get("from", 0), self.query.get("size", 10)) ) @property @@ -457,7 +453,7 @@ def page_size(self): return 0 if self.hits is None else len(self.hits) -def execute_search(search, search_terms='', user=None, reference=None, save=True): +def execute_search(search, search_terms="", user=None, reference=None, save=True): """ Create a new SearchQuery instance and execute a search against ES. 
@@ -482,13 +478,13 @@ def execute_search(search, search_terms='', user=None, reference=None, save=True log = SearchQuery( user=user, search_terms=search_terms, - index=', '.join(search._index or ['_all'])[:100], # field length restriction + index=", ".join(search._index or ["_all"])[:100], # field length restriction query=search.to_dict(), hits=[h.meta.to_dict() for h in response.hits], total_hits=response.hits.total, - reference=reference or '', + reference=reference or "", executed_at=tz_now(), - duration=duration + duration=duration, ) log.response = response return log.save() if save else log diff --git a/elasticsearch_django/settings.py b/elasticsearch_django/settings.py index ed2e21b..8b482d8 100644 --- a/elasticsearch_django/settings.py +++ b/elasticsearch_django/settings.py @@ -16,14 +16,14 @@ from elasticsearch import Elasticsearch -def get_client(connection='default'): +def get_client(connection="default"): """Return configured elasticsearch client.""" return Elasticsearch(get_connection_string(connection)) def get_settings(): """Return settings from Django conf.""" - return settings.SEARCH_SETTINGS['settings'] + return settings.SEARCH_SETTINGS["settings"] def get_setting(key, *default): @@ -39,19 +39,19 @@ def set_setting(key, value): get_settings()[key] = value -def get_connection_string(connection='default'): +def get_connection_string(connection="default"): """Return index settings from Django conf.""" - return settings.SEARCH_SETTINGS['connections'][connection] + return settings.SEARCH_SETTINGS["connections"][connection] def get_index_config(index): """Return index settings from Django conf.""" - return settings.SEARCH_SETTINGS['indexes'][index] + return settings.SEARCH_SETTINGS["indexes"][index] def get_index_names(): """Return list of the names of all configured indexes.""" - return list(settings.SEARCH_SETTINGS['indexes'].keys()) + return list(settings.SEARCH_SETTINGS["indexes"].keys()) def get_index_mapping(index): @@ -65,10 +65,10 @@ def 
get_index_mapping(index): """ # app_path = apps.get_app_config('elasticsearch_django').path - mappings_dir = get_setting('mappings_dir') - filename = '%s.json' % index + mappings_dir = get_setting("mappings_dir") + filename = "%s.json" % index path = os.path.join(mappings_dir, filename) - with open(path, 'r') as f: + with open(path, "r") as f: return json.load(f) @@ -80,8 +80,8 @@ def get_index_models(index): """ models = [] - for app_model in get_index_config(index).get('models'): - app, model = app_model.split('.') + for app_model in get_index_config(index).get("models"): + app, model = app_model.split(".") models.append(apps.get_model(app, model)) return models @@ -111,22 +111,22 @@ def get_document_models(): mappings = {} for i in get_index_names(): for m in get_index_models(i): - key = '%s.%s' % (i, m._meta.model_name) + key = "%s.%s" % (i, m._meta.model_name) mappings[key] = m return mappings def get_document_model(index, doc_type): """Return dict of index.doc_type: model.""" - return get_document_models().get('%s.%s' % (index, doc_type)) + return get_document_models().get("%s.%s" % (index, doc_type)) def auto_sync(instance): """Returns bool if auto_sync is on for the model (instance)""" # this allows us to turn off sync temporarily - e.g. 
when doing bulk updates - if not get_setting('auto_sync'): + if not get_setting("auto_sync"): return False model_name = "{}.{}".format(instance._meta.app_label, instance._meta.model_name) - if model_name in get_setting('never_auto_sync', []): + if model_name in get_setting("never_auto_sync", []): return False return True diff --git a/elasticsearch_django/tests/__init__.py b/elasticsearch_django/tests/__init__.py index a4763f6..5f134dd 100644 --- a/elasticsearch_django/tests/__init__.py +++ b/elasticsearch_django/tests/__init__.py @@ -24,5 +24,8 @@ class Meta: objects = TestModelManager() - def as_search_document(self, index='_all', update_fields=None): + def as_search_document(self, index): + return SEARCH_DOC + + def as_search_document_update(self, index, update_fields): return SEARCH_DOC diff --git a/elasticsearch_django/tests/test_apps.py b/elasticsearch_django/tests/test_apps.py index b0ef6bf..b8ff996 100644 --- a/elasticsearch_django/tests/test_apps.py +++ b/elasticsearch_django/tests/test_apps.py @@ -22,14 +22,14 @@ class SearchAppsConfigTests(TestCase): """Tests for the apps module ready function.""" - @mock.patch('elasticsearch_django.apps.settings.get_setting') - @mock.patch('elasticsearch_django.apps.settings.get_settings') - @mock.patch('elasticsearch_django.apps._validate_config') - @mock.patch('elasticsearch_django.apps._connect_signals') + @mock.patch("elasticsearch_django.apps.settings.get_setting") + @mock.patch("elasticsearch_django.apps.settings.get_settings") + @mock.patch("elasticsearch_django.apps._validate_config") + @mock.patch("elasticsearch_django.apps._connect_signals") def test_ready(self, mock_signals, mock_config, mock_settings, mock_setting): """Test the AppConfig.ready method.""" mock_setting.return_value = True # auto-sync - config = ElasticAppConfig('foo_bar', tests) + config = ElasticAppConfig("foo_bar", tests) config.ready() mock_config.assert_called_once_with(mock_setting.return_value) mock_signals.assert_called_once_with() @@ 
-49,99 +49,103 @@ class SearchAppsValidationTests(TestCase): def test__validate_model(self): """Test _validate_model function.""" # 1. model doesn't implement as_search_document - with mock.patch('elasticsearch_django.tests.test_apps.TestModel') as tm: + with mock.patch("elasticsearch_django.tests.test_apps.TestModel") as tm: del tm.as_search_document self.assertRaises(ImproperlyConfigured, _validate_model, tm) # 2. model.objects doesn't implement get_search_queryset - with mock.patch('elasticsearch_django.tests.test_apps.TestModel') as tm: + with mock.patch("elasticsearch_django.tests.test_apps.TestModel") as tm: del tm.objects.get_search_queryset self.assertRaises(ImproperlyConfigured, _validate_model, tm) # model should pass - with mock.patch('elasticsearch_django.tests.test_apps.TestModel') as tm: + with mock.patch("elasticsearch_django.tests.test_apps.TestModel") as tm: _validate_model(tm) - @mock.patch('elasticsearch_django.apps.settings') + @mock.patch("elasticsearch_django.apps.settings") def test__validate_mapping(self, mock_settings): """Test _validate_model function.""" - _validate_mapping('foo', strict=True) - mock_settings.get_index_mapping.assert_called_once_with('foo') + _validate_mapping("foo", strict=True) + mock_settings.get_index_mapping.assert_called_once_with("foo") mock_settings.get_index_mapping.side_effect = IOError() - self.assertRaises(ImproperlyConfigured, _validate_mapping, 'foo', strict=True) + self.assertRaises(ImproperlyConfigured, _validate_mapping, "foo", strict=True) # shouldn't raise error - _validate_mapping('foo', strict=False) + _validate_mapping("foo", strict=False) - @mock.patch('elasticsearch_django.apps.settings') - @mock.patch('elasticsearch_django.apps._validate_model') - @mock.patch('elasticsearch_django.apps._validate_mapping') + @mock.patch("elasticsearch_django.apps.settings") + @mock.patch("elasticsearch_django.apps._validate_model") + @mock.patch("elasticsearch_django.apps._validate_mapping") def 
test__validate_config(self, mock_mapping, mock_model, mock_settings): """Test _validate_model function.""" - mock_settings.get_index_names.return_value = ['foo'] + mock_settings.get_index_names.return_value = ["foo"] mock_settings.get_index_models.return_value = [TestModel] _validate_config() - mock_mapping.assert_called_once_with('foo', strict=False) + mock_mapping.assert_called_once_with("foo", strict=False) mock_model.assert_called_once_with(TestModel) - @mock.patch('elasticsearch_django.apps.signals') - @mock.patch('elasticsearch_django.apps.settings') + @mock.patch("elasticsearch_django.apps.signals") + @mock.patch("elasticsearch_django.apps.settings") def test__connect_signals(self, mock_settings, mock_signals): """Test the _connect_signals function.""" # this should connect up the signals once, for TestModel - mock_settings.get_index_names.return_value = ['foo'] + mock_settings.get_index_names.return_value = ["foo"] mock_settings.get_index_models.return_value = [TestModel] _connect_signals() mock_signals.post_save.connect.assert_called_once_with( - _on_model_save, - sender=TestModel, - dispatch_uid='testmodel.post_save' + _on_model_save, sender=TestModel, dispatch_uid="testmodel.post_save" ) mock_signals.post_delete.connect.assert_called_once_with( - _on_model_delete, - sender=TestModel, - dispatch_uid='testmodel.post_delete' + _on_model_delete, sender=TestModel, dispatch_uid="testmodel.post_delete" ) - @mock.patch.object(SearchDocumentMixin, 'search_indexes', ['foo']) - @mock.patch.object(SearchDocumentMixin, 'delete_search_document') + @mock.patch.object(SearchDocumentMixin, "search_indexes", ["foo"]) + @mock.patch.object(SearchDocumentMixin, "delete_search_document") def test__on_model_delete(self, mock_delete): """Test the _on_model_delete function.""" obj = SearchDocumentMixin() _on_model_delete(None, instance=obj) - mock_delete.assert_called_once_with(index='foo') + mock_delete.assert_called_once_with(index="foo") - 
@mock.patch.object(SearchDocumentMixin, 'search_indexes', ['foo', 'bar']) - @mock.patch.object(SearchDocumentMixin, 'delete_search_document') + @mock.patch.object(SearchDocumentMixin, "search_indexes", ["foo", "bar"]) + @mock.patch.object(SearchDocumentMixin, "delete_search_document") def test__on_model_delete__multiple_indexes(self, mock_delete): """Test the _on_model_delete function with multiple indexes.""" obj = SearchDocumentMixin() _on_model_delete(None, instance=obj) self.assertEqual(mock_delete.call_count, 2) - @mock.patch('elasticsearch_django.apps._update_search_index') + @mock.patch("elasticsearch_django.apps._update_search_index") def test__on_model_save__index(self, mock_update): """Test the _on_model_save function without update_fields.""" - obj = mock.Mock(spec=SearchDocumentMixin, search_indexes=['foo']) + obj = mock.Mock(spec=SearchDocumentMixin, search_indexes=["foo"]) _on_model_save(None, instance=obj, update_fields=None) - mock_update.assert_called_once_with(instance=obj, index='foo', update_fields=None) + mock_update.assert_called_once_with( + instance=obj, index="foo", update_fields=None + ) - @mock.patch('elasticsearch_django.apps._update_search_index') + @mock.patch("elasticsearch_django.apps._update_search_index") def test__on_model_save__update(self, mock_update): """Test the _on_model_save function without update_fields.""" - obj = mock.Mock(spec=SearchDocumentMixin, search_indexes=['foo']) - _on_model_save(None, instance=obj, update_fields=['bar']) - mock_update.assert_called_once_with(instance=obj, index='foo', update_fields=['bar']) + obj = mock.Mock(spec=SearchDocumentMixin, search_indexes=["foo"]) + _on_model_save(None, instance=obj, update_fields=["bar"]) + mock_update.assert_called_once_with( + instance=obj, index="foo", update_fields=["bar"] + ) def test__update_search_index(self): """Test the _update_search_index function with an index action.""" obj = mock.Mock(spec=SearchDocumentMixin) - _update_search_index(instance=obj, 
index='foo', update_fields=None) + _update_search_index(instance=obj, index="foo", update_fields=None) self.assertEqual(obj.index_search_document.call_count, 1) self.assertEqual(obj.update_search_document.call_count, 0) + obj.index_search_document.assert_called_once_with(index="foo") def test__update_search_update(self): """Test the _update_search_index function with an update action.""" obj = mock.Mock(spec=SearchDocumentMixin) - _update_search_index(instance=obj, index='foo', update_fields=['bar']) + _update_search_index(instance=obj, index="foo", update_fields=["bar"]) self.assertEqual(obj.update_search_document.call_count, 1) self.assertEqual(obj.index_search_document.call_count, 0) + obj.update_search_document.assert_called_once_with( index="foo", update_fields=["bar"] ) diff --git a/elasticsearch_django/tests/test_models.py b/elasticsearch_django/tests/test_models.py index 1f80be9..d8a26b0 100644 --- a/elasticsearch_django/tests/test_models.py +++ b/elasticsearch_django/tests/test_models.py @@ -35,7 +35,12 @@ def test_search_indexes(self, mock_indexes): def test_as_search_document(self): """Test the as_search_document method.""" obj = SearchDocumentMixin() - self.assertRaises(NotImplementedError, obj.as_search_document) + self.assertRaises(NotImplementedError, obj.as_search_document, index="_all") + + def test_as_search_document_update(self): + """Test the as_search_document_update method.""" + obj = SearchDocumentMixin() + self.assertRaises(NotImplementedError, obj.as_search_document_update, index="_all", update_fields=[]) @mock.patch('elasticsearch_django.settings.get_connection_string', lambda: 'http://testserver') @mock.patch('elasticsearch_django.models.get_client') diff --git a/elasticsearch_django/utils.py b/elasticsearch_django/utils.py index 024f17a..e57281e 100644 --- a/elasticsearch_django/utils.py +++ b/elasticsearch_django/utils.py @@ -17,7 +17,7 @@ def disable_search_updates(): """ _settings = get_settings() - _sync = _settings['auto_sync'] 
- _settings['auto_sync'] = False + _sync = _settings["auto_sync"] + _settings["auto_sync"] = False yield - _settings['auto_sync'] = _sync + _settings["auto_sync"] = _sync diff --git a/setup.py b/setup.py index b67cb7f..9178d8d 100644 --- a/setup.py +++ b/setup.py @@ -14,8 +14,8 @@ packages=find_packages(), install_requires=[ 'Django>=1.11', - 'elasticsearch>=5,<6', - 'elasticsearch-dsl>=5,<6', + 'elasticsearch>=6,<7', + 'elasticsearch-dsl>=6,<7', 'psycopg2-binary>=2.6', 'simplejson>=3.8' ],