From be0cbd6cd0a8d7bd4a55be8e85c54c258f27dd66 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 23 Feb 2025 17:47:54 -0500 Subject: [PATCH 1/3] Be more efficient with `Media.loaded_metadata` --- Note: `posix_epoch` is a class attribute of `Media`, so methods must read it as `self.posix_epoch`; a bare `posix_epoch` inside a method body raises NameError, which the surrounding `except Exception` handlers would silently swallow. The blob hashes on the `index` lines were not regenerated after this correction. tubesync/sync/models.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index f3c051fa..1b3a1297 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -547,6 +547,9 @@ class Media(models.Model): Source. ''' + # Used to convert seconds to datetime + posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) + # Format to use to display a URL for the media URLS = _srctype_dict('https://www.youtube.com/watch?v={key}') @@ -771,6 +774,7 @@ class Meta: def save(self, force_insert=False, force_update=False, using=None, update_fields=None): # Trigger an update of derived fields from metadata if self.metadata: + setattr(self, '_cached_metadata_dict', None) self.title = self.metadata_title[:200] self.duration = self.metadata_duration if update_fields is not None and "metadata" in update_fields: @@ -1000,20 +1004,28 @@ def has_metadata(self): @property def reduce_data(self): try: - from common.logger import log - from common.utils import json_serial - - old_mdl = len(self.metadata or "") data = json.loads(self.metadata or "{}") + if '_reduce_data_ran_at' in data.keys(): + total_seconds = data['_reduce_data_ran_at'] + ran_at = self.posix_epoch + timedelta(seconds=total_seconds) + if (timezone.now() - ran_at) < timedelta(hours=1): + return data + + from common.utils import json_serial compact_json = json.dumps(data, separators=(',', ':'), default=json_serial) filtered_data = filter_response(data, True) + filtered_data['_reduce_data_ran_at'] = round((timezone.now() - self.posix_epoch).total_seconds()) filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial) except Exception as e: + from common.logger import log log.exception('reduce_data: %s', e) else: + from 
common.logger import log + log.debug(f'reduce_data: running for: {self.source.name} / {self.key}') # log the results of filtering / compacting on metadata size new_mdl = len(compact_json) + old_mdl = len(self.metadata or "") if old_mdl > new_mdl: delta = old_mdl - new_mdl log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})') @@ -1023,16 +1035,24 @@ def reduce_data(self): log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})') if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False): self.metadata = filtered_json + return filtered_data @property def loaded_metadata(self): + data = None if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False): - self.reduce_data + data = self.reduce_data try: - data = json.loads(self.metadata) + if not data: + cached = getattr(self, '_cached_metadata_dict', None) + if cached: + data = cached + else: + data = json.loads(self.metadata or "{}") if not isinstance(data, dict): return {} + setattr(self, '_cached_metadata_dict', data) return data except Exception as e: return {} @@ -1100,7 +1120,6 @@ def metadata_published(self, timestamp=None): if timestamp is not None: try: timestamp_float = float(timestamp) - posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc) - published_dt = posix_epoch + timedelta(seconds=timestamp_float) + published_dt = self.posix_epoch + timedelta(seconds=timestamp_float) except Exception as e: log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"') From 3e2337d4a94a7d68ad5e66d8d7d6fbcba65230fb Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 23 Feb 2025 17:58:15 -0500 Subject: [PATCH 2/3] fixup: only return filtered data when appropriate --- tubesync/sync/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py index 1b3a1297..000f2830 100644 --- a/tubesync/sync/models.py +++ b/tubesync/sync/models.py @@ -1035,7 +1035,8 @@ def reduce_data(self): log.info(f'{self.key}: metadata reduced by {delta:,} 
characters ({old_mdl:,} -> {new_mdl:,})') if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False): self.metadata = filtered_json - return filtered_data + return filtered_data + return data @property From 7c2ba576f666bbc36cba08b48c7e4d0081832a04 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 23 Feb 2025 18:20:35 -0500 Subject: [PATCH 3/3] Return a copy instead of modifying the input --- tubesync/sync/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py index 0646a937..514f75b1 100644 --- a/tubesync/sync/tests.py +++ b/tubesync/sync/tests.py @@ -1767,7 +1767,7 @@ def test_metadata_20230629(self): self.media.save() unfiltered = self.media.loaded_metadata - filtered = filter_response(self.media.loaded_metadata) + filtered = filter_response(self.media.loaded_metadata, True) self.assertIn('formats', unfiltered.keys()) self.assertIn('formats', filtered.keys()) # filtered 'downloader_options'