diff --git a/tubesync/sync/models.py b/tubesync/sync/models.py
index f4ce1b88..df57665d 100644
--- a/tubesync/sync/models.py
+++ b/tubesync/sync/models.py
@@ -547,6 +547,9 @@ class Media(models.Model):
     Source.
     '''
 
+    # Used to convert seconds to datetime
+    posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
+
     # Format to use to display a URL for the media
     URLS = _srctype_dict('https://www.youtube.com/watch?v={key}')
 
@@ -771,6 +774,7 @@ class Meta:
     def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
         # Trigger an update of derived fields from metadata
         if self.metadata:
+            setattr(self, '_cached_metadata_dict', None)
             self.title = self.metadata_title[:200]
             self.duration = self.metadata_duration
         if update_fields is not None and "metadata" in update_fields:
@@ -1011,20 +1015,28 @@ def has_metadata(self):
     @property
     def reduce_data(self):
         try:
-            from common.logger import log
-            from common.utils import json_serial
-
-            old_mdl = len(self.metadata or "")
             data = json.loads(self.metadata or "{}")
+            if '_reduce_data_ran_at' in data.keys():
+                total_seconds = data['_reduce_data_ran_at']
+                ran_at = self.posix_epoch + timedelta(seconds=total_seconds)
+                if (timezone.now() - ran_at) < timedelta(hours=1):
+                    return data
+
+            from common.utils import json_serial
             compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
 
             filtered_data = filter_response(data, True)
+            filtered_data['_reduce_data_ran_at'] = round((timezone.now() - self.posix_epoch).total_seconds())
             filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
         except Exception as e:
+            from common.logger import log
             log.exception('reduce_data: %s', e)
         else:
+            from common.logger import log
+            log.debug(f'reduce_data: running for: {self.source.name} / {self.key}')
             # log the results of filtering / compacting on metadata size
             new_mdl = len(compact_json)
+            old_mdl = len(self.metadata or "")
             if old_mdl > new_mdl:
                 delta = old_mdl - new_mdl
                 log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
@@ -1034,16 +1046,25 @@ def reduce_data(self):
                 log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
             if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
                 self.metadata = filtered_json
+            return filtered_data
+        return data
 
     @property
     def loaded_metadata(self):
+        data = None
         if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
-            self.reduce_data
+            data = self.reduce_data
         try:
-            data = json.loads(self.metadata)
+            if not data:
+                cached = getattr(self, '_cached_metadata_dict', None)
+                if cached:
+                    data = cached
+                else:
+                    data = json.loads(self.metadata or "{}")
             if not isinstance(data, dict):
                 return {}
+            setattr(self, '_cached_metadata_dict', data)
             return data
         except Exception as e:
             return {}
 
@@ -1111,7 +1132,6 @@ def metadata_published(self, timestamp=None):
         if timestamp is not None:
             try:
                 timestamp_float = float(timestamp)
-                posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
-                published_dt = posix_epoch + timedelta(seconds=timestamp_float)
+                published_dt = self.posix_epoch + timedelta(seconds=timestamp_float)
             except Exception as e:
                 log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"')
diff --git a/tubesync/sync/tests.py b/tubesync/sync/tests.py
index 0646a937..514f75b1 100644
--- a/tubesync/sync/tests.py
+++ b/tubesync/sync/tests.py
@@ -1767,7 +1767,7 @@ def test_metadata_20230629(self):
         self.media.save()
 
         unfiltered = self.media.loaded_metadata
-        filtered = filter_response(self.media.loaded_metadata)
+        filtered = filter_response(self.media.loaded_metadata, True)
         self.assertIn('formats', unfiltered.keys())
         self.assertIn('formats', filtered.keys())
         # filtered 'downloader_options'