diff --git a/django/api/migrations/0026_alter_uploadedvinsfile_chunk_size.py b/django/api/migrations/0026_alter_uploadedvinsfile_chunk_size.py new file mode 100644 index 00000000..292cb0e6 --- /dev/null +++ b/django/api/migrations/0026_alter_uploadedvinsfile_chunk_size.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.25 on 2024-05-28 00:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0025_auto_20240516_2248'), + ] + + operations = [ + migrations.AlterField( + model_name='uploadedvinsfile', + name='chunk_size', + field=models.IntegerField(default=5000), + ), + ] diff --git a/django/api/models/uploaded_vins_file.py b/django/api/models/uploaded_vins_file.py index 5f3345e1..a958dd28 100644 --- a/django/api/models/uploaded_vins_file.py +++ b/django/api/models/uploaded_vins_file.py @@ -5,7 +5,7 @@ class UploadedVinsFile(Auditable): filename = models.CharField(max_length=32, unique=True) - chunk_size = models.IntegerField(default=25000) + chunk_size = models.IntegerField(default=5000) chunks_per_run = models.IntegerField(default=4) diff --git a/django/workers/tasks.py b/django/workers/tasks.py index 1efe4076..280a6c8a 100644 --- a/django/workers/tasks.py +++ b/django/workers/tasks.py @@ -26,6 +26,11 @@ def read_uploaded_vins_file(): # then we'll have to compare the (vin, postal_code) keys to existing records in the database, and # determine which ones need to get bulk-inserted, and which ones bulk-updated. # also have to keep in mind the memory used by any data structures we use + def close_file_response(file_response): + if file_response is not None: + file_response.close() + file_response.release_conn() + @transaction.atomic def inner(vins_file, file_response): if vins_file is not None and file_response is not None: @@ -33,19 +38,22 @@ def inner(vins_file, file_response): file_response = None vins_file = ( - UploadedVinsFile.objects.filter(processed=False).order_by("create_timestamp").first() + UploadedVinsFile.objects.filter(processed=False) + .order_by("create_timestamp") + .first() ) if vins_file is not None: file_response = get_minio_object(vins_file.filename) try: func_timeout(600, inner, args=(vins_file, file_response)) + close_file_response(file_response) except FunctionTimedOut: print("reading vins file job timed out") + close_file_response(file_response) + raise Exception + except Exception: + close_file_response(file_response) raise Exception - finally: - if file_response is not None: - file_response.close() - file_response.release_conn() def batch_decode_vins(service_name, batch_size=50): @@ -59,7 +67,10 @@ def inner(): service.NUMBER_OF_CURRENT_DECODE_ATTEMPTS.value + "__lt": max_decode_attempts, } - order_by = [service.NUMBER_OF_CURRENT_DECODE_ATTEMPTS.value, "create_timestamp"] + order_by = [ + service.NUMBER_OF_CURRENT_DECODE_ATTEMPTS.value, + "create_timestamp", + ] uploaded_vin_records = UploadedVinRecord.objects.filter(**filters).order_by( *order_by )[:batch_size]