Skip to content

Commit

Permalink
fix: 285 - update default chunksize for processing vin files (#313)
Browse files Browse the repository at this point in the history
  • Loading branch information
tim738745 authored May 28, 2024
1 parent abe1730 commit 7dbf76d
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 7 deletions.
18 changes: 18 additions & 0 deletions django/api/migrations/0026_alter_uploadedvinsfile_chunk_size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.25 on 2024-05-28 00:49

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``chunk_size`` on the ``uploadedvinsfile`` model.

    The field is redefined as an ``IntegerField`` whose default is 5000.
    This migration belongs to the ``api`` app and is ordered after
    ``0025_auto_20240516_2248``.
    """

    # Migrations that must already be applied before this one runs.
    dependencies = [("api", "0025_auto_20240516_2248")]

    # Single operation: replace the field definition of ``chunk_size``.
    operations = [
        migrations.AlterField(
            model_name="uploadedvinsfile",
            name="chunk_size",
            field=models.IntegerField(default=5000),
        ),
    ]
2 changes: 1 addition & 1 deletion django/api/models/uploaded_vins_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
class UploadedVinsFile(Auditable):
filename = models.CharField(max_length=32, unique=True)

chunk_size = models.IntegerField(default=25000)
chunk_size = models.IntegerField(default=5000)

chunks_per_run = models.IntegerField(default=4)

Expand Down
23 changes: 17 additions & 6 deletions django/workers/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,34 @@ def read_uploaded_vins_file():
# then we'll have to compare the (vin, postal_code) keys to existing records in the database, and
# determine which ones need to get bulk-inserted, and which ones bulk-updated.
# also have to keep in mind the memory used by any data structures we use
def close_file_response(file_response):
if file_response is not None:
file_response.close()
file_response.release_conn()

@transaction.atomic
def inner(vins_file, file_response):
if vins_file is not None and file_response is not None:
parse_and_save(vins_file, file_response)

file_response = None
vins_file = (
UploadedVinsFile.objects.filter(processed=False).order_by("create_timestamp").first()
UploadedVinsFile.objects.filter(processed=False)
.order_by("create_timestamp")
.first()
)
if vins_file is not None:
file_response = get_minio_object(vins_file.filename)
try:
func_timeout(600, inner, args=(vins_file, file_response))
close_file_response(file_response)
except FunctionTimedOut:
print("reading vins file job timed out")
close_file_response(file_response)
raise Exception
except Exception:
close_file_response(file_response)
raise Exception
finally:
if file_response is not None:
file_response.close()
file_response.release_conn()


def batch_decode_vins(service_name, batch_size=50):
Expand All @@ -59,7 +67,10 @@ def inner():
service.NUMBER_OF_CURRENT_DECODE_ATTEMPTS.value
+ "__lt": max_decode_attempts,
}
order_by = [service.NUMBER_OF_CURRENT_DECODE_ATTEMPTS.value, "create_timestamp"]
order_by = [
service.NUMBER_OF_CURRENT_DECODE_ATTEMPTS.value,
"create_timestamp",
]
uploaded_vin_records = UploadedVinRecord.objects.filter(**filters).order_by(
*order_by
)[:batch_size]
Expand Down

0 comments on commit 7dbf76d

Please sign in to comment.