diff --git a/.github/workflows/nn-ci-cpu-testing.yml b/.github/workflows/nn-ci-cpu-testing.yml index 054ac981..49f92d91 100644 --- a/.github/workflows/nn-ci-cpu-testing.yml +++ b/.github/workflows/nn-ci-cpu-testing.yml @@ -25,8 +25,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ ubuntu-18.04 ] - python-version: [ 3.7, 3.8, 3.9 ] + os: [ ubuntu-latest ] + python-version: [ '3.9', '3.10', '3.11' ] # Timeout: https://stackoverflow.com/a/59076067/4521646 timeout-minutes: 120 @@ -76,59 +76,26 @@ jobs: - name: Tests workflow run: | - - # test train examples ocr - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/by.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/eu.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/ge.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/kg.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/kz.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/ua.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/ua-1995.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/su.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/ocr/ru.ipynb - - # test train examples classification - #jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/classification/options-base-count_lines-train.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/classification/options-base-train.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/classification/options-custom-train.ipynb - jupyter nbconvert 
--ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/classification/options-train.ipynb - - # test train examples object detection - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/train/object_detection/yolov5-train.ipynb - # test python inference examples - python examples/py/inference/get-started-demo.py - python examples/py/inference/get-started-tiny-demo.py - python examples/py/inference/number-plate-filling-demo.py - python examples/py/inference/number-plate-recognition-multiline-demo.py + python tutorials/py/inference/get-started-demo.py + python tutorials/py/inference/get-started-tiny-demo.py + python tutorials/py/inference/number-plate-filling-demo.py # test jupyter inference examples - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/inference/custom-options-model-demo.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/inference/get-started-demo.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/inference/get-started-tiny-demo.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/inference/number-plate-bbox-filling.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/inference/number-plate-keypoints-filling.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/inference/number-plate-recognition-multiline-demo.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/inference/get-started-demo-ocr-custom.ipynb + jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html tutorials/ju/inference/custom-options-model-demo.ipynb + jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html tutorials/ju/inference/get-started-demo.ipynb + jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html 
tutorials/ju/inference/get-started-tiny-demo.ipynb + jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html tutorials/ju/inference/number-plate-filling.ipynb + jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html tutorials/ju/inference/get-started-demo-ocr-custom.ipynb # test python benchmarks examples - python examples/py/benchmark/accuracy-test.py - python examples/py/benchmark/runtime-test.py + python tutorials/py/benchmark/accuracy-test.py + python tutorials/py/benchmark/runtime-test.py # test jupyter benchmarks examples - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/benchmark/accuracy-test.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/benchmark/accuracy-test-custom.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/benchmark/accuracy-test-multiline.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/benchmark/runtime-test.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/benchmark/runtime-test-multiline.ipynb - - # test jupyter dataset tools examples - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/dataset_tools/analyze_via_dataset.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/dataset_tools/auto_number_grab.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/dataset_tools/check_ocr_model.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/dataset_tools/option_checker.ipynb - jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html examples/ju/dataset_tools/ocr_dataset_format_checker.ipynb + jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html tutorials/ju/benchmark/accuracy-test.ipynb + jupyter nbconvert 
--ExecutePreprocessor.timeout=6000 --execute --to html tutorials/ju/benchmark/accuracy-test-custom.ipynb + jupyter nbconvert --ExecutePreprocessor.timeout=6000 --execute --to html tutorials/ju/benchmark/runtime-test.ipynb # test image loaders python3 nomeroff_net/image_loaders/base.py @@ -140,9 +107,6 @@ jobs: # test nnmodels python3 -m nomeroff_net.nnmodels.numberplate_classification_model -f nomeroff_net/nnmodels/numberplate_classification_model.py python3 -m nomeroff_net.nnmodels.numberplate_options_model -f nomeroff_net/nnmodels/numberplate_options_model.py - python3 -m nomeroff_net.nnmodels.fraud_numberpate_model -f nomeroff_net/nnmodels/fraud_numberpate_options.py - python3 -m nomeroff_net.nnmodels.numberplate_inverse_model -f nomeroff_net/nnmodels/numberplate_inverse_model.py - python3 -m nomeroff_net.nnmodels.numberplate_orientation_model -f nomeroff_net/nnmodels/numberplate_orientation_model.py python3 -m nomeroff_net.nnmodels.ocr_model -f nomeroff_net/nnmodels/ocr_model.py # test tools diff --git a/README.md b/README.md index 659f03d1..7fa36b4c 100644 --- a/README.md +++ b/README.md @@ -172,11 +172,16 @@ This gives you the opportunity to get **99% accuracy**on photos that are uploade Nomeroff-Net OCR Example + ## Contributing Contributions to this repository are welcome. Examples of things you can contribute: * Training on other datasets. * Accuracy Improvements. 
+## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=ria-com/nomeroff-net&type=Date)](https://star-history.com/#ria-com/nomeroff-net&Date) + ## Credits * Dmytro Probachay <dmytro.probachay@ria.com> * Oleg Cherniy <oleg.cherniy@ria.com> diff --git a/data/dataset/Dataset/make_dataset_from_image_name.py b/data/dataset/Dataset/make_dataset_from_image_name.py index e20a3748..40311aa1 100644 --- a/data/dataset/Dataset/make_dataset_from_image_name.py +++ b/data/dataset/Dataset/make_dataset_from_image_name.py @@ -378,7 +378,7 @@ def __init__(self, easyocr_readers=None, exclude_zones_list=None): self.reader = easyocr.Reader(easyocr_readers) self.exclude_zones_list = exclude_zones_list - def predict(self, img, count_lines, flag_show=False): + def predict(self, img, count_lines, regions, flag_show=False): # Display the aligned and cropped image result = self.reader.readtext(img) result = normalize_easyocr_output(result) @@ -404,27 +404,92 @@ class NomeroffNetReader: def __init__(self, presets=None): if presets is None: presets = { + "eu_ua_2004_2015_efficientnet_b2": { + "for_regions": ["eu_ua_2004"], + "for_count_lines": [1], + "model_path": "latest" + }, + "eu_ua_1995_efficientnet_b2": { + "for_regions": ["eu_ua_1995"], + "for_count_lines": [1], + "model_path": "latest" + }, + "eu_ua_custom_efficientnet_b2": { + "for_regions": ["eu_ua_custom"], + "for_count_lines": [1], + "model_path": "latest" + }, + "xx_transit_efficientnet_b2": { + "for_regions": ["xx_transit"], + "for_count_lines": [1], + "model_path": "latest" + }, + "eu_efficientnet_b2": { + "for_regions": ["eu", "xx_unknown", "eu_ua_2015"], + "for_count_lines": [1], + "model_path": "latest" + }, + "ru": { + "for_regions": ["ru", "eu_ua_ordlo_lpr", "eu_ua_ordlo_dpr"], + "for_count_lines": [1], + "model_path": "latest" + }, + "kz": { + "for_regions": ["kz"], + "for_count_lines": [1], + "model_path": "latest" + }, + "kg": { # "kg_shufflenet_v2_x2_0" + "for_regions": ["kg"], + 
"for_count_lines": [1], + "model_path": "latest" + }, + "ge": { + "for_regions": ["ge"], + "for_count_lines": [1], + "model_path": "latest" + }, + "su_efficientnet_b2": { + "for_regions": ["su"], + "for_count_lines": [1], + "model_path": "latest" + }, + "am": { + "for_regions": ["am"], + "for_count_lines": [1], + "model_path": "latest" + }, + "by": { + "for_regions": ["by"], + "for_count_lines": [1], + "model_path": "latest" + }, "eu_2lines_efficientnet_b2": { "for_regions": ["eu_ua_2015", "eu_ua_2004", "eu_ua_1995", "eu_ua_custom", "xx_transit", "eu", "xx_unknown", "ru", "eu_ua_ordlo_lpr", "eu_ua_ordlo_dpr", "kz", - "kg", "ge", "su", "am", "by"], + "kg", "ge", "am", "by"], "for_count_lines": [2, 3], "model_path": "latest" }, + "su_2lines_efficientnet_b2": { + "for_regions": ["su", "military"], + "for_count_lines": [2, 3], + "model_path": "latest" + } } self.number_plate_text_reading = NumberPlateTextReading( "number_plate_text_reading", image_loader=None, presets=presets, default_label="eu", - default_lines_count=2, + default_lines_count=1, multiline_splitter=" ", ) - def predict(self, img, count_lines, flag_show=False): + def predict(self, img, count_lines, regions, flag_show=False): number_plate_text_reading_res = unzip( self.number_plate_text_reading(unzip([[img], - ["eu"], + regions, count_lines, [img]]))) if len(number_plate_text_reading_res): texts, _ = number_plate_text_reading_res @@ -436,7 +501,7 @@ def create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/md/*/*", target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_md_dataset", parse_fromat="md", flag_show=False, reader=None, need_upscale_image=False, - count_hyphens=1 + count_hyphens=1, min_count_line=0, ): if need_upscale_image: up = HAT(tile_size=320, num_gpu=int(device_torch == "cuda")) @@ -520,6 +585,7 @@ def create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/md/*/*", # Apply the perspective transformation to the image aligned_img = 
cv2.warpPerspective(image_part_upscale, M, (w, h)) region_ids, count_lines, confidences, predicted = classifiactor.predict_with_confidence([aligned_img]) + region_names = classifiactor.get_region_labels(region_ids) max_count_lines = max(max_count_lines, count_lines[0]) # Тут далі можна шось робити @@ -541,13 +607,13 @@ def create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/md/*/*", # Apply the perspective transformation to the image aligned_img = cv2.warpPerspective(image_part_upscale, M, (w, h)) - predicted_lines = reader.predict(aligned_img, count_lines) + predicted_lines = reader.predict(aligned_img, count_lines, region_names) parsed_numberplate, numberplate_lines, punctuation_np_lines = fromats_parse[parse_fromat]( numberplate, count_line=count_lines[0]) print(count_lines, "numberplate", parsed_numberplate, numberplate_lines, punctuation_np_lines) - if count_lines[0] > 1: + if count_lines[0] > min_count_line: # Make dataset numberplate_dataset_item = NumberplateDatasetItem(numberplate_lines, punctuation_np_lines, photo_id, parsed_numberplate, @@ -559,7 +625,7 @@ def create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/md/*/*", numberplate_dataset_item.write_orig_dataset() numberplate_dataset_item.write_normalize_dataset() else: - warnings.warn("count_lines <= 1") + warnings.warn(f"count_lines <= {min_count_line}") if flag_show: plt.imshow(aligned_img) @@ -578,7 +644,7 @@ def create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/md/*/*", x1, y1, x2, y2 = bbox cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 4) - if max_count_lines < 2: + if max_count_lines < min_count_line+1: bad_src_dir = os.path.join(target_dataset, "bad_cnt_lines") os.makedirs(bad_src_dir, exist_ok=True) cv2.imwrite(os.path.join(bad_src_dir, os.path.basename(img_path)), img) @@ -589,166 +655,9 @@ def create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/md/*/*", if __name__ == "__main__": - # 
create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/fi/*/*", - # target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_fi_dataset", - # parse_fromat="fi", count_hyphens=2, - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=["FIN"])) - # - # create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/md/*/*", - # target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_md_dataset", - # parse_fromat="md", - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=["MD"])) - # - # create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/pl/*/*", - # target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_pl_dataset", - # parse_fromat="default", - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=["PL"])) - # - # create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/by/*/*", - # target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_by_dataset", - # parse_fromat="default", - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=[])) - # - # create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/kz/*/*", - # target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_kz_dataset", - # parse_fromat="kz", - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=["KZ"])) - # - # create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/ro/*/*", - # target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_ro_dataset", - # parse_fromat="ro", - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=["RO"])) - # - # create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/lv/*/*", - # target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_lv_dataset", - # parse_fromat="default", - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=["LV"])) - # - # create_dataset(img_dir="/mnt/datasets/nomeroff-net/2lines_np_parsed/lt/*/*", - # 
target_dataset="/mnt/datasets/nomeroff-net/2lines_np_parsed/mlines_lt_dataset", - # parse_fromat="default", - # reader=EasyOCRReader(easyocr_readers=["en"], exclude_zones_list=["LT"])) - - # create_dataset(img_dir="/var/www/projects_computer_vision/nomeroff-net/data/dataset/Dataset/src_test_platesmania/*", - # target_dataset="/var/www/projects_computer_vision/nomeroff-net/data/dataset/Dataset" - # "/src_test_platesmania_dataset", - # parse_fromat="default", - # reader=EasyOCRReader(easyocr_readers=["ru", "uk"], exclude_zones_list=[])) - - - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/al/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/al", - parse_fromat="al", - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/at/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/at", - parse_fromat="at", - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/ba/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/ba", - parse_fromat="ba", - count_hyphens=3, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/be/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/be", - parse_fromat="be", - count_hyphens=3, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/bg/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/bg", - parse_fromat="bg", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/cy/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/cy", - parse_fromat="cy", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - 
create_dataset(img_dir="/var/www/projects_computer_vision/nomeroff-net/data/dataset/Dataset/test/de-problem-short/*/*", - target_dataset="/var/www/projects_computer_vision/nomeroff-net/data/dataset/Dataset/test_dataset/de-problem-short", - parse_fromat="de", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dk/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/dk", - parse_fromat="dk", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - # hard - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/es/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/es", - parse_fromat="es", - count_hyphens=2, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/gg/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/gg", - parse_fromat="gg", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/gr/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/gr", - parse_fromat="gr", - count_hyphens=2, - reader=NomeroffNetReader(), - ) - # hard - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/is/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/is", - parse_fromat="is", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - # Ліхтенштейн знаходить як однолінійні - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/li/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/li", - parse_fromat="li", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/lu/*/*", - 
target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/lu", - parse_fromat="lu", - count_hyphens=1, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/mt/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/mt", - parse_fromat="mt", + create_dataset(img_dir="/mnt/datasets/nomeroff-net/example/*", + target_dataset="/mnt/datasets/nomeroff-net/dataset", + parse_fromat="default", count_hyphens=1, - reader=NomeroffNetReader(), - ) - # hard - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/nl/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/nl", - parse_fromat="nl", - count_hyphens=3, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/no/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/no", - parse_fromat="no", - count_hyphens=2, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/pl/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/pl", - parse_fromat="pl", - count_hyphens=2, - reader=NomeroffNetReader(), - ) - create_dataset(img_dir="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/uk/*/*", - target_dataset="/mnt/raid2/datasets/nomeroff-net/2lines_np_parsed/new/dataset/uk", - parse_fromat="uk", - count_hyphens=2, - reader=NomeroffNetReader(), - ) + min_count_line=0, + reader=NomeroffNetReader()) diff --git a/data/dataset/TextDetector/ocr_example/test/ann/10001_2914KC7_0.json b/data/dataset/TextDetector/ocr_example/test/ann/10001_2914KC7_0.json index df645aa2..67442d76 100644 --- a/data/dataset/TextDetector/ocr_example/test/ann/10001_2914KC7_0.json +++ b/data/dataset/TextDetector/ocr_example/test/ann/10001_2914KC7_0.json @@ -1 +1 @@ 
-{"tags":[],"objects":[],"state_id":"2","region_id":"11","size":{"width":146,"height":31},"moderation":{"isModerated":1,"moderatedBy":"Andrei Dziaineka"},"description":"2914 KC-7","name":"10001_2914KC7_0","count_lines":"1"} \ No newline at end of file +{"tags":[],"objects":[],"state_id":"2","region_id":"11","size":{"width":146,"height":31},"moderation":{"isModerated":1,"moderatedBy":"Andrei Dziaineka"},"description":"2914KC7","name":"10001_2914KC7_0","count_lines":"1"} \ No newline at end of file diff --git a/nomeroff_net/nnmodels/ocr_model.py b/nomeroff_net/nnmodels/ocr_model.py index 247f3342..5638ef06 100644 --- a/nomeroff_net/nnmodels/ocr_model.py +++ b/nomeroff_net/nnmodels/ocr_model.py @@ -156,10 +156,10 @@ def calculate_loss(self, logits, texts): logits_lens = torch.full(size=(batch_size,), fill_value=input_len, dtype=torch.int32) # calculate ctc loss = self.criterion( - logits, - encoded_texts, + logits.to(device), + encoded_texts.to(device), logits_lens.to(device), - text_lens) + text_lens.to(device)) return loss def step(self, batch): diff --git a/nomeroff_net/pipelines/number_plate_text_reading.py b/nomeroff_net/pipelines/number_plate_text_reading.py index 815cdd92..d26c25bb 100644 --- a/nomeroff_net/pipelines/number_plate_text_reading.py +++ b/nomeroff_net/pipelines/number_plate_text_reading.py @@ -7,7 +7,7 @@ DEFAULT_PRESETS = { "eu_ua_2004_2015_efficientnet_b2": { - "for_regions": ["eu_ua_2015", "eu_ua_2004"], + "for_regions": ["eu_ua_2004"], "for_count_lines": [1], "model_path": "latest" }, @@ -27,7 +27,7 @@ "model_path": "latest" }, "eu_efficientnet_b2": { - "for_regions": ["eu", "xx_unknown"], + "for_regions": ["eu", "xx_unknown", "eu_ua_2015"], "for_count_lines": [1], "model_path": "latest" }, diff --git a/nomeroff_net/pipes/number_plate_text_readers/base/ocr.py b/nomeroff_net/pipes/number_plate_text_readers/base/ocr.py index 7f205218..db25efcb 100644 --- a/nomeroff_net/pipes/number_plate_text_readers/base/ocr.py +++ 
b/nomeroff_net/pipes/number_plate_text_readers/base/ocr.py @@ -382,10 +382,10 @@ def get_acc(self, predicted: List, decode: List) -> torch.Tensor: logits_lens = torch.full(size=(batch_size,), fill_value=input_len, dtype=torch.int32) acc = functional.ctc_loss( - logits, - encoded_texts, + logits.to(device), + encoded_texts.to(device), logits_lens.to(device), - text_lens + text_lens.to(device) ) return 1 - acc / len(self.letters) diff --git a/nomeroff_net/tools/datasets_tools.py b/nomeroff_net/tools/datasets_tools.py index e5d71c14..807c8546 100644 --- a/nomeroff_net/tools/datasets_tools.py +++ b/nomeroff_net/tools/datasets_tools.py @@ -152,9 +152,13 @@ def auto_number_grab(root_dir, res_dir, replace_template=None, csv_dataset_path= number_plate_detection_and_reading = pipeline("number_plate_detection_and_reading", image_loader=image_loader, **kwargs) - photos = pd.DataFrame(columns=['photoId']) + photos = pd.DataFrame(columns=['photoId', 'npText']) if csv_dataset_path is not None: - photos = pd.read_csv(csv_dataset_path) + photos = pd.read_csv( + csv_dataset_path, + usecols=['photoId', 'npText'], + dtype={'photoId': int, 'npText': str} + ) photos = photos.set_index(['photoId']) if os.path.exists(res_dir): diff --git a/public/index.html b/public/index.html index 0d6474bb..a4785ef7 100644 --- a/public/index.html +++ b/public/index.html @@ -44,12 +44,12 @@