diff --git a/.github/workflows/python-pytest.yml b/.github/workflows/python-pytest.yml new file mode 100644 index 0000000..b82a09c --- /dev/null +++ b/.github/workflows/python-pytest.yml @@ -0,0 +1,36 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Execute tests with pytest + +on: + push: + branches: [ "dev" ] + pull_request: + branches: [ "dev", "main" ] +permissions: + contents: read +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f tests/requirements.txt ]; then pip install -r tests/requirements.txt; fi + # if [ -f pyproject.toml ]; then pip install -r pyproject.toml; fi + #- name: Lint with flake8 + # run: | + # # stop the build if there are Python syntax errors or undefined names + # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/docs/dev_notes.md b/docs/dev_notes.md index 7c57bd1..5bf7a43 100644 --- a/docs/dev_notes.md +++ b/docs/dev_notes.md @@ -5,7 +5,7 @@ We set this up so it is hosted as a huggingface space. Each commit to `main` tri For local testing, assuming you have all the required packages installed in a conda env or virtualenv, and that env is activated: -``` +```bash cd src streamlit run main.py ``` @@ -17,15 +17,17 @@ We have a CI action to presesnt the docs on github.io. To validate locally, you need the deps listed in `requirements.txt` installed. Run -``` +```bash mkdocs serve ``` + And navigate to the wish server running locally, by default: http://127.0.0.1:8888/ This automatically watches for changes in the markdown files, but if you edit the something else like the docstrings in py files, triggering a rebuild in another terminal refreshes the site, without having to quit and restart the server. -``` + +```bash mkdocs build -c ``` @@ -37,4 +39,44 @@ mkdocs build -c # Set up a conda env -(Standard stuff) \ No newline at end of file +(Standard stuff) + + +# Testing + +## local testing +To run the tests locally, we have the standard dependencies of the project, plus the test runner dependencies. + +```bash +pip install -r tests/requirements.txt +``` + +(If we migrate to using toml config, the test reqs could be consolidated into an optional section) + + +**Running tests** +from the project root, simply run: + +```bash +pytest +# or pick a specific test file to run +pytest tests/test_whale_viewer.py +``` + +To generate a coverage report to screen (also run the tests): +```bash +pytest --cov=src +``` + +To generate reports on pass rate and coverage, to files: +```bash +pytest --junit-xml=test-results.xml +pytest --cov-report=lcov --cov=src +``` + + +## CI testing + +Initially we have an action setup that runs all tests in the `tests` directory, within the `test/tests` branch. + +TODO: Add some test report & coverage badges to the README. diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..7767b6d --- /dev/null +++ b/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +pythonpath = "src" +testpaths = + tests + diff --git a/snippets/test_upload.py b/snippets/try_upload.py similarity index 100% rename from snippets/test_upload.py rename to snippets/try_upload.py diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/apptest/demo_whale_viewer.py b/src/apptest/demo_whale_viewer.py new file mode 100644 index 0000000..e951116 --- /dev/null +++ b/src/apptest/demo_whale_viewer.py @@ -0,0 +1,30 @@ +# a minimal snippet for the whale viewer, for testing purposes +# - using AppTest to validate that the display_whale functionality +# is ok +# - currently placed in the src directory (not optimal) because +# I couldn't get pytest to pick it up from the tests directory. +# - TODO: find a cleaner solution for organisation (maybe just config to pytest?) + +import streamlit as st + +# to run streamlit from this subdir, we need the the src dir on the path +# NOTE: pytest doesn't need this to run the tests, but to develop the test +# harness is hard without running streamlit +import sys +from os import path +# src (parent from here) +src_dir = path.dirname( path.dirname( path.abspath(__file__) ) ) +sys.path.append(src_dir) + + +import whale_viewer as sw_wv + +# a menu to pick one of the images +title = st.title("Whale Viewer testing") +species = st.selectbox("Species", sw_wv.WHALE_CLASSES) + +if species is not None: + # and display the image + reference + st.write(f"Selected species: {species}") + sw_wv.display_whale([species], 0, st) + diff --git a/src/input_handling.py b/src/input_handling.py index 2b604ed..b2631d7 100644 --- a/src/input_handling.py +++ b/src/input_handling.py @@ -1,3 +1,4 @@ +from fractions import Fraction from PIL import Image from PIL import ExifTags import re @@ -194,14 +195,68 @@ def get_image_datetime(image_file: UploadedFile) -> str | None: image = Image.open(image_file) exif_data = image._getexif() if exif_data is not None: - for tag, value in exif_data.items(): - if ExifTags.TAGS.get(tag) == 'DateTimeOriginal': - return value + if ExifTags.Base.DateTimeOriginal in exif_data: + return exif_data.get(ExifTags.Base.DateTimeOriginal) except Exception as e: # FIXME: what types of exception? st.warning(f"Could not extract date from image metadata. (file: {image_file.name})") # TODO: add to logger return None +def decimal_coords(coords:tuple, ref:str) -> Fraction: + """ + Converts coordinates from degrees, minutes, and seconds to decimal degrees. + + Args: + coords (tuple): A tuple containing three elements representing degrees, minutes, and seconds. + ref (str): A string representing the reference direction ('N', 'S', 'E', 'W'). + + Returns: + Fraction: The coordinates in decimal degrees. Negative if the reference is 'S' or 'W'. + + Example: + decimal_coords((40, 26, 46), 'N') -> 40.44611111111111 + decimal_coords((40, 26, 46), 'W') -> -40.44611111111111 + """ + # https://stackoverflow.com/a/73267185 + if ref not in ['N', 'S', 'E', 'W']: + raise ValueError("Invalid reference direction. Must be 'N', 'S', 'E', or 'W'.") + if len(coords) != 3: + raise ValueError("Coordinates must be a tuple of three elements (degrees, minutes, seconds).") + + decimal_degrees = coords[0] + coords[1] / 60 + coords[2] / 3600 + if ref == "S" or ref =='W': + decimal_degrees = -decimal_degrees + return decimal_degrees + + +def get_image_latlon(image_file: UploadedFile) -> tuple[float, float] | None: + """ + Extracts the latitude and longitude from the EXIF metadata of an uploaded image file. + + Args: + image_file (UploadedFile): The uploaded image file from which to extract the latitude and longitude. + + Returns: + tuple[float, float]: The latitude and longitude as a tuple if available, otherwise None. + + Raises: + Warning: If the latitude and longitude could not be extracted from the image metadata. + """ + try: + image = Image.open(image_file) + exif_data = image._getexif() + if exif_data is not None: + if ExifTags.Base.GPSInfo in exif_data: + gps_ifd = exif_data.get(ExifTags.Base.GPSInfo) + + lat = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLatitude], gps_ifd[ExifTags.GPS.GPSLatitudeRef])) + lon = float(decimal_coords(gps_ifd[ExifTags.GPS.GPSLongitude], gps_ifd[ExifTags.GPS.GPSLongitudeRef])) + + return lat, lon + + except Exception as e: # FIXME: what types of exception? + st.warning(f"Could not extract latitude and longitude from image metadata. (file: {str(image_file)}") + # an arbitrary set of defaults so testing is less painful... # ideally we add in some randomization to the defaults diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..345b03b --- /dev/null +++ b/tests/README.md @@ -0,0 +1,10 @@ +# Test report + +- overall status: ![CI test build](https://github.com/sdsc-ordes/saving-willy/actions/workflows/python-pytest.yml/badge.svg) +- more detailed test report: TODO + +## Test coverage + +- TODO + - For a summary: one way is using https://github.com/GaelGirodon/ci-badges-action, can add it as a post-pytest step to the CI + - For a table: try this https://github.com/coroo/pytest-coverage-commentator \ No newline at end of file diff --git a/tests/data/cakes.jpg b/tests/data/cakes.jpg new file mode 100644 index 0000000..ed5c25b Binary files /dev/null and b/tests/data/cakes.jpg differ diff --git a/tests/data/cakes_no_exif_datetime.jpg b/tests/data/cakes_no_exif_datetime.jpg new file mode 100644 index 0000000..b881793 Binary files /dev/null and b/tests/data/cakes_no_exif_datetime.jpg differ diff --git a/tests/data/cakes_no_exif_gps.jpg b/tests/data/cakes_no_exif_gps.jpg new file mode 100644 index 0000000..f639dc9 Binary files /dev/null and b/tests/data/cakes_no_exif_gps.jpg differ diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..f085d1d --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,6 @@ +# tests +pytest~=8.3.4 +pytest-cov~=6.0.0 +# linting +#flake8 + diff --git a/tests/test_demo_whale_viewer.py b/tests/test_demo_whale_viewer.py new file mode 100644 index 0000000..62d43c1 --- /dev/null +++ b/tests/test_demo_whale_viewer.py @@ -0,0 +1,129 @@ +from streamlit.testing.v1 import AppTest +import pytest # for the exception testing + +import whale_viewer as sw_wv # for data + + +def test_selectbox_ok(): + ''' + test the snippet demoing whale viewer - relating to AppTest'able elements + + we validate that + - there is one selectbox present, with initial value "beluga" and index 0 + - the two markdown elems generated dynamically by the selection corresponds + + - then changing the selection, we do the same checks again + + - finally, we check there are the right number of options (26) + + ''' + at = AppTest.from_file("src/apptest/demo_whale_viewer.py").run() + assert len(at.selectbox) == 1 + assert at.selectbox[0].value == "beluga" + assert at.selectbox[0].index == 0 + + # let's check that the markdown is right + # the first markdown should be "Selected species: beluga" + assert at.markdown[0].value == "Selected species: beluga" + # the second markdown should be "### :whale: #1: Beluga" + print("markdown 1: ", at.markdown[1].value) + assert at.markdown[1].value == "### :whale: #1: Beluga" + + # now let's select a different element. index 4 is commersons_dolphin + v4 = "commersons_dolphin" + v4_str = v4.replace("_", " ").title() + + at.selectbox[0].set_value(v4).run() + assert at.selectbox[0].value == v4 + assert at.selectbox[0].index == 4 + # the first markdown should be "Selected species: commersons_dolphin" + assert at.markdown[0].value == f"Selected species: {v4}" + # the second markdown should be "### :whale: #1: Commersons Dolphin" + assert at.markdown[1].value == f"### :whale: #1: {v4_str}" + + # test there are the right number of options + print("PROPS=> ", dir(at.selectbox[0])) # no length unfortunately, + # test it dynamically intead. + # should be fine + at.selectbox[0].select_index(len(sw_wv.WHALE_CLASSES)-1).run() + # should fail + with pytest.raises(Exception): + at.selectbox[0].select_index(len(sw_wv.WHALE_CLASSES)).run() + +def test_img_props(): + ''' + test the snippet demoing whale viewer - relating to the image + + we validate that + - one image is displayed + - the caption corresponds to the data in WHALE_REFERENCES + - the url is a mock url + + - then changing the image, we do the same checks again + + ''' + at = AppTest.from_file("src/apptest/demo_whale_viewer.py").run() + ix = 0 # we didn't interact with the dropdown, so it should be the first one + # could fetch the property - maybe better in case code example changes + ix = at.selectbox[0].index + + elem = at.get("imgs") # hmm, apparently the naming is not consistent with the other AppTest f/w. + # type(elem[0]) -> "streamlit.testing.v1.element_tree.UnknownElement" haha + assert len(elem) == 1 + img0 = elem[0] + + # we can't check the image, but maybe the alt text? + #assert at.image[0].alt == "beluga" # no, doesn't have that property. + + # for v1.39, the proto comes back something like this: + exp_proto = ''' + imgs { + caption: "https://www.fisheries.noaa.gov/species/beluga-whale" + url: "/mock/media/6a21db178fcd99b82817906fc716a5c35117f4daa1d1c1d3c16ae1c8.png" + } + width: -3 + ''' + # from the proto string we can look for : "" pairs and make a dictionary + import re + + def parse_proto(proto_str): + pattern = r'(\w+):\s*"([^"]+)"' + matches = re.findall(pattern, proto_str) + return {key: value for key, value in matches} + + parsed_proto = parse_proto(str(img0.proto)) + # we're expecting the caption to be WHALE_REFERENCES[ix] + print(parsed_proto) + assert "caption" in parsed_proto + assert parsed_proto["caption"] == sw_wv.WHALE_REFERENCES[ix] + assert "url" in parsed_proto + assert parsed_proto["url"].startswith("/mock/media") + + print(sw_wv.WHALE_REFERENCES[ix]) + + # now let's switch to another index + ix = 15 + v15 = sw_wv.WHALE_CLASSES[ix] + v15_str = v15.replace("_", " ").title() + at.selectbox[0].set_value(v15).run() + + elem = at.get("imgs") + img0 = elem[0] + print("[INFO] image 0 after adjusting dropdown:") + print(img0.type, type(img0.proto))#, "\t", i0.value) # it doesn't have a value + print(img0.proto) + + + parsed_proto = parse_proto(str(img0.proto)) + # we're expecting the caption to be WHALE_REFERENCES[ix] + print(parsed_proto) + assert "caption" in parsed_proto + assert parsed_proto["caption"] == sw_wv.WHALE_REFERENCES[ix] + assert "url" in parsed_proto + assert parsed_proto["url"].startswith("/mock/media") + + + + + + diff --git a/tests/test_input_handling.py b/tests/test_input_handling.py new file mode 100644 index 0000000..b61a318 --- /dev/null +++ b/tests/test_input_handling.py @@ -0,0 +1,208 @@ +import pytest +from pathlib import Path + +from input_handling import is_valid_email, is_valid_number +from input_handling import get_image_datetime, get_image_latlon, decimal_coords + +# generate tests for is_valid_email +# - test with valid email +# - basic email with @ and . +# - test with email with multiple . +# - test with empty email +# - test with None email +# - test with non-string email +# - test with invalid email +# - test with email without @ +# - test with email without . +# - test with email without domain +# - test with email without username +# - test with email without TLD +# - test with email with multiple @ +# - test with email starting with the + sign + + +def test_is_valid_email_valid(): + assert is_valid_email("j@a.bc") + assert is_valid_email("j+oneuse@a.bc") + assert is_valid_email("a@b.cd") + assert is_valid_email("a.b@c.de") + assert is_valid_email("a.b@c.de.fg") + +def test_is_valid_email_empty(): + assert not is_valid_email("") + +def test_is_valid_email_none(): + with pytest.raises(TypeError): + is_valid_email(None) + +def test_is_valid_email_non_string(): + with pytest.raises(TypeError): + is_valid_email(123) + + +def test_is_valid_email_invalid(): + assert not is_valid_email("a.bc") + assert not is_valid_email("a@bc") + assert not is_valid_email("a.b@cc") + assert not is_valid_email("@b.cc") + assert not is_valid_email("a@.cc") + assert not is_valid_email("a@b.") + assert not is_valid_email("a@bb.") + assert not is_valid_email("a@b.cc.") + assert not is_valid_email("a@b@c.d") + +# not sure how xfails come through the CI pipeline yet. +# maybe better to just comment out this stuff until pipeline is setup, then can check /extend +@pytest.mark.xfail(reason="Bug identified, but while setting up CI having failing tests causes more headache") +def test_is_valid_email_invalid_plus(): + assert not is_valid_email("+@test.com") + assert not is_valid_email("+oneuse@test.com") + + +def test_is_valid_number_valid(): + # with a sign or without, fractional or integer are all valid + assert is_valid_number("123") + assert is_valid_number("123.456") + assert is_valid_number("-123") + assert is_valid_number("-123.456") + assert is_valid_number("+123") + assert is_valid_number("+123.456") + +def test_is_valid_number_empty(): + assert not is_valid_number("") + +def test_is_valid_number_none(): + with pytest.raises(TypeError): + is_valid_number(None) + +def test_is_valid_number_invalid(): + # func should return False for strings that are not numbers + assert not is_valid_number("abc") + assert not is_valid_number("123abc") + assert not is_valid_number("abc123") + assert not is_valid_number("123.456.789") + assert not is_valid_number("123,456") + assert not is_valid_number("123-456") + assert not is_valid_number("123+456") +def test_is_valid_number_valid(): + assert is_valid_number("123") + assert is_valid_number("123.456") + assert is_valid_number("-123") + assert is_valid_number("-123.456") + assert is_valid_number("+123") + assert is_valid_number("+123.456") + +def test_is_valid_number_empty(): + assert not is_valid_number("") + +def test_is_valid_number_none(): + with pytest.raises(TypeError): + is_valid_number(None) + +def test_is_valid_number_invalid(): + assert not is_valid_number("abc") + assert not is_valid_number("123abc") + assert not is_valid_number("abc123") + assert not is_valid_number("123.456.789") + assert not is_valid_number("123,456") + assert not is_valid_number("123-456") + assert not is_valid_number("123+456") + + + +# tests for get_image_datetime +# - testing with a valid image with complete, valid metadata +# - testing with a valid image with incomplete metadata (missing datetime info -- that's a legitimate case we should handle) +# - testing with a valid image with incomplete metadata (missing GPS info -- should not affect the datetime extraction) +# - testing with a valid image with no metadata +# - timezones too + + +test_data_pth = Path('tests/data/') +def test_get_image_datetime(): + + # this image has lat, lon, and datetime + f1 = test_data_pth / 'cakes.jpg' + assert get_image_datetime(f1) == "2024:10:24 15:59:45" + #"+02:00" + # hmm, the full datetime requires timezone, which is called OffsetTimeOriginal + + # missing GPS loc: this should not interfere with the datetime + f2 = test_data_pth / 'cakes_no_exif_gps.jpg' + assert get_image_datetime(f2) == "2024:10:24 15:59:45" + + # missng datetime -> expect None + f3 = test_data_pth / 'cakes_no_exif_datetime.jpg' + assert get_image_datetime(f3) == None + + +def test_get_image_latlon(): + # this image has lat, lon, and datetime + f1 = test_data_pth / 'cakes.jpg' + assert get_image_latlon(f1) == (46.51860277777778, 6.562075) + + # missing GPS loc + f2 = test_data_pth / 'cakes_no_exif_gps.jpg' + assert get_image_latlon(f2) == None + + # missng datetime -> expect gps not affected + f3 = test_data_pth / 'cakes_no_exif_datetime.jpg' + assert get_image_latlon(f3) == (46.51860277777778, 6.562075) + +# tests for get_image_latlon with empty file +def test_get_image_latlon_empty(): + assert get_image_latlon("") == None + +# tests for decimal_coords +# - without input, py raises TypeError +# - with the wrong length of input (expecting 3 elements in the tuple), expect ValueError +# - with string inputs instead of numeric, we get a TypeError (should the func bother checking this? happens as built in) +# - with ref direction not in ['N', 'S', 'E', 'W'], expect ValueError, try X, x, NW. +# - with valid inputs, expect the correct output + + +# test data for decimal_coords: (deg,min,sec), ref, expected output +coords_conversion_data = [ + ((30, 1, 2), 'W', -30.01722222), + ((30, 1, 2), 'E', 30.01722222), + ((30, 1, 2), 'N', 30.01722222), + ((30, 1, 2), 'S', -30.01722222), + ((46, 31, 6.97), 'N', 46.51860278), + ((6, 33, 43.47), 'E', 6.56207500) +] +@pytest.mark.parametrize("input_coords, ref, expected_output", coords_conversion_data) +def test_decimal_coords(input_coords, ref, expected_output): + assert decimal_coords(input_coords, ref) == pytest.approx(expected_output) + +def test_decimal_coords_no_input(): + with pytest.raises(TypeError): + decimal_coords() + +def test_decimal_coords_wrong_length(): + with pytest.raises(ValueError): + decimal_coords((1, 2), 'W') + + with pytest.raises(ValueError): + decimal_coords((30,), 'W') + + with pytest.raises(ValueError): + decimal_coords((30, 1, 2, 4), 'W') + +def test_decimal_coords_non_numeric(): + with pytest.raises(TypeError): + decimal_coords(('1', '2', '3'), 'W') + + +def test_decimal_coords_invalid_ref(): + with pytest.raises(ValueError): + decimal_coords((30, 1, 2), 'X') + + with pytest.raises(ValueError): + decimal_coords((30, 1, 2), 'x') + + with pytest.raises(ValueError): + decimal_coords((30, 1, 2), 'NW') + + + + diff --git a/tests/test_whale_viewer.py b/tests/test_whale_viewer.py new file mode 100644 index 0000000..d8df56b --- /dev/null +++ b/tests/test_whale_viewer.py @@ -0,0 +1,50 @@ +import pytest +from pathlib import Path + +from whale_viewer import format_whale_name + +# testing format_whale_name +# - testing with valid whale names +# - testing with invalid whale names +# - empty string +# - with the wrong datatype + +def test_format_whale_name_ok(): + # some with 1 word, most with 2 words, others with 3 or 4. + assert format_whale_name("right_whale") == "Right Whale" + assert format_whale_name("blue_whale") == "Blue Whale" + assert format_whale_name("humpback_whale") == "Humpback Whale" + assert format_whale_name("sperm_whale") == "Sperm Whale" + assert format_whale_name("fin_whale") == "Fin Whale" + assert format_whale_name("sei_whale") == "Sei Whale" + assert format_whale_name("minke_whale") == "Minke Whale" + assert format_whale_name("gray_whale") == "Gray Whale" + assert format_whale_name("bowhead_whale") == "Bowhead Whale" + assert format_whale_name("beluga") == "Beluga" + + assert format_whale_name("long_finned_pilot_whale") == "Long Finned Pilot Whale" + assert format_whale_name("melon_headed_whale") == "Melon Headed Whale" + assert format_whale_name("pantropic_spotted_dolphin") == "Pantropic Spotted Dolphin" + assert format_whale_name("spotted_dolphin") == "Spotted Dolphin" + assert format_whale_name("killer_whale") == "Killer Whale" + + +def test_format_whale_name_invalid(): + # not so clear what this would be, except perhaps a string that has gone through the fucn alrealdy? + assert format_whale_name("Right Whale") == "Right Whale" + assert format_whale_name("Blue Whale") == "Blue Whale" + assert format_whale_name("Long Finned Pilot Whale") == "Long Finned Pilot Whale" + +# testing with empty string +def test_format_whale_name_empty(): + assert format_whale_name("") == "" + +# testing with the wrong datatype +# we should get a TypeError - currently it fails with a AttributeError +@pytest.mark.xfail +def test_format_whale_name_none(): + with pytest.raises(TypeError): + format_whale_name(None) + + +# display_whale requires UI to test it. \ No newline at end of file