diff --git a/qurro/_metadata_utils.py b/qurro/_metadata_utils.py index 7dbe5c0b..15807569 100644 --- a/qurro/_metadata_utils.py +++ b/qurro/_metadata_utils.py @@ -137,6 +137,10 @@ def read_gnps_feature_metadata_file(md_file_loc, feature_ranks_df): with rows in the GNPS metadata file -- the precision of the numbers from which GNPS feature IDs are computed varies between the ranks/BIOM table and the actual numbers contained in the GNPS metadata file. + + NOTE: this function is experimental and mostly untested. Things like + invalid inputs (e.g. non-numeric "parent mass" or "RTConsensus" values) + will cause problems. """ # Note that we don't set index_col = 0 -- the columns we care about # ("parent mass", "RTConsensus", and "LibraryID"), as far as I know, don't diff --git a/qurro/tests/test_metadata_utils.py b/qurro/tests/test_metadata_utils.py index bd98c1e8..bc1b4021 100644 --- a/qurro/tests/test_metadata_utils.py +++ b/qurro/tests/test_metadata_utils.py @@ -5,7 +5,7 @@ from pandas.errors import ParserError import qiime2 from qurro._df_utils import replace_nan -from qurro._metadata_utils import read_metadata_file +from qurro._metadata_utils import read_metadata_file, get_truncated_feature_id def test_read_metadata_file_basic(): @@ -206,3 +206,27 @@ def test_read_metadata_file_nan_id(): with pytest.raises(qiime2.metadata.MetadataFileError): qiime2.Metadata.load(ni) + + +def test_get_truncated_feature_id(): + + ffi = "123.4567890123;456.7890123456789" + assert get_truncated_feature_id(ffi) == "123.4568;456.7890" + + assert get_truncated_feature_id("1.2;3.4") == "1.2000;3.4000" + + with pytest.raises(ValueError): + get_truncated_feature_id("") + + with pytest.raises(ValueError): + get_truncated_feature_id(" ") + + with pytest.raises(ValueError): + get_truncated_feature_id("abc") + + with pytest.raises(ValueError): + get_truncated_feature_id("abc;def") + + # Test case when there are too many semicolons + with pytest.raises(ValueError): + 
get_truncated_feature_id("1.0;2.0;3.0")