Skip to content

Commit

Permalink
Merge pull request #162 from ypriverol/patch-01
Browse files Browse the repository at this point in the history
Major changes and bug fixing
  • Loading branch information
ypriverol authored Feb 28, 2024
2 parents 01e73d8 + f5a449c commit b78c2b5
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 40 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ name: Continuous integration Unit tests

on:
push:
branches: [ master ]
branches: [ main ]
pull_request:
branches: [ master ]
branches: [ main ]

jobs:
PythonBlack:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ name: Python application

on:
push:
branches: [ master ]
branches: [ main ]
pull_request:
branches: [ master ]
branches: [ main ]

jobs:
build:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ name: Python package

on:
push:
branches: [ master ]
branches: [ main ]
pull_request:
branches: [ master ]
branches: [ main ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion sdrf_pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.24"
__version__ = "0.0.25"
51 changes: 39 additions & 12 deletions sdrf_pipelines/sdrf/sdrf_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ def ontology_term_parser(cell_value: str = None):

class SDRFColumn(Column):
def __init__(
self,
name: str,
validations: typing.Iterable["_BaseValidation"] = None,
optional_validations: typing.Iterable["_BaseValidation"] = None,
allow_empty=False,
optional_type=True,
self,
name: str,
validations: typing.Iterable["_BaseValidation"] = None,
optional_validations: typing.Iterable["_BaseValidation"] = None,
allow_empty=False,
optional_type=True,
):
if validations is None:
validations = []
Expand Down Expand Up @@ -145,8 +145,8 @@ def validate(self, series: pd.Series) -> pd.Series:

if ontology_terms is not None:
query_labels = [o["label"].lower() for o in ontology_terms]
for label in query_labels:
labels.append(label)
if term[TERM_NAME] in query_labels:
labels.append(term[TERM_NAME])
if self._not_available:
labels.append(NOT_AVAILABLE)
if self._not_applicable:
Expand Down Expand Up @@ -179,6 +179,10 @@ def validate(self, panda_sdrf: sdrf = None) -> typing.List[LogicError]:
)
errors.append(LogicError(error_message, error_type=logging.WARN))

empty_cells_errors = self.validate_empty_cells(panda_sdrf)
if empty_cells_errors:
errors.extend(empty_cells_errors)

# Check the mandatory fields
error_mandatory = self.validate_mandatory_columns(panda_sdrf)
if error_mandatory is not None:
Expand Down Expand Up @@ -218,9 +222,9 @@ def validate_column_names(self, panda_sdrf):
errors.append(cname)
elif m.group().startswith("factor value"):
if (
m.group().replace("factor value", "comment") not in panda_sdrf.columns
and m.group().replace("factor value", "characteristics") not in panda_sdrf.columns
and m.group() not in panda_sdrf.columns
m.group().replace("factor value", "comment") not in panda_sdrf.columns
and m.group().replace("factor value", "characteristics") not in panda_sdrf.columns
and m.group() not in panda_sdrf.columns
):
error_message = "The " + cname + " column should also be in the characteristics or comment"
logerror.append(LogicError(error_message, error_type=logging.ERROR))
Expand Down Expand Up @@ -260,7 +264,7 @@ def validate_columns_order(panda_sdrf):
error_message = "The column " + column + "cannot be before the assay name"
error_columns_order.append(LogicError(error_message, error_type=logging.ERROR))
if (
"characteristics" in column or ("material type" in column and "factor value" not in column)
"characteristics" in column or ("material type" in column and "factor value" not in column)
) and cnames.index(column) > index:
error_message = "The column " + column + "cannot be after the assay name"
error_columns_order.append(LogicError(error_message, error_type=logging.ERROR))
Expand Down Expand Up @@ -310,6 +314,29 @@ def check_recommendations(self, panda_sdrf):
warnings += column.validate_optional(series)
return sorted(warnings, key=lambda e: e.row)

def validate_empty_cells(self, panda_sdrf):
    """
    Check for empty cells in the SDRF.

    Every cell of the dataframe is inspected. A cell counts as empty when it is
    ``None``, a pandas missing value (e.g. NaN), the literal string ``"nan"``,
    or a string made only of whitespace. Non-string cells that are not missing
    values are treated as filled.

    :param panda_sdrf: SDRF dataframe
    :return: List of LogicError (error_type ``logging.ERROR``), one per empty cell,
        ordered row by row and, within a row, column by column.
    """

    def is_filled(cell_value):
        if cell_value is None:
            return False
        if not isinstance(cell_value, str):
            # Non-string cells have no ``strip``; only missing markers (NaN/NaT/NA)
            # are empty. NOTE(review): assumes cells are scalars - confirm with the reader.
            return not pd.isna(cell_value)
        return cell_value != "nan" and len(cell_value.strip()) > 0

    # Walk the frame positionally: this does not depend on ``DataFrame.map``
    # (pandas >= 2.1 only) and stays correct when row or column labels repeat.
    column_labels = list(panda_sdrf.columns)
    errors = []
    for record in panda_sdrf.itertuples(index=True, name=None):
        row = record[0]  # first item is the index label of the row
        for col, cell_value in zip(column_labels, record[1:]):
            if not is_filled(cell_value):
                message = f"Empty value found Row: {row}, Column: {col}"
                errors.append(LogicError(message, error_type=logging.ERROR))
    return errors


default_schema = SDRFSchema(
[
Expand Down
77 changes: 56 additions & 21 deletions sdrf_pipelines/zooma/ols.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,17 @@ def get_ancestors(self, ont, iri):
raise ex

def search(
self,
name,
query_fields=None,
ontology=None,
field_list=None,
children_of=None,
exact=None,
bytype="class",
self,
name: str,
query_fields=None,
ontology: str=None,
field_list=None,
children_of=None,
exact: bool=None,
bytype: str="class",
rows: int=10,
num_retries:int=10,
start: int=0,
):
"""
Searches the OLS with the given term
Expand All @@ -124,6 +127,8 @@ def search(
@:param exact: Forces exact match if not `None`
@:param bytype: restrict to terms one of {class,property,individual,ontology}
@:param childrenOf: Search only under a certain term.
@:param rows: number of rows to query on each call of OLS search
@:param num_retries: Number of retries to OLS when it fails.
"""
params = {"q": name}
if ontology is not None:
Expand All @@ -135,6 +140,9 @@ def search(
if bytype:
params["type"] = _concat_str_or_list(bytype)

if rows:
params["rows"] = rows

if ontology:
params["ontology"] = _concat_str_or_list(ontology)
elif self.ontology:
Expand All @@ -155,26 +163,53 @@ def search(
if len(children_of) > 0:
params["childrenOf"] = _concat_str_or_list(children_of)

retry_num = 0
if start:
params["start"] = start

docs_found = []

while retry_num < 10:
for retry_num in range(num_retries):
try:
req = self.session.get(self.ontology_search, params=params)
logger.debug("Request to OLS search API: %s - %s", req.status_code, name)
logger.debug("Request to OLS search API term %s, status code %s", name, req.status_code)

req.raise_for_status()
if req.json()["response"]["numFound"]:
return req.json()["response"]["docs"]
if exact:
logger.debug("OLS exact search returned empty response for %s", name)
if req.status_code != 200:
logger.error("OLS search term %s error tried number %s", name, retry_num)
req.raise_for_status()
else:
logger.debug("OLS search returned empty response for %s", name)
return None
if req.json()["response"]["numFound"] == 0:
if exact:
logger.debug("OLS exact search returned empty response for %s", name)
else:
logger.debug("OLS search returned empty response for %s", name)
return docs_found
elif len(req.json()["response"]["docs"]) < rows:
return req.json()["response"]["docs"]
else:
docs_found = req.json()["response"]["docs"]
docs_found.extend(self.search(name, query_fields=query_fields, ontology=ontology,
field_list=field_list, children_of=children_of, exact=exact,
bytype=bytype, rows=rows, num_retries=num_retries,
start=(rows + (start))))
return docs_found

if req.status_code == 200 and req.json()["response"]["numFound"] == 0:
if exact:
logger.debug("OLS exact search returned empty response for %s", name)
else:
logger.debug("OLS search returned empty response for %s", name)
return None
elif req.status_code != 200 and req.json()["response"]["numFound"] > 0:
if len(req.json()["response"]["docs"]) <= rows:
return req.json()["response"]["docs"]
else:
start = 0
docs_found = req.json()["response"]["docs"]

except Exception as ex:
retry_num += 1
logger.debug("OLS error searching the following term -- %s iteration %s.\n%e", req.url, retry_num, ex)
logger.exception("OLS error searching the following term -- %s iteration %s.\n%e", req.url, retry_num, ex)

return None
return docs_found

def suggest(self, name, ontology=None):
"""Suggest terms from an optional list of ontologies
Expand Down

0 comments on commit b78c2b5

Please sign in to comment.