Skip to content

Commit

Permalink
peds-w-jeff
Browse files Browse the repository at this point in the history
  • Loading branch information
cherman2 committed Aug 19, 2024
1 parent 0335d7b commit 949df13
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 26 deletions.
38 changes: 23 additions & 15 deletions q2_fmt/_peds.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

def peds(ctx, table, metadata, peds_metric, time_column, reference_column,
subject_column, filter_missing_references=False,
drop_incomplete_subjects=False, drop_incomplete_timepoint=None,
drop_incomplete_subjects=False, drop_incomplete_timepoints=None,
level_delimiter=None):

peds_heatmap = ctx.get_action('fmt', 'peds_heatmap')
Expand All @@ -38,14 +38,14 @@ def peds(ctx, table, metadata, peds_metric, time_column, reference_column,
table=table, metadata=metadata, time_column=time_column,
subject_column=subject_column, reference_column=reference_column,
drop_incomplete_subjects=drop_incomplete_subjects,
drop_incomplete_timepoint=drop_incomplete_timepoint,
drop_incomplete_timepoints=drop_incomplete_timepoints,
filter_missing_references=filter_missing_references)

else:
if drop_incomplete_subjects or drop_incomplete_timepoint:
if drop_incomplete_subjects or drop_incomplete_timepoints:
warnings.warn('Feature PEDS was selected as the PEDS metric, which'
' does not accept `drop_incomplete_subjects` or'
' `drop_incomplete_timepoint` as parameters. One'
' `drop_incomplete_timepoints` as parameters. One'
' (or both) of these parameters were detected in'
' your input, and will be ignored.')

Expand Down Expand Up @@ -105,6 +105,11 @@ def peds_heatmap(output_dir: str, data: pd.DataFrame,
measure_name=measure_name, order=order,
n_label=n_label, data_denom=data_denom)

if global_stats is None:
gstats = None
if per_subject_stats is None:
table1 = None
psstats = None
with open(os.path.join(output_dir, 'index.html'), 'w') as fh:
spec_string = json.dumps(full_spec)
fh.write(index.render(spec=spec_string,
Expand All @@ -117,12 +122,17 @@ def sample_peds(table: pd.DataFrame, metadata: qiime2.Metadata,
time_column: str, reference_column: str, subject_column: str,
filter_missing_references: bool = False,
drop_incomplete_subjects: bool = False,
drop_incomplete_timepoint: list = None) -> (pd.DataFrame):
drop_incomplete_timepoints: list = None) -> (pd.DataFrame):

ids_with_data = table.index
metadata = metadata.filter_ids(ids_to_keep=ids_with_data)
column_properties = metadata.columns
# TODO: Make incomplete samples possible; move this to heatmap
metadata = metadata.to_dataframe()
if drop_incomplete_timepoints is not None:
metadata = _drop_incomplete_timepoints(metadata, time_column,
drop_incomplete_timepoints)
table.filter(items=metadata.index)
# TODO: Make incomplete samples possible; move this to heatmap
num_timepoints = _check_for_time_column(metadata, time_column)
_check_column_type(column_properties, "time",
time_column, "numeric")
Expand All @@ -139,10 +149,7 @@ def sample_peds(table: pd.DataFrame, metadata: qiime2.Metadata,
subject_column, "categorical")
_check_duplicate_subject_timepoint(subject_series, metadata,
subject_column, time_column)
if drop_incomplete_timepoint is not None:
metadata = _drop_incomplete_timepoints(metadata, time_column,
drop_incomplete_timepoint)
table.filter(items=metadata.index)

# return things that should be removed
metadata, used_references = \
_check_subjects_in_all_timepoints(subject_series, num_timepoints,
Expand Down Expand Up @@ -357,7 +364,7 @@ def _check_reference_column(metadata, reference_column):
def _filter_associated_reference(reference_series, metadata, time_column,
filter_missing_references, reference_column):
used_references = reference_series[~metadata[time_column].isna()]

print(used_references)
if used_references.isna().any():
if filter_missing_references:
metadata = metadata.dropna(subset=[reference_column])
Expand Down Expand Up @@ -393,8 +400,8 @@ def _check_duplicate_subject_timepoint(subject_series, metadata,


def _drop_incomplete_timepoints(metadata, time_column,
drop_incomplete_timepoint):
for time in drop_incomplete_timepoint:
drop_incomplete_timepoints):
for time in drop_incomplete_timepoints:
try:
assert (float(time)
in metadata[time_column].unique())
Expand Down Expand Up @@ -492,7 +499,7 @@ def peds_simulation(table: pd.DataFrame, metadata: qiime2.Metadata,
subject_column: str,
filter_missing_references: bool = False,
drop_incomplete_subjects: bool = False,
drop_incomplete_timepoint: list = None,
drop_incomplete_timepoints: list = None,
num_iterations: int = 999) -> (pd.DataFrame, pd.DataFrame):

metadata_df = metadata.to_dataframe()
Expand Down Expand Up @@ -527,7 +534,8 @@ def peds_simulation(table: pd.DataFrame, metadata: qiime2.Metadata,
subject_column=subject_column,
filter_missing_references=filter_missing_references,
drop_incomplete_subjects=drop_incomplete_subjects,
drop_incomplete_timepoint=drop_incomplete_timepoint).set_index("id")
drop_incomplete_timepoints=drop_incomplete_timepoints
).set_index("id")
actual_peds = peds["measure"]

# Mismatch simulation:
Expand Down
22 changes: 11 additions & 11 deletions q2_fmt/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@
drop_incomplete_subjects = ('Filter out subjects that do not have a sample at'
' every timepoint. Default behavior is to raise an'
' error if any subject is missing a timepoint.')
drop_incomplete_timepoint = ('Filter out specified timepoints. This is useful'
' for removing frequently missing timepoints'
' which cause many subjects to be dropped.'
' Default behavior is to raise an error if any'
' subject is missing a timepoint.')
drop_incomplete_timepoints = ('Filter out multiple specified timepoints.'
' This is useful for removing frequently missing'
' timepoints which cause many subjects to be'
' dropped. Default behavior is to raise an error'
' if any subject is missing a timepoint.')
level_delimiter = 'delimiter to split taxonomic label on'
control_column = ('The column within `metadata` that contains any relevant'
' control group IDs. Actual treatment samples should not'
Expand Down Expand Up @@ -233,7 +233,7 @@
'subject_column': Str,
'filter_missing_references': Bool,
'drop_incomplete_subjects': Bool,
'drop_incomplete_timepoint': List[Str],
'drop_incomplete_timepoints': List[Str],
'level_delimiter': Str},
outputs=[('peds_heatmap', Visualization)],
input_descriptions={'table': peds_table},
Expand All @@ -245,7 +245,7 @@
'subject_column': subject_column,
'filter_missing_references': filter_missing_references,
'drop_incomplete_subjects': drop_incomplete_subjects,
'drop_incomplete_timepoint': drop_incomplete_timepoint,
'drop_incomplete_timepoints': drop_incomplete_timepoints,
'level_delimiter': level_delimiter},
output_descriptions={'peds_heatmap': 'PEDS heatmap visualization'},
name='PEDS pipeline to calculate feature or sample PEDS',
Expand Down Expand Up @@ -276,7 +276,7 @@
'reference_column': Str, 'subject_column': Str,
'filter_missing_references': Bool,
'drop_incomplete_subjects': Bool,
'drop_incomplete_timepoint': List[Str]},
'drop_incomplete_timepoints': List[Str]},
outputs=[('peds_dists', Dist1D[Ordered, Matched] % Properties("peds"))],
input_descriptions={'table': peds_table},
parameter_descriptions={
Expand All @@ -286,7 +286,7 @@
'subject_column': subject_column,
'filter_missing_references': filter_missing_references,
'drop_incomplete_subjects': drop_incomplete_subjects,
'drop_incomplete_timepoint': drop_incomplete_timepoint
'drop_incomplete_timepoints': drop_incomplete_timepoints
},
output_descriptions={
'peds_dists': peds_dists
Expand Down Expand Up @@ -337,7 +337,7 @@
'subject_column': T_subject,
'filter_missing_references': Bool,
'drop_incomplete_subjects': Bool,
'drop_incomplete_timepoint': List[Str],
'drop_incomplete_timepoints': List[Str],
'num_iterations': Int % Range(99, None)},
outputs=[('per_subject_stats', StatsTable[Pairwise]),
('global_stats', StatsTable[Pairwise])],
Expand All @@ -348,7 +348,7 @@
'subject_column': subject_column,
'filter_missing_references': filter_missing_references,
'drop_incomplete_subjects': drop_incomplete_subjects,
'drop_incomplete_timepoint': drop_incomplete_timepoint,
'drop_incomplete_timepoints': drop_incomplete_timepoints,
'num_iterations': 'The number of iterations to run the Monte Carlo'
' simulation on'
},
Expand Down

0 comments on commit 949df13

Please sign in to comment.