Skip to content

Commit

Permalink
update(data init): no 'raw' nor 'integration_files' sub-sub-folders required
Browse files Browse the repository at this point in the history
  • Loading branch information
johaGL committed Jan 26, 2024
1 parent 0397288 commit a5e6d63
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ requirements: test_environment

## Make Dataset
data: requirements
$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed
$(PYTHON_INTERPRETER) src/data/make_dataset.py data/

## Delete all compiled Python files
clean:
Expand Down
1 change: 0 additions & 1 deletion src/dimet/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ hydra:
# This config.yaml file is used by the code when calling --help

defaults:
# - analysis: abundance_plot # deactivated as it blocks the --help display
- _self_
- override hydra/help: dimet_help

Expand Down
24 changes: 10 additions & 14 deletions src/dimet/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def build(self) -> "Dataset":

class Dataset(BaseModel):
config: DatasetConfig
raw_data_folder: str = None
sub_folder_absolute: str = None
metadata_df: Optional[pd.DataFrame] = None
abundances_df: Optional[pd.DataFrame] = None
Expand Down Expand Up @@ -75,22 +74,23 @@ def preload(self):
logger.info("looking for data in %s", self.sub_folder_absolute)
else:
self.sub_folder_absolute = self.config.subfolder
self.raw_data_folder = os.path.join(self.sub_folder_absolute, "raw")


# start loading the dataframes
file_paths = [
("metadata", os.path.join(self.raw_data_folder,
("metadata", os.path.join(self.sub_folder_absolute,
self.config.metadata + ".csv")),
("abundances", os.path.join(self.raw_data_folder,
("abundances", os.path.join(self.sub_folder_absolute,
self.config.abundances + ".csv")),
("mean_enrichment", os.path.join(
self.raw_data_folder,
self.sub_folder_absolute,
self.config.mean_enrichment + ".csv")),
("isotopologue_proportions", os.path.join(
self.raw_data_folder,
self.sub_folder_absolute,
self.config.isotopologue_proportions + ".csv")),
("isotopologues", os.path.join(
self.raw_data_folder, self.config.isotopologues + ".csv")),
self.sub_folder_absolute,
self.config.isotopologues + ".csv")),
]
dfs = []
for label, file_path in file_paths:
Expand Down Expand Up @@ -130,7 +130,7 @@ def preload(self):
# log the first 5 rows of the metadata
logger.info("Loaded metadata: \n%s", self.metadata_df.head())
logger.info(
"Finished loading raw dataset %s, available dataframes are : %s",
"Finished loading dataset %s, available dataframes are : %s",
self.config.label, self.available_datasets
)
self.check_expectations()
Expand Down Expand Up @@ -191,17 +191,13 @@ class DataIntegrationConfig(DatasetConfig):

class DataIntegration(Dataset):
config: DataIntegrationConfig
integration_files_folder_absolute: str = None # absolute path directory
deg_dfs: Dict[int, pd.DataFrame] = {}
pathways_dfs: Dict[str, pd.DataFrame] = {}

def set_dataset_integration_config(self):
self.preload()
self.split_datafiles_by_compartment()

self.integration_files_folder_absolute = os.path.join(
self.sub_folder_absolute, "integration_files")

self.check_expectations() # of the Dataset class
self.check_expectations_integration_data() # of this child class

Expand Down Expand Up @@ -231,7 +227,7 @@ def load_deg_dfs(self):
for i, file_name in enumerate(self.config.transcripts):
try:
path_deg_file = os.path.join(
self.integration_files_folder_absolute,
self.sub_folder_absolute,
f"{file_name}.csv")
deg_df = pd.read_csv(path_deg_file, sep='\t', header=0)
self.deg_dfs[i] = deg_df
Expand All @@ -247,7 +243,7 @@ def load_pathways_dfs(self):
for k in self.config.pathways.keys():
try:
path_file = os.path.join(
self.integration_files_folder_absolute,
self.sub_folder_absolute,
f"{self.config.pathways[k]}.csv")
pathway_df = pd.read_csv(path_file, sep='\t', header=0)
self.pathways_dfs[k] = pathway_df
Expand Down

0 comments on commit a5e6d63

Please sign in to comment.