gwas.py: fixed gwas demo run and IID column position (#234)

* Added --firth option. Plink2 output files, *.glm.firth and *.glm.logistic.hybrid, are now renamed to *.logistic.
* shorten long lines
* fix typo
* Added fix for #232
* update version.py and changelog
* Update CHANGELOG.md set version as 1.8.1
* Update version.py set version as 1.8.1

---------

Co-authored-by: Espen Hagen <[email protected]>
comorment · Mar 5, 2024 · d4ef7de · d4ef7de
1 parent 1d9a4ec
commit d4ef7de
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 6 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,24 +21,31 @@ If MD5 sum is not listed for a certain release then it means that the container
 
 ### Added
 
-* Additions goes here
+* Additions go here
 
 ### Updated
 
-* Updates goes here
+* Updates go here
 
 ### Fixed
 
-* Fixes goes here
+* Fixes go here
 
 ### Removed
 
-* Removals goes here
+* Removals go here
 
 ### Misc
 
 * Miscellaneous goes here
 
+## [1.8.1] - 2024-03-05
+
+### Fixed
+
+* Fixed parsing of `IID` field in `pheno.dict`
+* Fixed issue with files with different suffixes produced by plink2 for binary phenotypes in `gwas.py`
+
 ## [1.8.0] - 2024-02-22
 
 ### Added

diff --git a/scripts/gwas/gwas.py b/scripts/gwas/gwas.py
@@ -114,6 +114,9 @@ def parse_args(args):
                               help="covariates to control for (must be columns of the --pheno-file); individuals with "
                               "missing values for any covariates will be excluded not just from <out>.covar, but also "
                               "from <out>.pheno file")
+    pheno_parser.add_argument("--firth", type=str, default="firth-fallback",
+                              help="regression mode for binary phenotypes. select between no-firth, firth-fallback and "
+                              "firth")
     pheno_parser.add_argument("--variance-standardize", type=str, default=None, nargs='*',
                               help="the list of continuous phenotypes to standardize variance; accept the list of "
                               "columns from the --pheno file (if empty, applied to all); doesn't apply to dummy "
@@ -536,6 +539,10 @@ def make_saige_merge_commands(args, logistic, array_spec):
 
 def make_plink2_merge_commands(args, logistic):
     cmd = ''
+    # Move *.glm.logistic.hybrid and *.glm.firth to *.glm.logistic
+    cmd += """find . -type f -name "*.glm.logistic.hybrid" -exec sh -c 'mv "$1" "${1%.glm.logistic.hybrid}.glm"""
+    cmd += """.logistic"' _ {} \\;\n"""
+    cmd += """find . -type f -name "*.glm.firth" -exec sh -c 'mv "$1" "${1%.glm.firth}.glm.logistic"' _ {} \\;\n"""
     for pheno in args.pheno:
         cmd += '$PYTHON gwas.py merge-plink2 ' + \
             pass_arguments_along(args, ['info-file', 'info', 'maf', 'hwe', 'geno']) + \
@@ -1121,7 +1128,7 @@ def append_job(args, commands, array_spec, slurm_job_index, cmd_file, submit_job
 def rename_iid_column(log, pheno_dict, pheno):
     if np.sum(pheno_dict['TYPE'] == 'IID') != 1:
         raise ValueError('Exacly one column in the dictionary file must be marked as IID')
-    iid_column_name = pheno_dict.loc[pheno_dict['TYPE'] == 'IID', 'FIELD'][0]
+    iid_column_name = pheno_dict.loc[pheno_dict['TYPE'] == 'IID', 'FIELD'].values[0]
     if iid_column_name not in pheno.columns:
         raise ValueError(f'IID column ({iid_column_name}) not present in --pheno-file')
     if (iid_column_name != 'IID') and ('IID' in pheno.columns):

diff --git a/version/version.py b/version/version.py
@@ -2,7 +2,7 @@
 _MINOR = "8"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "2"
+_PATCH = "1"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""