Skip to content

Commit

Permalink
gwas.py: fixed gwas demo run and IID column position (#234)
Browse files Browse the repository at this point in the history
* Added --firth option. Plink2 output files, *.glm.firth and *.glm.logistic.hybrid, are now renamed to *.glm.logistic.

* shorten long lines

* fix typo

* Added fix for #232

* update version.py and changelog

* Update CHANGELOG.md

set version as 1.8.1

* Update version.py

set version as 1.8.1

---------

Co-authored-by: Espen Hagen <[email protected]>
  • Loading branch information
ttfiliz and espenhgn authored Mar 5, 2024
1 parent 1d9a4ec commit d4ef7de
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
15 changes: 11 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,31 @@ If MD5 sum is not listed for a certain release then it means that the container

### Added

* Additions goes here
* Additions go here

### Updated

* Updates goes here
* Updates go here

### Fixed

* Fixes goes here
* Fixes go here

### Removed

* Removals goes here
* Removals go here

### Misc

* Miscellaneous goes here

## [1.8.1] - 2024-03-05

### Fixed

* Fixed parsing of `IID` field in `pheno.dict`
* Fixed handling in `gwas.py` of the differing output-file suffixes (`*.glm.firth`, `*.glm.logistic.hybrid`) that plink2 produces for binary phenotypes; these files are now renamed to `*.glm.logistic`

## [1.8.0] - 2024-02-22

### Added
Expand Down
9 changes: 8 additions & 1 deletion scripts/gwas/gwas.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ def parse_args(args):
help="covariates to control for (must be columns of the --pheno-file); individuals with "
"missing values for any covariates will be excluded not just from <out>.covar, but also "
"from <out>.pheno file")
pheno_parser.add_argument("--firth", type=str, default="firth-fallback",
help="regression mode for binary phenotypes. select between no-firth, firth-fallback and "
"firth")
pheno_parser.add_argument("--variance-standardize", type=str, default=None, nargs='*',
help="the list of continuous phenotypes to standardize variance; accept the list of "
"columns from the --pheno file (if empty, applied to all); doesn't apply to dummy "
Expand Down Expand Up @@ -536,6 +539,10 @@ def make_saige_merge_commands(args, logistic, array_spec):

def make_plink2_merge_commands(args, logistic):
cmd = ''
# Move *.glm.logistic.hybrid and *.glm.firth to *.glm.logistic
cmd += """find . -type f -name "*.glm.logistic.hybrid" -exec sh -c 'mv "$1" "${1%.glm.logistic.hybrid}.glm"""
cmd += """.logistic"' _ {} \\;\n"""
cmd += """find . -type f -name "*.glm.firth" -exec sh -c 'mv "$1" "${1%.glm.firth}.glm.logistic"' _ {} \\;\n"""
for pheno in args.pheno:
cmd += '$PYTHON gwas.py merge-plink2 ' + \
pass_arguments_along(args, ['info-file', 'info', 'maf', 'hwe', 'geno']) + \
Expand Down Expand Up @@ -1121,7 +1128,7 @@ def append_job(args, commands, array_spec, slurm_job_index, cmd_file, submit_job
def rename_iid_column(log, pheno_dict, pheno):
if np.sum(pheno_dict['TYPE'] == 'IID') != 1:
raise ValueError('Exacly one column in the dictionary file must be marked as IID')
iid_column_name = pheno_dict.loc[pheno_dict['TYPE'] == 'IID', 'FIELD'][0]
iid_column_name = pheno_dict.loc[pheno_dict['TYPE'] == 'IID', 'FIELD'].values[0]
if iid_column_name not in pheno.columns:
raise ValueError(f'IID column ({iid_column_name}) not present in --pheno-file')
if (iid_column_name != 'IID') and ('IID' in pheno.columns):
Expand Down
2 changes: 1 addition & 1 deletion version/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
_MINOR = "8"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "2"
_PATCH = "1"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""
Expand Down

0 comments on commit d4ef7de

Please sign in to comment.