68
68
69
69
70
70
def get_empty_snps_dataframe ():
71
- """ Get empty dataframe normalized for usage with ``snps``.
71
+ """Get empty dataframe normalized for usage with ``snps``.
72
72
73
73
Returns
74
74
-------
@@ -81,7 +81,7 @@ def get_empty_snps_dataframe():
81
81
82
82
83
83
class Reader :
84
- """ Class for reading and parsing raw data / genotype files. """
84
+ """Class for reading and parsing raw data / genotype files."""
85
85
86
86
def __init__ (self , file = "" , only_detect_source = False , resources = None , rsids = ()):
87
87
"""Initialize a `Reader`.
@@ -104,7 +104,7 @@ def __init__(self, file="", only_detect_source=False, resources=None, rsids=()):
104
104
self ._rsids = frozenset (rsids )
105
105
106
106
def read (self ):
107
- """ Read and parse a raw data / genotype file.
107
+ """Read and parse a raw data / genotype file.
108
108
109
109
Returns
110
110
-------
@@ -447,18 +447,70 @@ def read_23andme(self, file, compression):
447
447
"""
448
448
449
449
def parser():
    """Parse a 23andMe file into a dataframe normalized for ``snps``.

    Returns
    -------
    tuple
        1-tuple holding the dataframe, indexed by ``rsid``
    """
    df = pd.read_csv(
        file,
        comment="#",
        sep="\t",
        na_values="--",
        names=["rsid", "chrom", "pos", "genotype"],
        compression=compression,
    )
    # rows without an identifier or a location are discarded up front
    df = df.dropna(subset=["rsid", "chrom", "pos"])

    # No dtype is given to `read_csv`, so a chromosome may be read either
    # as a number or as a string; map both spellings onto the string form.
    autosomes = [str(n) for n in range(1, 23)]
    chrom_map = {name: name for name in autosomes + ["X", "Y", "MT"]}
    chrom_map.update({int(name): name for name in autosomes})
    # NOTE: `Series.map` with a dict yields NaN for any value that is not
    # a key, so chromosomes outside this table end up as NaN.
    df["chrom"] = df["chrom"].map(chrom_map)

    df = df.astype(dtype=NORMALIZED_DTYPES)
    df = df.set_index("rsid")
    return (df,)
462
514
463
515
return self .read_helper ("23andMe" , parser )
464
516
@@ -725,7 +777,7 @@ def parser():
725
777
return self .read_helper ("LivingDNA" , parser )
726
778
727
779
def read_mapmygenome (self , file , compression , header ):
728
- """ Read and parse Mapmygenome file.
780
+ """Read and parse Mapmygenome file.
729
781
730
782
https://mapmygenome.in
731
783
@@ -1065,7 +1117,7 @@ def parser():
1065
1117
return self .read_helper ("DNA.Land" , parser )
1066
1118
1067
1119
def read_snps_csv (self , file , comments , compression ):
1068
- """ Read and parse CSV file generated by ``snps``.
1120
+ """Read and parse CSV file generated by ``snps``.
1069
1121
1070
1122
https://pypi.org/project/snps/
1071
1123
0 commit comments