Skip to content

Commit

Permalink
Merge pull request #254 from bkille/read-write-index
Browse files Browse the repository at this point in the history
Read/write index
  • Loading branch information
ekg authored Jun 26, 2024
2 parents 59f12f6 + 72b7868 commit 44d2ba0
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 111 deletions.
23 changes: 17 additions & 6 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void parse_args(int argc,
args::Group mandatory_opts(parser, "[ MANDATORY OPTIONS ]");
args::Positional<std::string> target_sequence_file(mandatory_opts, "target", "alignment target/reference sequence file");

args::Group io_opts(parser, "[ Files IO Options ]");
args::Group io_opts(parser, "[ Files IO Options ]");
args::Positional<std::string> query_sequence_file(io_opts, "query", "query sequence file (optional)");

args::Group mapping_opts(parser, "[ Mapping Options ]");
Expand All @@ -75,14 +75,14 @@ void parse_args(int argc,
args::ValueFlag<uint32_t> num_mappings_for_short_seq(mapping_opts, "N", "number of mappings to retain for each query/reference pair where the query sequence is shorter than segment length [default: 1]", {'S', "num-mappings-for-short-seq"});
args::ValueFlag<int> kmer_size(mapping_opts, "N", "kmer size [default: 19]", {'k', "kmer"});
args::ValueFlag<float> kmer_pct_threshold(mapping_opts, "%", "ignore the top % most-frequent kmers [default: 0.001]", {'H', "kmer-threshold"});
args::Flag lower_triangular(mapping_opts, "", "only map shorter sequences against longer", {'L', "lower-triangular"});
args::Flag lower_triangular(mapping_opts, "", "only map shorter sequences against longer", {'L', "lower-triangular"});
args::Flag skip_self(mapping_opts, "", "skip self mappings when the query and target name is the same (for all-vs-all mode)", {'X', "skip-self"});
args::Flag one_to_one(mapping_opts, "", "Perform one-to-one filtering", {'4', "one-to-one"});
args::ValueFlag<char> skip_prefix(mapping_opts, "C", "skip mappings when the query and target have the same prefix before the last occurrence of the given character C", {'Y', "skip-prefix"});
args::ValueFlag<std::string> target_prefix(mapping_opts, "pfx", "use only targets whose names start with this prefix", {'T', "target-prefix"});
args::ValueFlag<std::string> target_list(mapping_opts, "FILE", "file containing list of target sequence names to use", {'R', "target-list"});
args::ValueFlag<std::string> query_prefix(mapping_opts, "pfx[,pfx,...]", "use only queries whose names start with these prefixes (comma delimited)", {'Q', "query-prefix"});
args::ValueFlag<std::string> query_list(mapping_opts, "FILE", "file containing list of query sequence names", {'A', "query-list"});
args::ValueFlag<std::string> target_prefix(mapping_opts, "pfx", "use only targets whose names start with this prefix", {'T', "target-prefix"});
args::ValueFlag<std::string> target_list(mapping_opts, "FILE", "file containing list of target sequence names to use", {'R', "target-list"});
args::ValueFlag<std::string> query_prefix(mapping_opts, "pfx[,pfx,...]", "use only queries whose names start with these prefixes (comma delimited)", {'Q', "query-prefix"});
args::ValueFlag<std::string> query_list(mapping_opts, "FILE", "file containing list of query sequence names", {'A', "query-list"});
args::Flag approx_mapping(mapping_opts, "approx-map", "skip base-level alignment, producing an approximate mapping in PAF", {'m',"approx-map"});
args::Flag no_split(mapping_opts, "no-split", "disable splitting of input sequences during mapping [default: enabled]", {'N',"no-split"});
args::ValueFlag<std::string> chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, sets approximate maximum variant length detectable in alignment [default: 4*segment_length, up to 20k]", {'c', "chain-gap"});
Expand All @@ -99,6 +99,8 @@ void parse_args(int argc,
//args::ValueFlag<std::string> path_high_frequency_kmers(mapping_opts, "FILE", " input file containing list of high frequency kmers", {'H', "high-freq-kmers"});
//args::ValueFlag<std::string> spaced_seed_params(mapping_opts, "spaced-seeds", "Params to generate spaced seeds <weight_of_seed> <number_of_seeds> <similarity> <region_length> e.g \"10 5 0.75 20\"", {'e', "spaced-seeds"});
args::Flag no_merge(mapping_opts, "no-merge", "don't merge consecutive segment-level mappings", {'M', "no-merge"});
// Path of the MashMap index file; per its help text, an existing FILE is used, otherwise one is created and saved as FILE.
// NOTE(review): short flag '4' is also registered for --one-to-one in this same option group — confirm which option should keep it.
args::ValueFlag<std::string> mashmap_index(mapping_opts, "FILE", "Use MashMap index if FILE exists, else create one and save as FILE", {'4', "mm-index"});
// Boolean switch copied into map_parameters.overwrite_index; the help text now describes this option rather than an unrelated filter setting.
args::Flag overwrite_mashmap_index(mapping_opts, "", "Overwrite MashMap index if it exists", {'5', "overwrite-mm-index"});

args::Group alignment_opts(parser, "[ Alignment Options ]");
args::ValueFlag<std::string> align_input_paf(alignment_opts, "FILE", "derive precise alignments for this input PAF", {'i', "input-paf"});
Expand Down Expand Up @@ -604,6 +606,15 @@ void parse_args(int argc,
//map_parameters.world_minimizers = true;
//}

if (mashmap_index)
{
map_parameters.indexFilename = args::get(mashmap_index);
} else {
map_parameters.indexFilename = "";
}

map_parameters.overwrite_index = overwrite_mashmap_index;

if (approx_mapping) {
map_parameters.outFileName = "/dev/stdout";
yeet_parameters.approx_mapping = true;
Expand Down
4 changes: 2 additions & 2 deletions src/map/include/map_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ struct Parameters
std::vector<std::string> refSequences; //reference sequence(s)
std::vector<std::string> querySequences; //query sequence(s)
std::string outFileName; //output file name
stdfs::path saveIndexFilename; //output file name of index
stdfs::path loadIndexFilename; //input file name of index
stdfs::path indexFilename; //index file name: index is read from it if the file exists, otherwise written to it
bool overwrite_index; //overwrite index if it exists
bool split; //Split read mapping (done if this is true)
bool lower_triangular; // set to true if we should filter out half of the mappings
bool skip_self; //skip self mappings
Expand Down
21 changes: 8 additions & 13 deletions src/map/include/parseCmdArgs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ sequences shorter than segment length will be ignored", ArgvParser::OptionRequir

cmd.defineOption("numMappingsForShortSeq", "number of mappings to retain for each sequence shorter than segment length [default: 1]", ArgvParser::OptionRequiresValue);

cmd.defineOption("saveIndex", "Prefix of index files to save. PREFIX.map and PREFIX.index files will be created", ArgvParser::OptionRequiresValue);
cmd.defineOption("loadIndex", "Prefix of index files to load, where PREFIX.map and PREFIX.index are the files to be loaded", ArgvParser::OptionRequiresValue);
cmd.defineOption("index", "Writes index to provided filename if it doesn't exist, otherwise reads the index", ArgvParser::OptionRequiresValue);
cmd.defineOption("overwriteIndex", "Overwrites provided index filename");


cmd.defineOption("noSplit", "disable splitting of input sequences during mapping [enabled by default]");
Expand Down Expand Up @@ -370,19 +370,14 @@ sequences shorter than segment length will be ignored", ArgvParser::OptionRequir
}


if (cmd.foundOption("saveIndex")) {
str << cmd.optionValue("saveIndex");
str >> parameters.saveIndexFilename;
if (cmd.foundOption("index")) {
str << cmd.optionValue("index");
str >> parameters.indexFilename;
} else {
parameters.saveIndexFilename = "";
parameters.indexFilename = "";
}
if (cmd.foundOption("loadIndex")) {
str << cmd.optionValue("loadIndex");
str >> parameters.loadIndexFilename;
} else {
parameters.loadIndexFilename = "";
}
str.clear();

parameters.overwrite_index = cmd.foundOption("overwriteIndex");

parameters.alphabetSize = 4;
//Do not expose the option to set protein alphabet in mashmap
Expand Down
Loading

0 comments on commit 44d2ba0

Please sign in to comment.