Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix freq seed count #240

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void parse_args(int argc,
args::Group mandatory_opts(parser, "[ MANDATORY OPTIONS ]");
args::Positional<std::string> target_sequence_file(mandatory_opts, "target", "alignment target/reference sequence file");

args::Group io_opts(parser, "[ Files IO Options ]");
args::Group io_opts(parser, "[ Files IO Options ]");
args::Positional<std::string> query_sequence_file(io_opts, "query", "query sequence file (optional)");

args::Group mapping_opts(parser, "[ Mapping Options ]");
Expand All @@ -75,14 +75,14 @@ void parse_args(int argc,
args::ValueFlag<uint32_t> num_mappings_for_short_seq(mapping_opts, "N", "number of mappings to retain for each query/reference pair where the query sequence is shorter than segment length [default: 1]", {'S', "num-mappings-for-short-seq"});
args::ValueFlag<int> kmer_size(mapping_opts, "N", "kmer size [default: 19]", {'k', "kmer"});
args::ValueFlag<float> kmer_pct_threshold(mapping_opts, "%", "ignore the top % most-frequent kmers [default: 0.001]", {'H', "kmer-threshold"});
args::Flag lower_triangular(mapping_opts, "", "only map shorter sequences against longer", {'L', "lower-triangular"});
args::Flag lower_triangular(mapping_opts, "", "only map shorter sequences against longer", {'L', "lower-triangular"});
args::Flag skip_self(mapping_opts, "", "skip self mappings when the query and target name is the same (for all-vs-all mode)", {'X', "skip-self"});
args::Flag one_to_one(mapping_opts, "", "Perform one-to-one filtering", {'4', "one-to-one"});
args::ValueFlag<char> skip_prefix(mapping_opts, "C", "skip mappings when the query and target have the same prefix before the last occurrence of the given character C", {'Y', "skip-prefix"});
args::ValueFlag<std::string> target_prefix(mapping_opts, "pfx", "use only targets whose names start with this prefix", {'T', "target-prefix"});
args::ValueFlag<std::string> target_list(mapping_opts, "FILE", "file containing list of target sequence names to use", {'R', "target-list"});
args::ValueFlag<std::string> query_prefix(mapping_opts, "pfx[,pfx,...]", "use only queries whose names start with these prefixes (comma delimited)", {'Q', "query-prefix"});
args::ValueFlag<std::string> query_list(mapping_opts, "FILE", "file containing list of query sequence names", {'A', "query-list"});
args::ValueFlag<std::string> target_prefix(mapping_opts, "pfx", "use only targets whose names start with this prefix", {'T', "target-prefix"});
args::ValueFlag<std::string> target_list(mapping_opts, "FILE", "file containing list of target sequence names to use", {'R', "target-list"});
args::ValueFlag<std::string> query_prefix(mapping_opts, "pfx[,pfx,...]", "use only queries whose names start with these prefixes (comma delimited)", {'Q', "query-prefix"});
args::ValueFlag<std::string> query_list(mapping_opts, "FILE", "file containing list of query sequence names", {'A', "query-list"});
args::Flag approx_mapping(mapping_opts, "approx-map", "skip base-level alignment, producing an approximate mapping in PAF", {'m',"approx-map"});
args::Flag no_split(mapping_opts, "no-split", "disable splitting of input sequences during mapping [default: enabled]", {'N',"no-split"});
args::ValueFlag<std::string> chain_gap(mapping_opts, "N", "chain mappings closer than this distance in query and target, sets approximate maximum variant length detectable in alignment [default: 4*segment_length, up to 20k]", {'c', "chain-gap"});
Expand All @@ -99,6 +99,8 @@ void parse_args(int argc,
//args::ValueFlag<std::string> path_high_frequency_kmers(mapping_opts, "FILE", " input file containing list of high frequency kmers", {'H', "high-freq-kmers"});
//args::ValueFlag<std::string> spaced_seed_params(mapping_opts, "spaced-seeds", "Params to generate spaced seeds <weight_of_seed> <number_of_seeds> <similarity> <region_length> e.g \"10 5 0.75 20\"", {'e', "spaced-seeds"});
args::Flag no_merge(mapping_opts, "no-merge", "don't merge consecutive segment-level mappings", {'M', "no-merge"});
args::ValueFlag<std::string> mashmap_index(mapping_opts, "FILE", "Use MashMap index if FILE exists, else create one and save as FILE", {'4', "mm-index"});
args::Flag overwrite_mashmap_index(mapping_opts, "", "Confidence value for the hypergeometric filtering [default: 99.9%]", {'5', "overwrite-mm-index"});

args::Group alignment_opts(parser, "[ Alignment Options ]");
args::ValueFlag<std::string> align_input_paf(alignment_opts, "FILE", "derive precise alignments for this input PAF", {'i', "input-paf"});
Expand Down Expand Up @@ -604,6 +606,15 @@ void parse_args(int argc,
//map_parameters.world_minimizers = true;
//}

if (mashmap_index)
{
map_parameters.indexFilename = args::get(mashmap_index);
} else {
map_parameters.indexFilename = "";
}

map_parameters.overwrite_index = overwrite_mashmap_index;

if (approx_mapping) {
map_parameters.outFileName = "/dev/stdout";
yeet_parameters.approx_mapping = true;
Expand Down
4 changes: 2 additions & 2 deletions src/map/include/map_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ struct Parameters
std::vector<std::string> refSequences; //reference sequence(s)
std::vector<std::string> querySequences; //query sequence(s)
std::string outFileName; //output file name
stdfs::path saveIndexFilename; //output file name of index
stdfs::path loadIndexFilename; //input file name of index
stdfs::path indexFilename; //index file name: the index is read from this file if it exists, otherwise it is created and saved here
bool overwrite_index; //overwrite index if it exists
bool split; //Split read mapping (done if this is true)
bool lower_triangular; // set to true if we should filter out half of the mappings
bool skip_self; //skip self mappings
Expand Down
21 changes: 8 additions & 13 deletions src/map/include/parseCmdArgs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ sequences shorter than segment length will be ignored", ArgvParser::OptionRequir

cmd.defineOption("numMappingsForShortSeq", "number of mappings to retain for each sequence shorter than segment length [default: 1]", ArgvParser::OptionRequiresValue);

cmd.defineOption("saveIndex", "Prefix of index files to save. PREFIX.map and PREFIX.index files will be created", ArgvParser::OptionRequiresValue);
cmd.defineOption("loadIndex", "Prefix of index files to load, where PREFIX.map and PREFIX.index are the files to be loaded", ArgvParser::OptionRequiresValue);
cmd.defineOption("index", "Writes index to provided filename if it doesn't exist, otherwise reads the index", ArgvParser::OptionRequiresValue);
cmd.defineOption("overwriteIndex", "Overwrites provided index filename");


cmd.defineOption("noSplit", "disable splitting of input sequences during mapping [enabled by default]");
Expand Down Expand Up @@ -370,19 +370,14 @@ sequences shorter than segment length will be ignored", ArgvParser::OptionRequir
}


if (cmd.foundOption("saveIndex")) {
str << cmd.optionValue("saveIndex");
str >> parameters.saveIndexFilename;
if (cmd.foundOption("index")) {
str << cmd.optionValue("index");
str >> parameters.indexFilename;
} else {
parameters.saveIndexFilename = "";
parameters.indexFilename = "";
}
if (cmd.foundOption("loadIndex")) {
str << cmd.optionValue("loadIndex");
str >> parameters.loadIndexFilename;
} else {
parameters.loadIndexFilename = "";
}
str.clear();

parameters.overwrite_index = cmd.foundOption("overwriteIndex");

parameters.alphabetSize = 4;
//Do not expose the option to set protein alphabet in mashmap
Expand Down
Loading
Loading