Skip to content

Commit

Permalink
Add make gapless input option
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Jul 16, 2024
1 parent 6c29dc8 commit de372c0
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 4 deletions.
9 changes: 9 additions & 0 deletions src/commands/convert/sync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,15 @@ void setup_sync( CLI::App& app )
);
options->gapless.option->group( "Settings" );

// For user convenience, we also check if the general settings option for making
// the stream gapless is given, to improve the option description here.
auto make_gapless_opt = sub->get_option_no_throw( "--make-gapless" );
if( make_gapless_opt ) {
auto descr = options->gapless.option->get_description();
descr += " Note: This option is an alias for the more general `--make-gapless` option.";
options->gapless.option->description( descr );
}

// Guess Reference Base
options->guess_ref_base.option = sub->add_flag(
"--guess-reference-base",
Expand Down
25 changes: 25 additions & 0 deletions src/options/variant_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,26 @@ void VariantInputOptions::add_input_files_opts_to_app(
CLI::IsMember( enum_map_keys( multi_file_contribution_type_map_ ), CLI::ignore_case )
);

// Make gapless stream. If renamed, change it in the `sync` command as well,
// where we use the option name to improve the option description there.
make_gapless_stream_.option = sub->add_flag(
"--make-gapless",
make_gapless_stream_.value,
"By default, we only operate on the positions for which there is data. "
"In particular, positions that are absent in the input are completely ignored; "
"they do not even show up in the `missing` column of output tables. "
"This is because for the statistics, data being absend or (marked as) missing "
"is merely a sementic distinction, but it does not change the results. "
"However, it might make processing with downstream tools easier if the output contains "
"all positions, for instance when using `single` windows. "
"With this option, all absent positions are filled in as missing data, so that they "
"show up in the `missing` column and as entries in single windows. "
"If a referene genome or dictionary is given, this might also include positions beyond "
"where there is input data, up until the length of each chromosome. "
"Note that this can lead to large ouput tables when processing single positions."
);
make_gapless_stream_.option->group( group );

// Hidden options to set the Generic Input Stream block sizes for speed.

// First for the main block size of the stream that is collecting all Variants,
Expand Down Expand Up @@ -680,6 +700,11 @@ void VariantInputOptions::conditionally_make_gapless_stream_() const
return;
}

// Also if the user provided the gapless option here.
if( make_gapless_stream_.value ) {
gapless_stream_ = true;
}

// Now we check if we want to make a gapless stream, e.g., by demand from a command.
// If so, we further check if any references are given, which we then want to use
// for the stream in order to properly get the chromosome lengths.
Expand Down
11 changes: 11 additions & 0 deletions src/options/variant_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,16 @@ class VariantInputOptions
gapless_stream_ = value;
}

/**
* @brief Return whether the stream is made gapless due to some setting.
*
* This is a plain read-only accessor for the internal `gapless_stream_` flag; it does not
* trigger any processing itself. The flag is set to its correct value only after get_stream()
* has been called, so calling this earlier might not yet reflect, for instance,
* the `--make-gapless` command line option.
*
* @return `true` if the stream is made gapless, `false` otherwise.
*/
bool gapless_stream() const
{
return gapless_stream_;
}

/**
* @brief Transformations and filters for individual input sources.
*
Expand Down Expand Up @@ -320,6 +330,7 @@ class VariantInputOptions
// to avoid code repetition when adding and processing them here.
std::vector<std::unique_ptr<VariantFileOptions>> input_files_;
CliOption<std::string> multi_file_loci_set_ = "union";
CliOption<bool> make_gapless_stream_ = false;

// Hidden options to set the Generic Input Stream block size for speed.
CliOption<size_t> iterator_block_size_ = 8192;
Expand Down
17 changes: 14 additions & 3 deletions src/options/window.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ std::unique_ptr<VariantWindowStream> WindowOptions::get_variant_window_stream(
}
case WindowType::kSingle: {
result = genesis::utils::make_unique<VariantPositionWindowStream>(
get_variant_window_stream_single_( input_stream )
get_variant_window_stream_single_( input_stream, variant_input.gapless_stream() )
);
break;
}
Expand Down Expand Up @@ -413,7 +413,7 @@ std::unique_ptr<VariantWindowViewStream> WindowOptions::get_variant_window_view_
case WindowType::kSingle: {
result = genesis::utils::make_unique<WindowViewStream>(
make_window_view_stream(
get_variant_window_stream_single_( input_stream )
get_variant_window_stream_single_( input_stream, variant_input.gapless_stream() )
)
);
break;
Expand Down Expand Up @@ -598,8 +598,19 @@ WindowOptions::get_variant_window_stream_queue_(

WindowOptions::VariantPositionWindowStream
WindowOptions::get_variant_window_stream_single_(
genesis::population::VariantInputStream& input
genesis::population::VariantInputStream& input,
bool is_gapless_stream
) const {
// If our input is meant to be gapless, we also do not want to filter for passing positions here.
// That will produce huge output, but well, it's what the user wants :-)
// As far as we can tell, this is the only type of window that needs this;
// all other window types behave exactly the same, except that they also are counting
// the missing positions properly when a gapless stream is given.
if( is_gapless_stream ) {
return genesis::population::make_default_position_window_stream(
input.begin(), input.end()
);
}
return genesis::population::make_passing_variant_position_window_stream(
input.begin(), input.end()
);
Expand Down
3 changes: 2 additions & 1 deletion src/options/window.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,8 @@ class WindowOptions
) const;

VariantPositionWindowStream get_variant_window_stream_single_(
genesis::population::VariantInputStream& input
genesis::population::VariantInputStream& input,
bool is_gapless_stream
) const;

VariantRegionWindowStream get_variant_window_stream_regions_(
Expand Down

0 comments on commit de372c0

Please sign in to comment.