Skip to content

Commit

Permalink
Merge pull request #248 from waveygang/stampede3-build
Browse files Browse the repository at this point in the history
Stampede3 build
  • Loading branch information
ekg authored Jun 16, 2024
2 parents 9ff0452 + d55cfe7 commit 1d142d9
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 58 deletions.
95 changes: 86 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ set(CMAKE_CXX_EXTENSIONS OFF) # Make sure no compiler-specific features are use
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

# User-facing build switches. All default to OFF.
#   BUILD_STATIC       - build a static binary; when ON, library lookup is
#                        restricted to ".a" archives (see the BUILD_STATIC branch).
#   BUILD_DEPS         - download and build htslib, gsl and libdeflate with
#                        ExternalProject instead of linking the system copies.
#   BUILD_RETARGETABLE - for Release builds without a preset EXTRA_FLAGS, use
#                        -march=x86-64-v3 instead of -march=native.
option(BUILD_STATIC "Build static binary" OFF)
option(BUILD_DEPS "Build external dependencies" OFF)
option(BUILD_RETARGETABLE "Build retargetable binary" OFF)

if (BUILD_STATIC)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
Expand All @@ -28,6 +30,7 @@ find_package(ZLIB REQUIRED)
# Mandatory packages: REQUIRED makes configuration stop with an error if any
# of them cannot be located.
find_package(BZip2 REQUIRED)
find_package(LibLZMA REQUIRED)
find_package(Threads REQUIRED)
find_package(OpenMP REQUIRED)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING
Expand All @@ -37,11 +40,23 @@ endif()
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")

if (${CMAKE_BUILD_TYPE} MATCHES Release)
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG")
if (NOT EXTRA_FLAGS)
set(EXTRA_FLAGS "-Ofast -march=x86-64-v3")
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG")
if (NOT EXTRA_FLAGS)
if (BUILD_RETARGETABLE)
set(EXTRA_FLAGS "-Ofast -march=x86-64-v3 -flto")
else()
set(EXTRA_FLAGS "-Ofast -march=native -flto")
endif()
endif()
endif ()

if (${CMAKE_BUILD_TYPE} MATCHES Generic)
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG")
if (NOT EXTRA_FLAGS)
set(EXTRA_FLAGS "-Ofast -flto")
endif()
endif ()

if (${CMAKE_BUILD_TYPE} MATCHES Debug)
Expand All @@ -63,10 +78,77 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

add_subdirectory(src/common/wflign EXCLUDE_FROM_ALL)

include(ExternalProject)

# The wfmash executable. It is declared before any command that refers to the
# `wfmash` target: add_dependencies() and the target_* commands all fail at
# configure time when the target does not exist yet.
add_executable(wfmash
  src/common/utils.cpp
  src/interface/main.cpp)

if (BUILD_DEPS)
  # Download and build htslib, gsl and libdeflate, installing each one into its
  # own prefix below the build tree.
  #
  # NOTE: $(MAKE) is Makefile syntax; it forwards the parent make (and its
  # jobserver) to the sub-build, so these steps assume a Makefile generator.
  ExternalProject_Add(htslib
    URL https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2
    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/htslib
    CONFIGURE_COMMAND autoreconf -i && ./configure --prefix=${CMAKE_CURRENT_BINARY_DIR}/htslib --disable-libcurl --disable-s3
    BUILD_COMMAND $(MAKE)
    INSTALL_COMMAND $(MAKE) install
    BUILD_IN_SOURCE 1
  )

  ExternalProject_Add(gsl
    URL https://mirror.ibcp.fr/pub/gnu/gsl/gsl-2.8.tar.gz
    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/gsl
    CONFIGURE_COMMAND ./configure --prefix=${CMAKE_CURRENT_BINARY_DIR}/gsl
    BUILD_COMMAND $(MAKE)
    INSTALL_COMMAND $(MAKE) install
    BUILD_IN_SOURCE 1
  )

  ExternalProject_Add(libdeflate
    URL https://github.com/ebiggers/libdeflate/releases/download/v1.20/libdeflate-1.20.tar.gz
    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libdeflate
    CMAKE_ARGS
      -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/libdeflate
      # Pin the library directory: GNUInstallDirs may otherwise choose "lib64",
      # and the archive path linked below is hard-coded to "lib".
      -DCMAKE_INSTALL_LIBDIR=lib
      -DCMAKE_BUILD_TYPE=Release
    # Use the CMake that is running this configure step, not whatever `cmake`
    # happens to be first on PATH.
    BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release
    INSTALL_COMMAND ${CMAKE_COMMAND} --install . --config Release
    BUILD_IN_SOURCE 1
  )

  # Build the external projects before compiling/linking wfmash.
  add_dependencies(wfmash htslib gsl libdeflate)

  target_include_directories(wfmash PRIVATE
    ${CMAKE_CURRENT_BINARY_DIR}/htslib/include
    ${CMAKE_CURRENT_BINARY_DIR}/gsl/include
    ${CMAKE_CURRENT_BINARY_DIR}/libdeflate/include
  )

  # Keyword-less signature on purpose: the remaining target_link_libraries()
  # call for wfmash uses the plain form, and CMake rejects mixing plain and
  # keyword signatures on one target.
  target_link_libraries(wfmash
    ${CMAKE_CURRENT_BINARY_DIR}/gsl/lib/libgsl.a
    ${CMAKE_CURRENT_BINARY_DIR}/gsl/lib/libgslcblas.a
    ${CMAKE_CURRENT_BINARY_DIR}/htslib/lib/libhts.a
    ${CMAKE_CURRENT_BINARY_DIR}/libdeflate/lib/libdeflate.a
  )
else()
  # System copies are linked by bare library name; the package lookups below
  # are currently disabled.
  #find_package(HTSLIB REQUIRED)
  #find_package(GSL REQUIRED)
  #find_package(LibDeflate REQUIRED)

  #target_include_directories(wfmash PRIVATE
  #    ${HTSLIB_INCLUDE_DIR}
  #    ${GSL_INCLUDE_DIR}
  #    ${LIBDEFLATE_INCLUDE_DIR}
  #)

  target_link_libraries(wfmash
    gsl
    gslcblas
    hts
    deflate
  )
endif()

target_include_directories(wfmash PRIVATE
src
src/common
Expand All @@ -75,19 +157,14 @@ target_include_directories(wfmash PRIVATE
)

target_link_libraries(wfmash
gsl
gslcblas
m
pthread
libwflign_static
hts
rt
wfa2cpp_static
jemalloc
lzma
bz2
z
deflate
Threads::Threads
)

Expand Down
130 changes: 92 additions & 38 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,66 +111,130 @@ To prevent lags when starting a mapping process, users should apply `samtools in
The `.fai` indexes are then used to quickly compute the sum of query lengths.


## installation
## Installation

### building from source
### Static binaries

The build is orchestrated with `cmake`. At least GCC version 9.3.0 is required for compilation. You can check your version via:
We provide [static builds of wfmash releases](https://github.com/waveygang/wfmash/releases) targeted at the `x86-64-v3` instruction set.

### Bioconda

`wfmash` recipes for Bioconda are available at https://anaconda.org/bioconda/wfmash.
To install the latest version using `Conda`, execute:

``` bash
gcc --version
g++ --version
conda install -c bioconda wfmash
```

It may be necessary to install several system-level libraries to build `wfmash`. On `Ubuntu 20.04`, these can be installed using `apt`:
## Building from Source

```
sudo apt install build-essential cmake libjemalloc-dev zlib1g-dev libgsl-dev libhts-dev
```
The build process for `wfmash` is managed using `CMake`, providing various options to customize the build.

After installing the required dependencies, clone the `wfmash` git repository and build with:
### Prerequisites

Before building `wfmash`, you need the following dependencies installed on your system:

- GCC (version 9.3.0 or higher) or a recent version of Clang/LLVM
- CMake
- Zlib
- GSL
- HTSlib
- libdeflate
- jemalloc
- LibLZMA
- BZip2
- Threads
- OpenMP

On Ubuntu 20.04 or later, these dependencies can be installed with the following command:

```sh
sudo apt install build-essential cmake zlib1g-dev libgsl-dev libhts-dev liblzma-dev libbz2-dev libjemalloc-dev libdeflate-dev
```
git clone --recursive https://github.com/ekg/wfmash.git

### Clone the Repository

Clone the `wfmash` repository:

```sh
git clone https://github.com/waveygang/wfmash.git
cd wfmash
cmake -H. -Bbuild && cmake --build build -- -j 8
```

Of course, you can use as many cores as you like.
### Build Options

If your system has several versions of the `gcc`/`g++` compilers you might tell `cmake` which one to use with:
`wfmash` provides several CMake options to customize the build process:

- `BUILD_STATIC` (default: `OFF`): Build a static binary.
- `BUILD_DEPS` (default: `OFF`): Build external dependencies (htslib, gsl, libdeflate) from source. Use this if system libraries are not available or you want to use specific versions. HTSlib will be built without curl support, which removes a warning for static compilation related to `dlopen`.
- `BUILD_RETARGETABLE` (default: `OFF`): Build a retargetable binary. When this option is enabled, the binary is compiled for the `x86-64-v3` instruction set (`-march=x86-64-v3`) instead of being tuned to the build machine (`-march=native`).

These can be mixed and matched.

### Building with System Libraries

To build `wfmash` using system libraries:

```sh
cmake -H. -Bbuild && cmake --build build -- -j 8
```
cmake -H. -Bbuild -DCMAKE_C_COMPILER='/usr/bin/gcc-10' -DCMAKE_CXX_COMPILER='/usr/bin/g++-10'
cmake --build build -- -j 8
```
The `wfmash` binary will be in `build/bin`.

#### Static compilation
This command will configure and build `wfmash` in the `build` directory, using as many cores as you specify with the `-j` option.

### Building with External Dependencies

By default, we build `wfmash` in Release mode (with optimizations) and as a dynamically linked executable.
Alternatively we can build a static binary:
If you need to build with external dependencies, use the `BUILD_DEPS` option:

```sh
cmake -H. -Bbuild -DBUILD_DEPS=ON && cmake --build build -- -j 8
```

This will download and build the necessary external dependencies.

### Building a Static Binary

To build a static binary, use the `BUILD_STATIC` option:

```sh
cmake -H. -Bbuild -DBUILD_STATIC=ON && cmake --build build -- -j 16
```

#### Notes for distribution
### Building a Retargetable Binary

If you need to avoid machine-specific optimizations, use the `CMAKE_BUILD_TYPE=Generic` build type:
To build a retargetable binary, use the `BUILD_RETARGETABLE` option:

```shell
cmake -H. -Bbuild -D CMAKE_BUILD_TYPE=Generic && cmake --build build -- -j 3
```sh
cmake -H. -Bbuild -DBUILD_RETARGETABLE=ON && cmake --build build -- -j 8
```

#### Notes on dependencies
This will configure the build with `-march=x86-64-v3` instead of `-march=native`, allowing the binary to run on any machine that supports the `x86-64-v3` instruction set.

### Installing

On `Arch Linux`, the `jemalloc` dependency can be installed with:
After building, you can install `wfmash` using:

```sh
cmake --install build
```
sudo pacman -S jemalloc # arch linux

This will install the `wfmash` binary and any required libraries to the default installation directory (typically `/usr/local/bin` for binaries).

#### Tests

To build and run tests:

```sh
cmake --build build --target test
```

#### Notes for distribution

If you need to avoid machine-specific optimizations, use the `CMAKE_BUILD_TYPE=Generic` build type:

```shell
cmake -H. -Bbuild -D CMAKE_BUILD_TYPE=Generic && cmake --build build -- -j 8
```

The resulting binary should be compatible with all x86 processors.

#### Notes for debugging/plotting

To enable the functionality of emitting wavefront plots (in PNG format), tables (in TSV format), and timing information, add the `-DWFA_PNG_TSV_TIMING=ON` option:
Expand Down Expand Up @@ -227,16 +291,6 @@ singularity run wfmash.sif $ARGS

Where `$ARGS` are your typical command line arguments to `wfmash`.


### Bioconda

`wfmash` recipes for Bioconda are available at https://anaconda.org/bioconda/wfmash.
To install the latest version using `Conda` execute:

``` bash
conda install -c bioconda wfmash
```

### Guix

#### installing via the guix-genomics git repository
Expand Down
4 changes: 2 additions & 2 deletions src/common/wflign/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")

if (${CMAKE_BUILD_TYPE} MATCHES Release)
#set(EXTRA_FLAGS "-Ofast -march=x86-64-v3 -flto -fno-fat-lto-objects")
set(EXTRA_FLAGS "-Ofast -march=x86-64-v3")
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to replace -O3 with -Ofast
set(EXTRA_FLAGS "-Ofast -march=x86-64-v3 -g")
#set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to replace -O3 with -Ofast
endif ()

if (${CMAKE_BUILD_TYPE} MATCHES Debug)
Expand Down
4 changes: 2 additions & 2 deletions src/common/wflign/src/wflign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ int wflambda_extend_match(
if (f != alignments.end()) {
is_a_match = (alignments[k] != nullptr);
} else {
const int query_begin = v * step_size;
const int target_begin = h * step_size;
const int64_t query_begin = v * step_size;
const int64_t target_begin = h * step_size;

// The last fragment can be longer than segment_length_to_use (max 2*segment_length_to_use - 1)
const uint16_t segment_length_to_use_q =
Expand Down
2 changes: 1 addition & 1 deletion src/common/wflign/src/wflign_alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,4 +642,4 @@ bool hack_cigar(wfa::cigar_t &cigar, const char *query, const char *target,
}
}
return ok;
}*/
}*/
13 changes: 10 additions & 3 deletions src/common/wflign/src/wflign_patch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,25 @@ bool do_wfa_segment_alignment(
const char* query,
std::vector<rkmh::hash_t>*& query_sketch,
const uint64_t& query_length,
const int& j,
const int64_t& j,
const std::string& target_name,
const char* target,
std::vector<rkmh::hash_t>*& target_sketch,
const uint64_t& target_length,
const int& i,
const int64_t& i,
const uint16_t& segment_length_q,
const uint16_t& segment_length_t,
const uint16_t& step_size,
wflign_extend_data_t* extend_data,
alignment_t& aln) {

// if our i or j index plus segment length in the query or target is too long we'll make a memory access error and weird stuff will happen
if (i + segment_length_t > target_length || j + segment_length_q > query_length) {
// display function parameters
std::cerr << "query_name: " << query_name << " query_length: " << query_length << " target_name: " << target_name << " target_length: " << target_length << std::endl;
std::cerr << "i: " << i << " j: " << j << " segment_length_t: " << segment_length_t << " segment_length_q: " << segment_length_q << std::endl;
}

// first make the sketches if we haven't yet
if (query_sketch == nullptr) {
query_sketch = new std::vector<rkmh::hash_t>();
Expand All @@ -48,7 +55,7 @@ bool do_wfa_segment_alignment(
++extend_data->num_sketches_allocated;
}
if (target_sketch == nullptr) {
target_sketch = new std::vector<rkmh::hash_t>();
target_sketch = new std::vector<rkmh::hash_t>();
*target_sketch = rkmh::hash_sequence(
target + i, segment_length_t, extend_data->minhash_kmer_size, (uint64_t)((float)segment_length_t * extend_data->mash_sketch_rate));
++extend_data->num_sketches_allocated;
Expand Down
Loading

0 comments on commit 1d142d9

Please sign in to comment.