Skip to content

Compile boost (minimal) from source, add more documentation, updated results #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ regex crate for defined expressions.
The different engines have different requirements which are not described here.
Please see the related project documentations.

On Ubuntu 20.04, the following packages had to be installed to get the build working on a stock AWS instance:
```bash
$ apt install build-essential cmake rustc cargo automake autoconf autopoint autogen \
libtool libprotobuf-dev libprotobuf-c-dev protobuf-compiler ninja-build \
ragel libpcap pcaputils pkg-config libboost-dev flex bison
```

Once all dependencies are fulfilled, just configure and build the CMake-based project:

```bash
Expand Down Expand Up @@ -98,8 +105,29 @@ python3 ../genspreadsheet.py results.csv
It will save an Excel spreadsheet with the name `regex-results-YYYYMMDD-HHMMSS.xlsx` in the current
directory.

## Compiling with clang + libc++

Unfortunately, it is not possible to build with both GCC/libstdc++ and clang+libc++ at the
same time, because CMake selects a single compiler per build tree.

To run with clang+libc++ use the following recipe:
```bash
mkdir build && cd build
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_EXE_LINKER_FLAGS="-lc++abi -lc++" \
-DCMAKE_CXX_COMPILER=/usr/local/bin/clang++ \
-DCMAKE_C_COMPILER=/usr/local/bin/clang \
-DCMAKE_CXX_FLAGS_INIT="-std=c++20 -stdlib=libc++ -march=native -mtune=native" \
-G Ninja ..
```

## Results

These results were obtained on an AMD Threadripper 3960X (Zen2) at 3.8 GHz running Ubuntu 20.04.5 LTS.

![Updated Performance Results](results_20221012.png "Performance Results")
![Updated Performance Results](results_threadripper.png "Performance Results")

IceLake Xeon Platinum 8375C @ 2.90GHz (AWS C6i instance) - no mitigations

![IceLake Server](results_icelake.png "Results Ice Lake")
24 changes: 10 additions & 14 deletions genspreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,8 @@
print("Usage: genspreadsheet.py <results.txt>\n")
sys.exit(0)

regexre = re.compile('Regex:\s*(.*)')
resultre = re.compile('\[\s*(\S+)\]\s*time:\s*([\d\.]+).*matches:\s*(\d+)')

infilename = sys.argv[1]
current_regex = None
results = {}
stats = None
scanners = set()
with open( infilename, "r" ) as filein:
headers = filein.readline().split(';')
Expand All @@ -37,14 +32,15 @@
workbook = xlsxwriter.Workbook(outfilename)
worksheet = workbook.add_worksheet()
worksheet.hide_gridlines(2)
worksheet.set_column(0,0,30)
worksheet.set_column(0,0,35)
worksheet.set_column(1,len(scanners),10)
worksheet.set_row(0,20)

# Add a bold format to use to highlight cells.
bold = workbook.add_format({'bold': True})
boldrot = workbook.add_format({'bold': True})
boldrot.set_rotation(0)
headerfmt = workbook.add_format({'bold': True})
headerfmt.set_bg_color('gray')
headerfmt.set_font_color('white')
headerfmt.set_rotation(0)
highfmt = workbook.add_format({'bold': True})
highfmt.set_bg_color( 'orange' )
highfmt.set_font_color( 'white' )
Expand All @@ -56,19 +52,19 @@
warnfmt.set_font_color( 'black' )
warnfmt.set_align('center')

# Write some data headers.
scanners = list(scanners)
# Write headers.
scanners = sorted(list(scanners))
row = 0
for col,scanner in enumerate(scanners):
worksheet.write( row, col+1, scanner, boldrot )
worksheet.write( row, 0, "Regex", bold)
worksheet.write( row, col+1, scanner, headerfmt )
worksheet.write( row, 0, "Regex", headerfmt )

for regex,stats in results.items():
values = sorted([ ms for ms in stats.values() ])
lowcut = values[1]
highcut = values[-2]
row += 1
worksheet.write( row, 0, regex, bold )
worksheet.write( row, 0, regex, headerfmt )
for col,scanner in enumerate(scanners):
if scanner not in stats:
worksheet.write( row, col+1, "n/a", warnfmt )
Expand Down
Binary file added results_icelake.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added results_threadripper.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
79 changes: 67 additions & 12 deletions vendor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,58 @@ function(AddExternalProject NAME LIB_NAME URL TAG)
endif()
endfunction()


# Build a minimal Boost that contains only Boost.Regex.
#
# The Boost superproject is cloned from GitHub and only the submodules listed
# under GIT_SUBMODULES are requested. The checkout lives in the source tree
# (${CMAKE_CURRENT_SOURCE_DIR}/boost), while the tmp/stamp/build/download
# directories are placed under ${PROJECT_BINARY_DIR}. The result is installed
# by b2 into ${CMAKE_CURRENT_SOURCE_DIR}/local, the same prefix the other
# vendored libraries in this file are installed to.
ExternalProject_Add(
libboost
GIT_REPOSITORY "https://github.com/boostorg/boost.git"
# NOTE(review): `master` is a moving branch, so the built Boost version is not
# pinned and may differ between checkouts.
GIT_TAG master
# Restrict the checkout to the build tooling plus the regex library and the
# libraries listed alongside it (presumably its dependencies - verify when
# changing this list).
GIT_SUBMODULES
tools/build
tools/boost_install
libs/regex
libs/config
libs/headers
libs/throw_exception
libs/exception
libs/assert
# Shallow clone, and do not recurse into nested submodules.
GIT_SHALLOW ON
GIT_SUBMODULES_RECURSE OFF
PREFIX ${CMAKE_CURRENT_SOURCE_DIR}
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/boost
TMP_DIR ${PROJECT_BINARY_DIR}/boost-tmp
STAMP_DIR ${PROJECT_BINARY_DIR}/boost-stamp
BINARY_DIR ${PROJECT_BINARY_DIR}/boost-build
DOWNLOAD_DIR ${PROJECT_BINARY_DIR}/boost-down
# Configure step: run Boost's bootstrap script inside the source checkout
# (the commands `cd` there explicitly instead of relying on BINARY_DIR).
CONFIGURE_COMMAND
cd ${CMAKE_CURRENT_SOURCE_DIR}/boost &&
./bootstrap.sh
# Build step: run `b2 headers`, then build and install in a single b2 call.
# The options request only the regex library (--with-regex) without ICU, as a
# static, release, single-threaded library with a static runtime.
# NOTE(review): address-model=64 architecture=x86 hard-codes a 64-bit x86
# target; this likely needs adjusting on other host architectures.
BUILD_COMMAND
cd ${CMAKE_CURRENT_SOURCE_DIR}/boost &&
./b2 headers &&
./b2 install -q -a
--prefix=${CMAKE_CURRENT_SOURCE_DIR}/local
--build-type=minimal
--layout=system
--disable-icu
--with-regex
variant=release link=static runtime-link=static
threading=single address-model=64 architecture=x86
# The install step is intentionally empty: `b2 install` above already
# installed into the prefix.
INSTALL_COMMAND ""
)

# Cache option choosing where boost::regex comes from. The three accepted
# values are offered as a selection list in cmake-gui/ccmake through the
# STRINGS property; how each value is acted upon is decided outside this
# section of the file.
set(INCLUDE_BOOST "local" CACHE STRING "Use the boost::regex library from the local build, from the system, or disable its usage.")
set_property(CACHE INCLUDE_BOOST PROPERTY STRINGS "local" "system" "disabled")
# Echo the effective choice during configuration.
message("-- Include boost: ${INCLUDE_BOOST}")

# hyperscan
AddExternalProject(
"hyperscan"
"hs"
"https://github.com/01org/hyperscan.git"
"master"
-DCMAKE_BUILD_TYPE=Release -DFAT_RUNTIME=OFF -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
-DCMAKE_BUILD_TYPE=Release -DFAT_RUNTIME=OFF
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
)

# oniguruma
Expand All @@ -86,7 +131,8 @@ AddExternalProject(
"onig"
"https://github.com/kkos/oniguruma.git"
"master"
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
-DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DINSTALL_DOCUMENTATION=OFF
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
)

# re2
Expand All @@ -95,7 +141,8 @@ AddExternalProject(
"re2"
"https://github.com/google/re2.git"
"main"
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
)

# tre
Expand All @@ -104,7 +151,10 @@ AddExternalProject(
"tre"
"https://github.com/laurikari/tre.git"
"master"
cd ${CMAKE_CURRENT_SOURCE_DIR}/tre/ && ./utils/autogen.sh && cd ${PROJECT_BINARY_DIR}/tre-build && ${CMAKE_CURRENT_SOURCE_DIR}/tre/configure --prefix=${CMAKE_CURRENT_SOURCE_DIR}/local
cd ${CMAKE_CURRENT_SOURCE_DIR}/tre/ &&
./utils/autogen.sh && cd ${PROJECT_BINARY_DIR}/tre-build &&
${CMAKE_CURRENT_SOURCE_DIR}/tre/configure
--prefix=${CMAKE_CURRENT_SOURCE_DIR}/local
)

# pcre2
Expand All @@ -113,7 +163,9 @@ AddExternalProject(
"pcre2-8"
"https://github.com/PhilipHazel/pcre2.git"
"master"
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local -DPCRE2_SUPPORT_JIT=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
-DPCRE2_SUPPORT_JIT=ON
)

# c++ standard
Expand All @@ -127,8 +179,9 @@ AddExternalProject(
"ctre"
"ctre"
"https://github.com/hanickadot/compile-time-regular-expressions.git"
"master"
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
"main"
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local
)

# yara
Expand All @@ -137,11 +190,13 @@ AddExternalProject(
"yara"
"https://github.com/VirusTotal/yara.git"
"master"
cd ${CMAKE_CURRENT_SOURCE_DIR}/yara/ && ./bootstrap.sh && cd ${PROJECT_BINARY_DIR}/yara-build && ${CMAKE_CURRENT_SOURCE_DIR}/yara/configure --prefix=${CMAKE_CURRENT_SOURCE_DIR}/local
cd ${CMAKE_CURRENT_SOURCE_DIR}/yara/ &&
./bootstrap.sh &&
cd ${PROJECT_BINARY_DIR}/yara-build &&
${CMAKE_CURRENT_SOURCE_DIR}/yara/configure
--prefix=${CMAKE_CURRENT_SOURCE_DIR}/local

)

# boost - I'm not going to build boost here
set(INCLUDE_BOOST "system" CACHE STRING "Use boost::regex library form local built, system or disable usage.")
set_property(CACHE INCLUDE_BOOST PROPERTY STRINGS "local" "system" "disabled")
message("-- Include boost: ${INCLUDE_BOOST}")