Skip to content

build: Enable Spark query runner as reference in aggregation fuzzer test #282

build: Enable Spark query runner as reference in aggregation fuzzer test

build: Enable Spark query runner as reference in aggregation fuzzer test #282

Workflow file for this run

# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Experimental Fuzzer Jobs"
on:
pull_request:
paths:
- ".github/workflows/experimental.yml"
workflow_dispatch:
inputs:
ref:
description: 'Ref to checkout out'
default: 'main'
numThreads:
description: 'Number of threads'
default: 16
maxHighMemJobs:
description: 'Number of high memory jobs'
default: 8
maxLinkJobs:
description: 'Maximum number of link jobs'
default: 4
extraCMakeFlags:
description: 'Additional CMake flags'
default: ''
defaults:
run:
shell: bash
permissions:
contents: read
jobs:
compile:
runs-on: 16-core-ubuntu
timeout-minutes: 120
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
LINUX_DISTRO: "ubuntu"
steps:
- name: "Restore ccache"
uses: actions/cache@v3
with:
path: "${{ env.CCACHE_DIR }}"
# We are using the benchmark ccache as it has all
# required features enabled, so no need to create a new one
key: ccache-benchmark-${{ github.sha }}
restore-keys: |
ccache-benchmark-
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"
- name: "Install dependencies"
run: cd velox && source ./scripts/setup-ubuntu.sh && install_apt_deps
- name: "Build"
run: |
cd velox
make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}"
ccache -s
- name: Upload aggregation fuzzer
uses: actions/upload-artifact@v3
with:
name: aggregation
path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test
- name: Upload spark aggregation fuzzer
uses: actions/upload-artifact@v3
with:
name: spark_aggregation_fuzzer
path: velox/_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test
- name: Upload aggregation fuzzer
uses: actions/upload-artifact@v3
with:
name: aggregation
path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test
- name: Upload join fuzzer
uses: actions/upload-artifact@v3
with:
name: join
path: velox/_build/debug/velox/exec/tests/velox_join_fuzzer_test
- name: Upload Presto expression fuzzer
uses: actions/upload-artifact@v3
with:
name: presto_expression_fuzzer
path: velox/_build/debug/velox/expression/fuzzer/velox_expression_fuzzer_test
- name: Upload Spark expression fuzzer
uses: actions/upload-artifact@v3
with:
name: spark_expression_fuzzer
path: velox/_build/debug/velox/expression/fuzzer/spark_expression_fuzzer_test
presto-java-aggregation-fuzzer-run:
runs-on: 16-core-ubuntu
container: ghcr.io/facebookincubator/velox-dev:presto-java
timeout-minutes: 120
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
LINUX_DISTRO: "centos"
steps:
- name: "Restore ccache"
uses: actions/cache@v3
with:
path: "${{ env.CCACHE_DIR }}"
# We are using the benchmark ccache as it has all
# required features enabled, so no need to create a new one
key: ccache-presto-${{ github.sha }}
restore-keys: |
ccache-presto-
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"
- name: "Build"
run: |
cd velox
source /opt/rh/gcc-toolset-12/enable
make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}"
ccache -s
- name: "Run Aggregate Fuzzer"
run: |
cd velox
cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
ls -lR $PRESTO_HOME/etc
$PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
# Sleep for 60 seconds to allow Presto server to start.
sleep 60
/opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
mkdir -p /tmp/aggregate_fuzzer_repro/
rm -rfv /tmp/aggregate_fuzzer_repro/*
chmod -R 777 /tmp/aggregate_fuzzer_repro
_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 3600 \
--logtostderr=1 \
--minloglevel=0 \
--repro_persist_path=/tmp/aggregate_fuzzer_repro \
--enable_sorted_aggregations=true \
--presto_url=http://127.0.0.1:8080 \
&& echo -e "\n\nAggregation fuzzer run finished successfully."
- name: Archive aggregate production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: aggregate-fuzzer-failure-artifacts
path: |
/tmp/aggregate_fuzzer_repro
/tmp/server.log
spark-java-aggregation-fuzzer-run:
runs-on: 16-core-ubuntu
container: ghcr.io/facebookincubator/velox-dev:spark-server
timeout-minutes: 120
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
LINUX_DISTRO: "centos"
steps:
- name: "Restore ccache"
uses: actions/cache@v3
with:
path: "${{ env.CCACHE_DIR }}"
# We are using the benchmark ccache as it has all
# required features enabled, so no need to create a new one
key: ccache-spark-${{ github.sha }}
restore-keys: |
ccache-spark-
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"
- name: "Build"
run: |
cd velox
source /opt/rh/gcc-toolset-12/enable
make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}"
ccache -s
- name: "Run Spark Aggregate Fuzzer"
run: |
cd velox
bash /opt/start-spark.sh
# Sleep for 60 seconds to allow Spark server to start.
sleep 60
mkdir -p /tmp/spark_aggregate_fuzzer_repro/
rm -rfv /tmp/spark_aggregate_fuzzer_repro/*
chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 3600 \
--logtostderr=1 \
--minloglevel=0 \
--repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
--enable_sorted_aggregations=false \
&& echo -e "\n\nSpark Aggregation Fuzzer run finished successfully."
- name: Archive Spark aggregate production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: spark-agg-fuzzer-failure-artifacts
path: |
/tmp/spark_aggregate_fuzzer_repro
linux-join-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
timeout-minutes: 120
steps:
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
ref: "${{ inputs.ref || 'main' }}"
- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh && install_apt_deps
- name: Download join fuzzer
uses: actions/download-artifact@v3
with:
name: join
- name: "Run Join Fuzzer"
run: |
ls /lib64
mkdir -p /tmp/join_fuzzer_repro/
rm -rfv /tmp/join_fuzzer_repro/*
chmod -R 777 /tmp/join_fuzzer_repro
chmod +x velox_join_fuzzer_test
./velox_join_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 1800 \
--logtostderr=1 \
--minloglevel=0 \
&& echo -e "\n\nAggregation fuzzer run finished successfully."
- name: Archive aggregate production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: join-fuzzer-failure-artifacts
path: |
/tmp/join_fuzzer_repro
presto-expression-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
timeout-minutes: 120
steps:
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
ref: "${{ inputs.ref || 'main' }}"
- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh && install_apt_deps
- name: Download presto fuzzer
uses: actions/download-artifact@v3
with:
name: presto_expression_fuzzer
- name: "Run Presto Fuzzer"
run: |
mkdir -p /tmp/presto_fuzzer_repro/
rm -rfv /tmp/presto_fuzzer_repro/*
chmod -R 777 /tmp/presto_fuzzer_repro
chmod +x velox_expression_fuzzer_test
./velox_expression_fuzzer_test \
--seed ${RANDOM} \
--enable_variadic_signatures \
--velox_fuzzer_enable_complex_types \
--velox_fuzzer_enable_decimal_type \
--lazy_vector_generation_ratio 0.2 \
--velox_fuzzer_enable_column_reuse \
--velox_fuzzer_enable_expression_reuse \
--max_expression_trees_per_step 2 \
--retry_with_try \
--enable_dereference \
--duration_sec 1800 \
--logtostderr=1 \
--minloglevel=1 \
--repro_persist_path=/tmp/presto_fuzzer_repro \
&& echo -e "\n\nFuzzer run finished successfully."
- name: Archive Presto expression production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: presto-fuzzer-failure-artifacts
path: |
/tmp/presto_fuzzer_repro
spark-expression-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
timeout-minutes: 120
steps:
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
ref: "${{ inputs.ref || 'main' }}"
- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh && install_apt_deps
- name: Download spark fuzzer
uses: actions/download-artifact@v3
with:
name: spark_expression_fuzzer
- name: "Run Spark Fuzzer"
run: |
mkdir -p /tmp/spark_fuzzer_repro/
rm -rfv /tmp/spark_fuzzer_repro/*
chmod -R 777 /tmp/spark_fuzzer_repro
chmod +x spark_expression_fuzzer_test
./spark_expression_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 1800 \
--logtostderr=1 \
--minloglevel=1 \
--repro_persist_path=/tmp/spark_fuzzer_repro \
--velox_fuzzer_enable_decimal_type \
--retry_with_try \
&& echo -e "\n\nSpark Fuzzer run finished successfully."
- name: Archive Spark expression production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: spark-fuzzer-failure-artifacts
path: |
/tmp/spark_fuzzer_repro