Merge pull request #95 from exarkun/benchmark
Benchmark the Haskell `encode` and `decode` APIs
exarkun authored Sep 19, 2023
2 parents 92faaca + ff60d01 commit 12a1e95
Showing 5 changed files with 299 additions and 4 deletions.
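
For context, the new benchmark drives the existing `Codec.FEC` `encode` and `decode` functions. The sketch below is a minimal, hypothetical round trip with a 2-of-3 code, with the signatures assumed from how the benchmark calls them (`fec k n` builds the parameters, `encode` is taken to produce the n - k secondary blocks, and `decode` to rebuild the k primary blocks from any k index-tagged blocks); the block contents are placeholders.

```haskell
module Main (main) where

import Codec.FEC (decode, encode, fec, initialize)
import qualified Data.ByteString as B

main :: IO ()
main = do
    -- One-time library initialization, as the benchmark also does.
    initialize
    let params = fec 2 3
        -- k equal-length primary blocks (contents are arbitrary here).
        primaries = [B.replicate 1024 0, B.replicate 1024 1]
        -- Assumed to yield the n - k = 1 secondary block.
        secondaries = encode params primaries
        -- Tag each block with its index, then drop one primary block.
        tagged = zip [0 ..] (primaries <> secondaries)
        -- Assumed to recover the primary blocks from any k tagged blocks.
        recovered = decode params (drop 1 tagged)
    print (recovered == primaries)
```

The benchmark below skips the round trip and times `encode` and `decode` separately over randomly generated blocks.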
30 changes: 27 additions & 3 deletions .circleci/config.yml
@@ -33,19 +33,43 @@ jobs:
experimental-features = nix-command flakes
allow-import-from-derivation = true
# CACHIX_AUTH_TOKEN is manually set in the CircleCI web UI and allows us
# to push to CACHIX_NAME. CACHIX_NAME tells cachix which cache to push
# to.
CACHIX_NAME: "tahoe-lafs-opensource"

steps:
- "run":
# Get cachix for Nix-friendly caching.
name: "Install Basic Dependencies"
command: |
# Get some build environment dependencies and let them float on a
# certain release branch. These aren't involved in the actual
# package build (only in CI environment setup) so the fact that
# they float shouldn't hurt reproducibility.
NIXPKGS="nixpkgs/nixos-23.05"
nix profile install $NIXPKGS#cachix $NIXPKGS#bash $NIXPKGS#jp
# Activate our cachix cache for "binary substitution". This sets
# up configuration that lets Nix download something from the cache
# instead of building it locally, if possible.
cachix use "${CACHIX_NAME}"
- "checkout"

- run:
name: "nix flake check"
command: |
nix flake show
nix flake check -v
source .circleci/lib.sh
cache_if_able nix flake show
cache_if_able nix flake check -v
- run:
name: "Nix Build"
no_output_timeout: "90m"
command: |
nix build --print-build-logs
source .circleci/lib.sh
cache_if_able nix build --print-build-logs
workflows:
ci:
148 changes: 148 additions & 0 deletions .circleci/lib.sh
@@ -0,0 +1,148 @@
# CircleCI build environment looks like it has a zillion and a half cores.
# Don't let Nix autodetect this high core count because it blows up memory
# usage and fails the test run. Pick a number of cores that suits the build
# environment we're paying for (the free one!).
DEPENDENCY_CORES=3

# Once dependencies are built, we can allow some more concurrency for our own
# test suite.
UNITTEST_CORES=8

# Run a command, enabling cache writes to cachix if possible. The command is
# accepted as a variable number of positional arguments (like argv).
function cache_if_able() {
# Dump some info about our build environment.
describe_build

if is_cache_writeable; then
# If the cache is available we'll use it. This lets fork owners set
# up their own caching if they want.
echo "Cachix credentials present; will attempt to write to cache."

# The `cachix watch-exec ...` command does our cache population. When it
# sees something added to the store (I guess) it pushes it to the named
# cache.
cachix watch-exec "${CACHIX_NAME}" -- "$@"
else
if is_cache_required; then
echo "Required credentials (CACHIX_AUTH_TOKEN) are missing."
return 1
else
echo "Cachix credentials missing; will not attempt cache writes."
"$@"
fi
fi
}

function is_cache_writeable() {
# We can only *push* to the cache if we have a CACHIX_AUTH_TOKEN. In-repo
# jobs will get this from CircleCI configuration but jobs from forks may
# not.
[ -v CACHIX_AUTH_TOKEN ]
}

function is_cache_required() {
# If we're building in tahoe-lafs/zfec then we must use the cache. If
# we're building anything from a fork then we're allowed to not have the
# credentials.
is_upstream
}

# Return success if the origin of this build is the tahoe-lafs/zfec repository
# itself (and so we expect to have cache credentials available), failure
# otherwise.
#
# See circleci.txt for notes about how this determination is made.
function is_upstream() {
# CIRCLE_PROJECT_USERNAME is set to the org the build is happening for.
# If a PR targets a fork of the repo then this is set to something other
# than "tahoe-lafs".
[ "$CIRCLE_PROJECT_USERNAME" == "tahoe-lafs" ] &&

# CIRCLE_BRANCH is set to the real branch name for in-repo PRs and
# "pull/NNNN" for pull requests from forks.
#
# CIRCLE_PULL_REQUESTS is set to a comma-separated list of the full
# URLs of the PR pages which share an underlying branch, with one of
# them ending with that same "pull/NNNN" for PRs from forks.
! any_element_endswith "/$CIRCLE_BRANCH" "," "$CIRCLE_PULL_REQUESTS"
}

# Return success if splitting $3 on $2 results in an array with any element
# that ends with $1, failure otherwise.
function any_element_endswith() {
suffix=$1
shift

sep=$1
shift

haystack=$1
shift

IFS="${sep}" read -r -a elements <<< "$haystack"
for elem in "${elements[@]}"; do
if endswith "$suffix" "$elem"; then
return 0
fi
done
return 1
}

# Return success if $2 ends with $1, failure otherwise.
function endswith() {
suffix=$1
shift

haystack=$1
shift

case "$haystack" in
*${suffix})
return 0
;;

*)
return 1
;;
esac
}

function describe_build() {
echo "Building PR for user/org: ${CIRCLE_PROJECT_USERNAME}"
echo "Building branch: ${CIRCLE_BRANCH}"
if is_upstream; then
echo "Upstream build."
else
echo "Non-upstream build."
fi
if is_cache_required; then
echo "Cache is required."
else
echo "Cache not required."
fi
if is_cache_writeable; then
echo "Cache is writeable."
else
echo "Cache not writeable."
fi
}

# Inspect the flake input metadata for an input of a given name and return the
# revision at which that input is pinned. If the input does not exist then
# return garbage (probably "null").
read_input_revision() {
input_name=$1
shift

nix flake metadata --json | jp --unquoted 'locks.nodes."'"$input_name"'".locked.rev'
}

# Return a flake reference that refers to a certain revision of nixpkgs. The
# certain revision is the revision to which the specified input is pinned.
nixpkgs_flake_reference() {
input_name=$1
shift

echo "github:NixOS/nixpkgs?rev=$(read_input_revision $input_name)"
}
93 changes: 93 additions & 0 deletions benchmark-zfec/Main.hs
@@ -0,0 +1,93 @@
module Main where

import Codec.FEC (FECParams (paramK, paramN), decode, encode, fec, initialize)
import Control.Monad (replicateM)
import Criterion.Main (Benchmark, bench, bgroup, defaultMain, env, nf)
import Data.Bifunctor (bimap)
import qualified Data.ByteString as B
import Data.List (unfoldr)
import System.Random (genByteString, mkStdGen)

main :: IO ()
main =
defaultMain
-- Run against a few somewhat arbitrarily chosen configurations. Notably,
-- though, 94/100 matches the numbers recorded in the readme.
[ env (setupFEC 2 3) makeFECBenchmarks
, env (setupFEC 16 31) makeFECBenchmarks
, env (setupFEC 94 100) makeFECBenchmarks
]
where
setupFEC :: Int -> Int -> IO FECParams
setupFEC k n = do
initialize
pure (fec k n)

makeFECBenchmarks = fecGroup [10 ^ 6]

fecGroup sizes params =
bgroup
(show (paramK params) <> "/" <> show (paramN params))
( []
++ (decodePrimaryBenchmark params <$> sizes)
++ (decodeSecondaryBenchmark params <$> sizes)
++ (encodeBenchmark params <$> sizes)
)

encodeBenchmark params size =
env (setupBlocks (paramK params) size) $
benchmarkEncode params
decodePrimaryBenchmark params size =
env (setupBlocks (paramK params) size) $
benchmarkPrimaryDecode params
decodeSecondaryBenchmark params size =
env (setupBlocks (paramK params) size) $
benchmarkSecondaryDecode params

setupBlocks :: Int -> Int -> IO [B.ByteString]
setupBlocks k blockSize = pure $ makeBlocks k blockSize

benchmarkEncode params blocks =
bench ("encode blockSize=" <> showWithUnit (B.length $ head blocks)) $
-- We choose normal form here because the typical thing to do with the
-- result is to serialize or otherwise use all of the bytes (e.g., to write
-- them to a file or send them over the network) so they will certainly
-- all be used.
nf (uncurry encode) (params, blocks)

benchmarkPrimaryDecode params blocks =
bench ("decode [0..] blockSize=" <> showWithUnit (B.length $ head blocks)) $
-- normal form here for the same reason as in benchmarkEncode.
-- assign block numbers to use only primary blocks
nf (uncurry decode) (params, (zip [0 ..] blocks))

benchmarkSecondaryDecode params blocks =
bench ("decode [" <> show n <> "..] blockSize=" <> showWithUnit (B.length $ head blocks)) $
-- normal form here for the same reason as in benchmarkEncode.
-- assign block numbers to use as many non-primary blocks as
-- possible
nf (uncurry decode) (params, (zip [n ..] blocks))
where
n = paramN params - paramK params

makeBlocks :: Int -> Int -> [B.ByteString]
makeBlocks k size = take k . go $ mkStdGen 42
where
go = uncurry ($) . bimap (:) go . genByteString size

data BytesUnit = B | KB | MB deriving (Eq, Ord, Enum, Show, Bounded)

bestUnit :: Int -> BytesUnit
bestUnit n
| n < 1000 = minBound
| maxBound == nextUnit = nextUnit
| otherwise = succ nextUnit
where
nextUnit = bestUnit . (`div` 1000) $ n

showWithUnit :: Int -> String
showWithUnit n = show (scale n) <> show u
where
scale n = n `div` (10 ^ (3 * fromEnum u))

u = bestUnit n
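
Two small details of the harness: with the single configured size of 10 ^ 6 bytes, `bestUnit`/`showWithUnit` render the size as `1MB`, so within each `k/n` group the benchmark names read like `encode blockSize=1MB`. The block numbering is what separates the two decode cases; for the 94/100 configuration, for example, the primary-decode benchmark tags its 94 blocks 0 through 93 (all primary), while the secondary-decode benchmark tags them 6 through 99, pulling in all 6 secondary block numbers that exist for that configuration.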
14 changes: 14 additions & 0 deletions fec.cabal
@@ -31,6 +31,7 @@ library
build-depends:
, base
, bytestring >=0.9
, deepseq
, extra

exposed-modules: Codec.FEC
@@ -41,6 +42,19 @@ library
cc-options: -std=c99
include-dirs: zfec

executable benchmark-zfec
main-is: Main.hs
ghc-options: -threaded
build-depends:
, base ^>=4.14.3.0
, bytestring
, criterion
, fec
, random

hs-source-dirs: benchmark-zfec
default-language: Haskell2010

test-suite tests
type: exitcode-stdio-1.0
main-is: FECTest.hs
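With the stanza above in place, the benchmark should be runnable through the usual cabal workflow, e.g. `cabal run benchmark-zfec`.
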
18 changes: 17 additions & 1 deletion haskell/Codec/FEC.hs
@@ -1,5 +1,8 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE EmptyDataDecls #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE NamedFieldPuns #-}

{- |
Module: Codec.FEC
@@ -31,6 +34,7 @@ module Codec.FEC (
) where

import Control.Concurrent.Extra (Lock, newLock, withLock)
import Control.DeepSeq (NFData (rnf))
import Control.Exception (Exception, throwIO)
import Data.Bits (xor)
import qualified Data.ByteString as B
@@ -47,15 +51,27 @@ import Foreign.Marshal.Alloc (allocaBytes)
import Foreign.Marshal.Array (advancePtr, withArray)
import Foreign.Ptr (FunPtr, Ptr, castPtr, nullPtr)
import Foreign.Storable (poke, sizeOf)
import GHC.Generics (Generic)
import System.IO (IOMode (..), withFile)
import System.IO.Unsafe (unsafePerformIO)

data CFEC
data FECParams = FECParams
{ _cfec :: ForeignPtr CFEC
{ _cfec :: !(ForeignPtr CFEC)
, paramK :: Int
, paramN :: Int
}
deriving (Generic)

-- Provide an NFData instance so it's possible to use a FECParams in a
-- Criterion benchmark.
instance NFData FECParams where
rnf FECParams{_cfec, paramK, paramN} =
-- ForeignPtr has no NFData instance and I don't know how to implement
-- one for it so we punt on it here. We do make it strict in the
-- record definition which at least shallowly evaluates the
-- ForeignPtr which is ... part of the job?
rnf paramK `seq` rnf paramN

instance Show FECParams where
show (FECParams _ k n) = "FEC (" ++ show k ++ ", " ++ show n ++ ")"
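
The `NFData` instance is needed because Criterion's `env` (used in benchmark-zfec/Main.hs above) carries an `NFData env` constraint and forces the benchmark environment to normal form before timing starts; without an instance for `FECParams`, the `env (setupFEC k n)` calls would not typecheck.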
