#!/usr/bin/env bash
# Run LeVCS Tier 1 benchmarks reproducibly.
#
# Usage:
#   scripts/bench.sh                    Full criterion run (~few minutes per bench)
#   scripts/bench.sh --quick            Short measurement window for smoke testing
#   scripts/bench.sh --flamegraph       Generate a flamegraph SVG per bench
#   scripts/bench.sh --output-dir DIR   Override the output directory
#   scripts/bench.sh --bench NAME       Run only one bench (pack_codec|object_hash|textual_merge)
#   scripts/bench.sh -h | --help        Show this message
#
# Output layout: bench-results/<host>-<stamp>/
#   metadata.txt           rustc / cpu / mem / git context for the run
#   <crate>-<bench>.txt    captured criterion stdout per bench
#   <crate>-<bench>.svg    flamegraph SVGs (when --flamegraph is used)
#   summary.txt            headline time/throughput pulled out of each run
#   criterion-html/        copy of criterion's HTML reports (target/criterion)
#
# Notes:
#   * The script changes into the repository root (the parent of the
#     directory holding this script) before creating the output directory,
#     so a relative --output-dir is resolved against the repository root.
#
# Notes for cluster runs:
#   * --flamegraph requires `cargo install flamegraph` and the `perf` tool.
#   * Many cluster nodes set kernel.perf_event_paranoid >= 2, which blocks
#     unprivileged perf. The script warns but does not attempt to fix it.
#   * Criterion writes its own results to target/criterion/ regardless of
#     --output-dir; that directory is what's used for run-to-run diffing.

# NOTE: --help prints this file from line 2 up to the first blank line, so
# the header above must stay contiguous (use a bare "#" for spacing).

set -euo pipefail

MODE="criterion"      # criterion | flamegraph
TIMING="default"      # label only: default | quick
OUTPUT_DIR=""         # empty => bench-results/<host>-<stamp>
WARM_UP=3             # seconds, passed to --warm-up-time
MEASURE=5             # seconds, passed to --measurement-time
SAMPLES=100           # passed to --sample-size
ONLY_BENCH=""         # empty => run every registered bench

# Abort with a readable message when a value-taking option is the last
# argument. Without this, `set -u` kills the script on "$2" with a bare
# "unbound variable" error.
# Arguments: the remaining command line ("$@"), option name first.
require_value() {
    if [[ $# -lt 2 ]]; then
        echo "error: $1 requires a value" >&2
        echo "run with --help for usage" >&2
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --quick)
            TIMING="quick"
            WARM_UP=1
            MEASURE=2
            SAMPLES=30
            shift
            ;;
        --flamegraph)
            MODE="flamegraph"
            shift
            ;;
        --output-dir)
            require_value "$@"
            OUTPUT_DIR="$2"
            shift 2
            ;;
        --bench)
            require_value "$@"
            ONLY_BENCH="$2"
            shift 2
            ;;
        -h|--help)
            # Print the header comment (line 2 through the first blank
            # line) with the leading "# " stripped.
            sed -n '2,/^$/p' "$0" | sed -E 's/^# ?//'
            exit 0
            ;;
        *)
            echo "error: unknown argument '$1'" >&2
            echo "run with --help for usage" >&2
            exit 1
            ;;
    esac
done

# Always operate from the repository root so cargo and the relative
# target/criterion path below behave the same wherever we are invoked from.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
cd "$REPO_ROOT"

HOST=$(hostname -s 2>/dev/null || echo unknown)
STAMP=$(date -u +%Y%m%dT%H%M%SZ)
OUTPUT_DIR=${OUTPUT_DIR:-bench-results/${HOST}-${STAMP}}
mkdir -p "$OUTPUT_DIR"

echo "==> output: $OUTPUT_DIR"
echo "==> mode: $MODE ($TIMING — warm_up=${WARM_UP}s, measure=${MEASURE}s, samples=$SAMPLES)"
echo

# Capture machine + toolchain context. Anything that could explain a
# perf delta between two runs lives here so cluster results stay
# attributable. Every probe is best-effort (`|| true`): missing tools
# must not abort the benchmark run.
{
    echo "=== rustc ==="
    rustc --version 2>&1 || true
    echo
    echo "=== cargo ==="
    cargo --version 2>&1 || true
    echo
    echo "=== uname ==="
    uname -a 2>&1 || true
    echo
    if [[ -r /proc/cpuinfo ]]; then
        echo "=== cpu ==="
        grep -m1 'model name' /proc/cpuinfo || true
        printf "cores: %d\n" "$(grep -c '^processor' /proc/cpuinfo)" || true
        echo
    fi
    if command -v free >/dev/null 2>&1; then
        echo "=== mem ==="
        free -h 2>&1 || true
        echo
    fi
    if [[ -r /proc/sys/kernel/perf_event_paranoid ]]; then
        echo "=== perf_event_paranoid ==="
        cat /proc/sys/kernel/perf_event_paranoid
        echo
    fi
    echo "=== git ==="
    git rev-parse HEAD 2>/dev/null || echo "(not a git checkout)"
    git status --porcelain 2>/dev/null | head -20 || true
    echo
    echo "=== bench config ==="
    echo "mode=$MODE"
    echo "timing=$TIMING"
    echo "warm_up=${WARM_UP}s"
    echo "measure=${MEASURE}s"
    echo "samples=$SAMPLES"
    echo "only=${ONLY_BENCH:-}"
} > "$OUTPUT_DIR/metadata.txt"

# Registered benches, one "<crate> <bench>" pair per entry.
BENCHES=(
    "levcs-protocol pack_codec"
    "levcs-core object_hash"
    "levcs-merge textual_merge"
)

# --bench NAME: narrow the list to the entry whose bench name matches.
if [[ -n "$ONLY_BENCH" ]]; then
    FILTERED=()
    for entry in "${BENCHES[@]}"; do
        read -r _ bench <<<"$entry"
        if [[ "$bench" == "$ONLY_BENCH" ]]; then
            FILTERED+=("$entry")
        fi
    done
    if [[ ${#FILTERED[@]} -eq 0 ]]; then
        echo "error: --bench '$ONLY_BENCH' not in registered list" >&2
        echo "available: pack_codec, object_hash, textual_merge" >&2
        exit 1
    fi
    BENCHES=("${FILTERED[@]}")
fi

# Flamegraph pre-flight: the cargo subcommand must be installed; a high
# perf_event_paranoid setting only produces a warning.
if [[ "$MODE" == "flamegraph" ]]; then
    if ! command -v cargo-flamegraph >/dev/null 2>&1; then
        echo "error: cargo-flamegraph not found on PATH" >&2
        echo "install with: cargo install flamegraph" >&2
        exit 1
    fi
    if [[ -r /proc/sys/kernel/perf_event_paranoid ]]; then
        PARANOID=$(cat /proc/sys/kernel/perf_event_paranoid)
        if [[ "$PARANOID" -gt 1 ]]; then
            echo "warning: kernel.perf_event_paranoid=$PARANOID — perf may be blocked" >&2
            echo " try: sudo sysctl kernel.perf_event_paranoid=1" >&2
            echo
        fi
    fi
fi

# Run each bench, mirroring its combined stdout/stderr to the terminal and
# to <crate>-<bench>.txt. With pipefail a failing bench aborts the script.
for entry in "${BENCHES[@]}"; do
    read -r crate bench <<<"$entry"
    out="$OUTPUT_DIR/${crate}-${bench}.txt"

    echo "==> running $crate :: $bench"
    if [[ "$MODE" == "flamegraph" ]]; then
        svg="$OUTPUT_DIR/${crate}-${bench}.svg"
        # cargo-flamegraph runs the bench under perf. The bench binary
        # needs `--bench` to enter benchmark mode (criterion default).
        cargo flamegraph -p "$crate" --bench "$bench" \
            --output "$svg" -- --bench \
            --warm-up-time "$WARM_UP" \
            --measurement-time "$MEASURE" \
            --sample-size "$SAMPLES" 2>&1 | tee "$out"
    else
        cargo bench -p "$crate" --bench "$bench" -- \
            --warm-up-time "$WARM_UP" \
            --measurement-time "$MEASURE" \
            --sample-size "$SAMPLES" 2>&1 | tee "$out"
    fi
    echo
done

# Pull headline numbers out of the captured criterion output. The
# format is stable: a label line (no leading whitespace, contains '/'),
# then indented `time:` and optional `thrpt:` lines.
{
    echo "# LeVCS bench summary"
    echo "# host: $HOST stamp: $STAMP"
    echo "# mode: $MODE timing: $TIMING (warm_up=${WARM_UP}s measure=${MEASURE}s samples=$SAMPLES)"
    echo
    for entry in "${BENCHES[@]}"; do
        read -r crate bench <<<"$entry"
        out="$OUTPUT_DIR/${crate}-${bench}.txt"
        [[ -f "$out" ]] || continue
        echo "## $crate :: $bench"
        # Criterion stanzas have no blank separators; flush on each new
        # label. Exclude "Benchmarking …" status lines (also contain '/'),
        # outlier-count lines, and the "change:" stanza below the absolute
        # numbers. Keep only the first time:/thrpt: per label so the
        # change-vs-baseline lines do not overwrite the absolute ones.
        awk '
            function flush() {
                if (label != "" && time != "") {
                    if (thrpt != "") {
                        printf " %-44s %s %s\n", label, time, thrpt
                    } else {
                        printf " %-44s %s\n", label, time
                    }
                }
                label = ""; time = ""; thrpt = ""
            }
            /^Benchmarking / { next }
            /^Found / { next }
            /^[^[:space:]].*\// { flush(); label = $0; next }
            /time:/ {
                if (label != "" && time == "") {
                    sub(/^[[:space:]]+/, "")
                    time = $0
                }
                next
            }
            /thrpt:/ {
                if (label != "" && thrpt == "") {
                    sub(/^[[:space:]]+/, "")
                    thrpt = $0
                }
                next
            }
            END { flush() }
        ' "$out"
        echo
    done
} > "$OUTPUT_DIR/summary.txt"

# Snapshot criterion's HTML reports. These contain the full distribution
# plots and are what you'd open to compare two runs visually.
# Copy the directory *contents* into a pre-created destination: a plain
# `cp -r src dst` would nest the copy as dst/criterion/ whenever dst already
# exists (e.g. a reused --output-dir).
if [[ -d target/criterion ]]; then
    mkdir -p "$OUTPUT_DIR/criterion-html"
    cp -R target/criterion/. "$OUTPUT_DIR/criterion-html/"
fi

echo "==> done"
echo " summary: $OUTPUT_DIR/summary.txt"
# Only advertise the HTML report when a snapshot actually exists.
if [[ -d "$OUTPUT_DIR/criterion-html" ]]; then
    echo " HTML: $OUTPUT_DIR/criterion-html/report/index.html"
fi
# Use `if`, not `[[ … ]] && echo`: as the last command of the script, a
# false test would become the script's exit status and make every
# successful criterion-mode run exit with 1.
if [[ "$MODE" == "flamegraph" ]]; then
    echo " SVGs: $OUTPUT_DIR/*.svg"
fi