LeVCS/scripts/bench.sh

245 lines
8.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Run LeVCS Tier 1 benchmarks reproducibly.
#
# Usage:
# scripts/bench.sh Full criterion run (~few minutes per bench)
# scripts/bench.sh --quick Short measurement window for smoke testing
# scripts/bench.sh --flamegraph Generate a flamegraph SVG per bench
# scripts/bench.sh --output-dir DIR Override the output directory
# scripts/bench.sh --bench NAME Run only one bench (pack_codec|object_hash|textual_merge)
# scripts/bench.sh -h | --help Show this message
#
# Output layout: bench-results/<host>-<UTC-timestamp>/
# metadata.txt rustc / cpu / mem / git context for the run
# <crate>-<bench>.txt captured criterion stdout per bench
# <crate>-<bench>.svg flamegraph SVGs (when --flamegraph is used)
# summary.txt headline time/throughput pulled out of each run
# criterion-html/ copy of criterion's HTML reports (target/criterion)
#
# Notes for cluster runs:
# * --flamegraph requires `cargo install flamegraph` and the `perf` tool.
# * Many cluster nodes set kernel.perf_event_paranoid >= 2, which blocks
# unprivileged perf. The script warns but does not attempt to fix it.
# * Criterion writes its own results to target/criterion/ regardless of
# --output-dir; that directory is what's used for run-to-run diffing.
set -euo pipefail

# Run configuration defaults; the flag loop below may override any of these.
MODE=criterion   # criterion | flamegraph
TIMING=default   # default | quick (--quick shrinks the measurement window)
OUTPUT_DIR=      # derived from host+timestamp later unless --output-dir is given
WARM_UP=3        # criterion warm-up time, seconds
MEASURE=5        # criterion measurement time, seconds
SAMPLES=100      # criterion sample count
ONLY_BENCH=      # empty = run every registered bench
# Parse command-line flags; unknown flags abort with a pointer to --help.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --quick)
      TIMING="quick"
      WARM_UP=1
      MEASURE=2
      SAMPLES=30
      shift
      ;;
    --flamegraph)
      MODE="flamegraph"
      shift
      ;;
    --output-dir)
      # Guard the missing-value case: with `set -u`, touching "$2" would
      # die with an opaque "unbound variable" error instead of a hint.
      if [[ $# -lt 2 ]]; then
        echo "error: --output-dir requires a directory argument" >&2
        exit 1
      fi
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --bench)
      if [[ $# -lt 2 ]]; then
        echo "error: --bench requires a bench name argument" >&2
        exit 1
      fi
      ONLY_BENCH="$2"
      shift 2
      ;;
    -h|--help)
      # Print the header comment block as usage text. The previous
      # `sed -n '2,/^$/p'` looked for a blank line that this file does not
      # contain, so it dumped the entire script; instead, print comment
      # lines starting at line 2 and stop at the first non-comment line.
      awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
      exit 0
      ;;
    *)
      echo "error: unknown argument '$1'" >&2
      echo "run with --help for usage" >&2
      exit 1
      ;;
  esac
done
# Always operate from the repository root so relative paths
# (target/criterion, bench-results/) resolve the same from any cwd.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)"
cd "$REPO_ROOT"

# One results directory per run, named after the host and a UTC
# timestamp, unless the caller picked a location with --output-dir.
HOST="$(hostname -s 2>/dev/null || echo unknown)"
STAMP="$(date -u '+%Y%m%dT%H%M%SZ')"
OUTPUT_DIR="${OUTPUT_DIR:-bench-results/${HOST}-${STAMP}}"
mkdir -p -- "$OUTPUT_DIR"

echo "==> output: $OUTPUT_DIR"
echo "==> mode: $MODE ($TIMING — warm_up=${WARM_UP}s, measure=${MEASURE}s, samples=$SAMPLES)"
echo
# Capture machine + toolchain context. Anything that could explain a
# perf delta between two runs lives here so cluster results stay
# attributable.
#
# Every probe is best-effort (`|| true` or an echo fallback): a missing
# tool on a stripped-down node must not abort the run under `set -e`.
# The whole group's stdout is redirected once into metadata.txt.
{
echo "=== rustc ==="
rustc --version 2>&1 || true
echo
echo "=== cargo ==="
cargo --version 2>&1 || true
echo
echo "=== uname ==="
uname -a 2>&1 || true
echo
# /proc is Linux-only; the cpu section is simply absent elsewhere.
if [[ -r /proc/cpuinfo ]]; then
echo "=== cpu ==="
grep -m1 'model name' /proc/cpuinfo || true
printf "cores: %d\n" "$(grep -c '^processor' /proc/cpuinfo)" || true
echo
fi
if command -v free >/dev/null 2>&1; then
echo "=== mem ==="
free -h 2>&1 || true
echo
fi
# Recorded so perf availability for flamegraph runs is explainable later.
if [[ -r /proc/sys/kernel/perf_event_paranoid ]]; then
echo "=== perf_event_paranoid ==="
cat /proc/sys/kernel/perf_event_paranoid
echo
fi
# Commit hash plus up to 20 lines of dirty-tree state: enough to tell
# whether two runs benchmarked the same source.
echo "=== git ==="
git rev-parse HEAD 2>/dev/null || echo "(not a git checkout)"
git status --porcelain 2>/dev/null | head -20 || true
echo
# Echo back the effective bench configuration for this run.
echo "=== bench config ==="
echo "mode=$MODE"
echo "timing=$TIMING"
echo "warm_up=${WARM_UP}s"
echo "measure=${MEASURE}s"
echo "samples=$SAMPLES"
echo "only=${ONLY_BENCH:-<all>}"
} > "$OUTPUT_DIR/metadata.txt"
# Registered Tier 1 benches as "<crate> <bench-target>" pairs. --bench
# filters this list by the bench-target name.
BENCHES=(
  "levcs-protocol pack_codec"
  "levcs-core object_hash"
  "levcs-merge textual_merge"
)
if [[ -n "$ONLY_BENCH" ]]; then
  FILTERED=()
  for entry in "${BENCHES[@]}"; do
    # Split "<crate> <bench>" with `read` instead of forking awk per entry.
    read -r _ bench <<<"$entry"
    if [[ "$bench" == "$ONLY_BENCH" ]]; then
      FILTERED+=("$entry")
    fi
  done
  if [[ ${#FILTERED[@]} -eq 0 ]]; then
    echo "error: --bench '$ONLY_BENCH' not in registered list" >&2
    # Derive the hint from BENCHES so it cannot drift out of sync with
    # the registry above when benches are added or renamed.
    names=()
    for entry in "${BENCHES[@]}"; do
      read -r _ bench <<<"$entry"
      names+=("$bench")
    done
    printf -v available '%s, ' "${names[@]}"
    echo "available: ${available%, }" >&2
    exit 1
  fi
  BENCHES=("${FILTERED[@]}")
fi
# Preflight for flamegraph mode: the cargo-flamegraph subcommand must be
# installed, and unprivileged perf should be usable.
if [[ "$MODE" == "flamegraph" ]]; then
  # Bail out early with an install hint when the subcommand is absent.
  command -v cargo-flamegraph >/dev/null 2>&1 || {
    echo "error: cargo-flamegraph not found on PATH" >&2
    echo "install with: cargo install flamegraph" >&2
    exit 1
  }
  # Warn — but do not fail — when the kernel likely blocks unprivileged
  # perf; per the header notes, the script never changes the sysctl itself.
  if [[ -r /proc/sys/kernel/perf_event_paranoid ]]; then
    read -r PARANOID < /proc/sys/kernel/perf_event_paranoid
    if (( PARANOID > 1 )); then
      echo "warning: kernel.perf_event_paranoid=$PARANOID — perf may be blocked" >&2
      echo " try: sudo sysctl kernel.perf_event_paranoid=1" >&2
      echo
    fi
  fi
fi
# Run each registered bench, teeing live criterion output into a capture
# file so the summary pass below can parse it afterwards.
for entry in "${BENCHES[@]}"; do
  # Split "<crate> <bench>" once with `read` instead of two awk forks
  # per iteration.
  read -r crate bench <<<"$entry"
  out="$OUTPUT_DIR/${crate}-${bench}.txt"
  echo "==> running $crate :: $bench"
  if [[ "$MODE" == "flamegraph" ]]; then
    svg="$OUTPUT_DIR/${crate}-${bench}.svg"
    # cargo-flamegraph runs the bench under perf. The bench binary
    # needs `--bench` to enter benchmark mode (criterion default).
    cargo flamegraph -p "$crate" --bench "$bench" \
      --output "$svg" -- --bench \
      --warm-up-time "$WARM_UP" \
      --measurement-time "$MEASURE" \
      --sample-size "$SAMPLES" 2>&1 | tee "$out"
  else
    # `set -o pipefail` (set at the top) makes a failing cargo bench
    # abort the script even though its output is piped through tee.
    cargo bench -p "$crate" --bench "$bench" -- \
      --warm-up-time "$WARM_UP" \
      --measurement-time "$MEASURE" \
      --sample-size "$SAMPLES" 2>&1 | tee "$out"
  fi
  echo
done
# Pull headline numbers out of the captured criterion output. The
# format is stable: a label line (no leading whitespace, contains '/'),
# then indented `time:` and optional `thrpt:` lines.
#
# The whole group's stdout goes into summary.txt in a single pass.
{
echo "# LeVCS bench summary"
echo "# host: $HOST stamp: $STAMP"
echo "# mode: $MODE timing: $TIMING (warm_up=${WARM_UP}s measure=${MEASURE}s samples=$SAMPLES)"
echo
for entry in "${BENCHES[@]}"; do
crate=$(awk '{print $1}' <<<"$entry")
bench=$(awk '{print $2}' <<<"$entry")
out="$OUTPUT_DIR/${crate}-${bench}.txt"
# Skip benches with no capture file (e.g. an aborted or partial run).
[[ -f "$out" ]] || continue
echo "## $crate :: $bench"
# Criterion stanzas have no blank separators; flush on each new
# label. Exclude "Benchmarking …" status lines (also contain '/'),
# outlier-count lines, and the "change:" stanza below the absolute
# numbers. Keep only the first time:/thrpt: per label so the
# change-vs-baseline lines do not overwrite the absolute ones.
awk '
function flush() {
if (label != "" && time != "") {
if (thrpt != "") printf " %-44s %s %s\n", label, time, thrpt
else printf " %-44s %s\n", label, time
}
label=""; time=""; thrpt=""
}
/^Benchmarking / { next }
/^Found / { next }
/^[^[:space:]].*\// { flush(); label=$0; next }
/time:/ {
if (label != "" && time == "") {
sub(/^[[:space:]]+/, "")
time=$0
}
next
}
/thrpt:/ {
if (label != "" && thrpt == "") {
sub(/^[[:space:]]+/, "")
thrpt=$0
}
next
}
END { flush() }
' "$out"
echo
done
} > "$OUTPUT_DIR/summary.txt"
# Snapshot criterion's HTML reports. These contain the full distribution
# plots and are what you'd open to compare two runs visually.
if [[ -d target/criterion ]]; then
  cp -r target/criterion "$OUTPUT_DIR/criterion-html"
fi
echo "==> done"
echo " summary: $OUTPUT_DIR/summary.txt"
echo " HTML: $OUTPUT_DIR/criterion-html/report/index.html"
# Use if/fi rather than `[[ … ]] && echo`: as the script's final command,
# the short-circuit form returned 1 whenever MODE was not "flamegraph",
# so every successful criterion run (the common case) exited non-zero.
if [[ "$MODE" == "flamegraph" ]]; then
  echo " SVGs: $OUTPUT_DIR/*.svg"
fi