#!/usr/bin/env bash
# Run LeVCS Tier 1 benchmarks reproducibly.
#
# Usage:
#   scripts/bench.sh                    Full criterion run (~few minutes per bench)
#   scripts/bench.sh --quick            Short measurement window for smoke testing
#   scripts/bench.sh --flamegraph       Generate a flamegraph SVG per bench
#   scripts/bench.sh --output-dir DIR   Override the output directory
#   scripts/bench.sh --bench NAME       Run only one bench (pack_codec|object_hash|textual_merge)
#   scripts/bench.sh -h | --help        Show this message
#
# Output layout: bench-results/<host>-<UTC-timestamp>/
#   metadata.txt          rustc / cpu / mem / git context for the run
#   <crate>-<bench>.txt   captured criterion stdout per bench
#   <crate>-<bench>.svg   flamegraph SVGs (when --flamegraph is used)
#   summary.txt           headline time/throughput pulled out of each run
#   criterion-html/       copy of criterion's HTML reports (target/criterion)
#
# Notes for cluster runs:
#   * --flamegraph requires `cargo install flamegraph` and the `perf` tool.
#   * Many cluster nodes set kernel.perf_event_paranoid >= 2, which blocks
#     unprivileged perf. The script warns but does not attempt to fix it.
#   * Criterion writes its own results to target/criterion/ regardless of
#     --output-dir; that directory is what's used for run-to-run diffing.

set -euo pipefail

# Defaults for the run; each may be overridden by the CLI flags parsed below.
MODE="criterion"   # criterion | flamegraph
TIMING="default"   # default | quick
OUTPUT_DIR=""      # empty -> derived from host + UTC timestamp later
WARM_UP=3          # criterion warm-up window, seconds
MEASURE=5          # criterion measurement window, seconds
SAMPLES=100        # criterion sample count
ONLY_BENCH=""      # empty -> run every registered bench
# Parse CLI flags. Flags that take a value verify the value is present so
# a trailing `--output-dir` fails with a clear message instead of the
# cryptic "unbound variable" error `set -u` would produce for "$2".
while [[ $# -gt 0 ]]; do
  case "$1" in
    --quick)
      # Shorter windows / fewer samples: smoke-test timing, not publishable.
      TIMING="quick"
      WARM_UP=1
      MEASURE=2
      SAMPLES=30
      shift
      ;;
    --flamegraph)
      MODE="flamegraph"
      shift
      ;;
    --output-dir)
      if [[ $# -lt 2 ]]; then
        echo "error: --output-dir requires a value" >&2
        exit 1
      fi
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --bench)
      if [[ $# -lt 2 ]]; then
        echo "error: --bench requires a value" >&2
        exit 1
      fi
      ONLY_BENCH="$2"
      shift 2
      ;;
    -h|--help)
      # Print the top-of-file comment block (file line 2 up to the first
      # blank line) with the leading '# ' stripped.
      sed -n '2,/^$/p' "$0" | sed -E 's/^# ?//'
      exit 0
      ;;
    *)
      echo "error: unknown argument '$1'" >&2
      echo "run with --help for usage" >&2
      exit 1
      ;;
  esac
done
# Anchor every relative path at the repository root (the parent of the
# scripts/ directory) so cargo resolves the workspace no matter where the
# script was invoked from.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
cd "$REPO_ROOT"

# Per-run identity: short hostname (best-effort) plus a UTC timestamp.
if ! HOST=$(hostname -s 2>/dev/null); then
  HOST=unknown
fi
STAMP=$(date -u +%Y%m%dT%H%M%SZ)

# Respect an explicit --output-dir; otherwise derive one per host+run.
if [[ -z "${OUTPUT_DIR}" ]]; then
  OUTPUT_DIR="bench-results/${HOST}-${STAMP}"
fi
mkdir -p "$OUTPUT_DIR"

printf '==> output: %s\n' "$OUTPUT_DIR"
printf '==> mode: %s (%s — warm_up=%ss, measure=%ss, samples=%s)\n' \
  "$MODE" "$TIMING" "$WARM_UP" "$MEASURE" "$SAMPLES"
printf '\n'
# Capture machine + toolchain context. Anything that could explain a
# perf delta between two runs lives here so cluster results stay
# attributable.
#
# Every probe is best-effort (`|| true` / guarded by existence checks):
# a minimal node missing a tool should still produce a metadata file,
# just with that section empty. The section order is part of the file's
# format — keep it stable so runs diff cleanly.
{
  echo "=== rustc ==="
  rustc --version 2>&1 || true
  echo
  echo "=== cargo ==="
  cargo --version 2>&1 || true
  echo
  echo "=== uname ==="
  uname -a 2>&1 || true
  echo
  # /proc/cpuinfo is Linux-only; skip the cpu section elsewhere.
  if [[ -r /proc/cpuinfo ]]; then
    echo "=== cpu ==="
    grep -m1 'model name' /proc/cpuinfo || true
    printf "cores: %d\n" "$(grep -c '^processor' /proc/cpuinfo)" || true
    echo
  fi
  if command -v free >/dev/null 2>&1; then
    echo "=== mem ==="
    free -h 2>&1 || true
    echo
  fi
  # Recorded because it decides whether --flamegraph (perf) can work.
  if [[ -r /proc/sys/kernel/perf_event_paranoid ]]; then
    echo "=== perf_event_paranoid ==="
    cat /proc/sys/kernel/perf_event_paranoid
    echo
  fi
  echo "=== git ==="
  git rev-parse HEAD 2>/dev/null || echo "(not a git checkout)"
  # First 20 dirty paths: enough to flag an unclean tree without noise.
  git status --porcelain 2>/dev/null | head -20 || true
  echo
  # Echo the effective bench configuration so the numbers in this run's
  # outputs can never be mistaken for a different timing profile.
  echo "=== bench config ==="
  echo "mode=$MODE"
  echo "timing=$TIMING"
  echo "warm_up=${WARM_UP}s"
  echo "measure=${MEASURE}s"
  echo "samples=$SAMPLES"
  echo "only=${ONLY_BENCH:-<all>}"
} > "$OUTPUT_DIR/metadata.txt"
# Registered Tier 1 benches, one "<crate> <bench>" pair per entry.
BENCHES=(
  "levcs-protocol pack_codec"
  "levcs-core object_hash"
  "levcs-merge textual_merge"
)

# Narrow BENCHES to the single bench named in $1, or exit 1 listing the
# valid names. Splits each entry with parameter expansion instead of
# spawning an awk process per element.
filter_benches() {
  local want=$1 entry bench
  local -a matched=()
  for entry in "${BENCHES[@]}"; do
    bench=${entry##* }   # second (last) field of "<crate> <bench>"
    if [[ "$bench" == "$want" ]]; then
      matched+=("$entry")
    fi
  done
  if [[ ${#matched[@]} -eq 0 ]]; then
    echo "error: --bench '$want' not in registered list" >&2
    echo "available: pack_codec, object_hash, textual_merge" >&2
    exit 1
  fi
  BENCHES=("${matched[@]}")
}

if [[ -n "$ONLY_BENCH" ]]; then
  filter_benches "$ONLY_BENCH"
fi
# Preflight for flamegraph mode: the cargo subcommand must exist, and
# perf must plausibly be usable by an unprivileged user. A restrictive
# perf_event_paranoid only warrants a warning — the run may still work.
if [[ "$MODE" == "flamegraph" ]]; then
  command -v cargo-flamegraph >/dev/null 2>&1 || {
    echo "error: cargo-flamegraph not found on PATH" >&2
    echo "install with: cargo install flamegraph" >&2
    exit 1
  }
  if [[ -r /proc/sys/kernel/perf_event_paranoid ]]; then
    read -r PARANOID < /proc/sys/kernel/perf_event_paranoid
    if (( PARANOID > 1 )); then
      echo "warning: kernel.perf_event_paranoid=$PARANOID — perf may be blocked" >&2
      echo " try: sudo sysctl kernel.perf_event_paranoid=1" >&2
      echo
    fi
  fi
fi
# Run each registered bench, teeing criterion's stdout into the output
# directory. Under `set -o pipefail` a failing cargo invocation fails the
# pipeline (tee's success does not mask it) and aborts the run.
for entry in "${BENCHES[@]}"; do
  # Entries are "<crate> <bench>"; split with the read builtin instead of
  # spawning awk twice per entry.
  read -r crate bench <<<"$entry"
  out="$OUTPUT_DIR/${crate}-${bench}.txt"
  echo "==> running $crate :: $bench"

  if [[ "$MODE" == "flamegraph" ]]; then
    svg="$OUTPUT_DIR/${crate}-${bench}.svg"
    # cargo-flamegraph runs the bench under perf. The bench binary
    # needs `--bench` to enter benchmark mode (criterion default).
    cargo flamegraph -p "$crate" --bench "$bench" \
      --output "$svg" -- --bench \
      --warm-up-time "$WARM_UP" \
      --measurement-time "$MEASURE" \
      --sample-size "$SAMPLES" 2>&1 | tee "$out"
  else
    cargo bench -p "$crate" --bench "$bench" -- \
      --warm-up-time "$WARM_UP" \
      --measurement-time "$MEASURE" \
      --sample-size "$SAMPLES" 2>&1 | tee "$out"
  fi
  echo
done
# Pull headline numbers out of the captured criterion output. The
# format is stable: a label line (no leading whitespace, contains '/'),
# then indented `time:` and optional `thrpt:` lines.
{
  echo "# LeVCS bench summary"
  echo "# host: $HOST stamp: $STAMP"
  echo "# mode: $MODE timing: $TIMING (warm_up=${WARM_UP}s measure=${MEASURE}s samples=$SAMPLES)"
  echo
  for entry in "${BENCHES[@]}"; do
    # "<crate> <bench>" pair; split with the read builtin rather than
    # spawning awk twice per entry (consistent with the run loop above).
    read -r crate bench <<<"$entry"
    out="$OUTPUT_DIR/${crate}-${bench}.txt"
    # A bench may have been skipped (e.g. cargo failure); don't summarize it.
    [[ -f "$out" ]] || continue
    echo "## $crate :: $bench"
    # Criterion stanzas have no blank separators; flush on each new
    # label. Exclude "Benchmarking …" status lines (also contain '/'),
    # outlier-count lines, and the "change:" stanza below the absolute
    # numbers. Keep only the first time:/thrpt: per label so the
    # change-vs-baseline lines do not overwrite the absolute ones.
    awk '
      function flush() {
        if (label != "" && time != "") {
          if (thrpt != "") printf " %-44s %s %s\n", label, time, thrpt
          else printf " %-44s %s\n", label, time
        }
        label=""; time=""; thrpt=""
      }
      /^Benchmarking / { next }
      /^Found / { next }
      /^[^[:space:]].*\// { flush(); label=$0; next }
      /time:/ {
        if (label != "" && time == "") {
          sub(/^[[:space:]]+/, "")
          time=$0
        }
        next
      }
      /thrpt:/ {
        if (label != "" && thrpt == "") {
          sub(/^[[:space:]]+/, "")
          thrpt=$0
        }
        next
      }
      END { flush() }
    ' "$out"
    echo
  done
} > "$OUTPUT_DIR/summary.txt"
# Snapshot criterion's HTML reports. These contain the full distribution
# plots and are what you'd open to compare two runs visually. Absent on
# a flamegraph-only run, hence the guard.
html_src=target/criterion
if [[ -d "$html_src" ]]; then
  cp -r "$html_src" "$OUTPUT_DIR/criterion-html"
fi
echo "==> done"
echo " summary: $OUTPUT_DIR/summary.txt"
echo " HTML: $OUTPUT_DIR/criterion-html/report/index.html"
# Guard with `if` rather than `[[ … ]] && echo`: as the final command of
# the script, a failed `[[ ]]` would set the exit status to 1 — under
# `set -e` every successful non-flamegraph run would report failure.
if [[ "$MODE" == "flamegraph" ]]; then
  echo " SVGs: $OUTPUT_DIR/*.svg"
fi