where-simd-helps/analysis/cmd/aggregate/main.go

216 lines
5.8 KiB
Go

// aggregate parses pqc-bench .out files and emits summary statistics as JSON.
//
// Usage:
//
// aggregate [--raw] [--out results.json] <data-dir>
//
// It walks <data-dir> for all *.out files, grouping results by the parent
// directory name (algorithm) and the variant inferred from the SLURM header.
// Output is a JSON array of result objects, one per (algorithm, variant,
// operation) triple.
package main
import (
"encoding/json"
"flag"
"fmt"
"io/fs"
"os"
"path/filepath"
"slices"
"strings"
"git.levineuwirth.org/neuwirth/where-simd-helps/analysis/pkg/parse"
"git.levineuwirth.org/neuwirth/where-simd-helps/analysis/pkg/stats"
)
// Result is one output record: all statistics for a single
// (algorithm, variant, operation) group.
//
// Statistic fields are filled from stats.Compute over the group's sorted
// observations; Unit names the measurement unit for all of them.
type Result struct {
	Algorithm     string     `json:"algorithm"`      // algorithm name, e.g. "mlkem512" (see inferAlgorithm)
	Variant       string     `json:"variant"`        // variant inferred from the SLURM header (parse.InferVariant)
	Operation     string     `json:"operation"`      // benchmarked operation within the algorithm
	Unit          string     `json:"unit"`           // always "cycles" in this tool
	NObservations int        `json:"n_observations"` // number of observations aggregated into the stats
	NRuns         int        `json:"n_runs"`         // number of source .out files recorded for the group
	Median        float64    `json:"median"`
	Mean          float64    `json:"mean"`
	Std           float64    `json:"std"` // standard deviation (as computed by stats.Compute)
	MAD           float64    `json:"mad"` // median absolute deviation (as computed by stats.Compute)
	P5            float64    `json:"p5"`  // P5..P99: percentiles of the observation distribution
	P25           float64    `json:"p25"`
	P75           float64    `json:"p75"`
	P95           float64    `json:"p95"`
	P99           float64    `json:"p99"`
	CI95          [2]float64 `json:"ci95"`          // 95% confidence interval bounds
	Node          string     `json:"node"`          // node of the first run seen for this group
	Sources       []string   `json:"sources"`       // contributing .out file paths
	Raw           []int64    `json:"raw,omitempty"` // per-observation values; populated only with --raw
}
// groupKey uniquely identifies a (algorithm, variant, operation) combination.
// All fields are strings, so the struct is comparable and usable directly as
// a map key when accumulating observations.
type groupKey struct {
	algorithm, variant, operation string
}
// accumulator collects the raw observations, contributing source files, and
// node name for one (algorithm, variant, operation) group.
type accumulator struct {
	values  []int64  // per-spin median cycle counts, in encounter order
	sources []string // .out files recorded for this group's algorithm+variant
	node    string   // node of the first run that created this group
}

// collectOutFiles walks dataDir and returns the path of every regular *.out
// file found beneath it.
func collectOutFiles(dataDir string) ([]string, error) {
	var outFiles []string
	err := filepath.WalkDir(dataDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !d.IsDir() && strings.HasSuffix(path, ".out") {
			outFiles = append(outFiles, path)
		}
		return nil
	})
	return outFiles, err
}

// buildGroups parses each .out file and accumulates its observations per
// (algorithm, variant, operation) group. Unparseable files are skipped with
// a warning on stderr rather than aborting the whole aggregation.
func buildGroups(outFiles []string) map[groupKey]*accumulator {
	groups := make(map[groupKey]*accumulator)
	for _, path := range outFiles {
		run, err := parse.ParseFile(path)
		if err != nil {
			fmt.Fprintf(os.Stderr, "warning: skipping %s: %v\n", path, err)
			continue
		}
		algorithm := inferAlgorithm(run.Meta, path)
		variant := parse.InferVariant(run.Meta)
		for _, spin := range run.Spins {
			for op, m := range spin {
				key := groupKey{algorithm, variant, op}
				acc := groups[key]
				if acc == nil {
					acc = &accumulator{node: run.Meta.Node}
					groups[key] = acc
				}
				acc.values = append(acc.values, m.Median)
			}
		}
		// Record sources per group (any key with this algorithm+variant),
		// deduplicating so a file is listed at most once per group.
		for key, acc := range groups {
			if key.algorithm == algorithm && key.variant == variant {
				if !slices.Contains(acc.sources, path) {
					acc.sources = append(acc.sources, path)
				}
			}
		}
	}
	return groups
}

// buildResults computes summary statistics for every group and returns the
// records sorted by algorithm → variant → operation for stable output.
// When includeRaw is true, each record also carries its raw observations.
func buildResults(groups map[groupKey]*accumulator, includeRaw bool) []Result {
	results := make([]Result, 0, len(groups))
	for key, acc := range groups {
		// Sort a copy: acc.values keeps encounter order for the raw output.
		sorted := slices.Clone(acc.values)
		stats.SortInt64(sorted)
		s := stats.Compute(sorted)
		r := Result{
			Algorithm:     key.algorithm,
			Variant:       key.variant,
			Operation:     key.operation,
			Unit:          "cycles",
			NObservations: s.N,
			NRuns:         len(acc.sources),
			Median:        s.Median,
			Mean:          s.Mean,
			Std:           s.Std,
			MAD:           s.MAD,
			P5:            s.P5,
			P25:           s.P25,
			P75:           s.P75,
			P95:           s.P95,
			P99:           s.P99,
			CI95:          s.CI95,
			Node:          acc.node,
			Sources:       acc.sources,
		}
		if includeRaw {
			r.Raw = acc.values
		}
		results = append(results, r)
	}
	slices.SortFunc(results, func(a, b Result) int {
		if a.Algorithm != b.Algorithm {
			return strings.Compare(a.Algorithm, b.Algorithm)
		}
		if a.Variant != b.Variant {
			return strings.Compare(a.Variant, b.Variant)
		}
		return strings.Compare(a.Operation, b.Operation)
	})
	return results
}

// main parses flags, collects .out files under the data directory, aggregates
// their observations, and writes the resulting statistics as indented JSON to
// stdout or to the --out file. Fatal problems exit with status 1.
func main() {
	rawFlag := flag.Bool("raw", false, "include per-observation cycle counts in output")
	outFlag := flag.String("out", "", "write JSON output to this file instead of stdout")
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: aggregate [--raw] [--out FILE] <data-dir>\n")
		flag.PrintDefaults()
	}
	flag.Parse()
	if flag.NArg() != 1 {
		flag.Usage()
		os.Exit(1)
	}
	dataDir := flag.Arg(0)

	outFiles, err := collectOutFiles(dataDir)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error walking %s: %v\n", dataDir, err)
		os.Exit(1)
	}
	if len(outFiles) == 0 {
		fmt.Fprintf(os.Stderr, "no .out files found under %s\n", dataDir)
		os.Exit(1)
	}

	results := buildResults(buildGroups(outFiles), *rawFlag)

	out, err := json.MarshalIndent(results, "", " ")
	if err != nil {
		fmt.Fprintf(os.Stderr, "error marshalling JSON: %v\n", err)
		os.Exit(1)
	}
	if *outFlag != "" {
		if err := os.WriteFile(*outFlag, out, 0o644); err != nil {
			fmt.Fprintf(os.Stderr, "error writing %s: %v\n", *outFlag, err)
			os.Exit(1)
		}
		fmt.Fprintf(os.Stderr, "wrote %d results to %s\n", len(results), *outFlag)
	} else {
		fmt.Println(string(out))
	}
}
// inferAlgorithm returns the algorithm name (e.g. "mlkem512") for a run.
//
// Priority:
//  1. BENCH_PARAM metadata → "mlkem{PARAM}" (new-style runs via submit.sh)
//  2. Walk the file path upward for a segment matching "mlkem\d+" (handles
//     both flat old-style layout and new nested layout transparently)
//  3. The immediate parent directory name as a last resort.
func inferAlgorithm(meta parse.Meta, filePath string) string {
	if meta.BenchParam != "" {
		return "mlkem" + meta.BenchParam
	}
	// Walk path components looking for mlkem\d+. A bare prefix match would
	// also accept unrelated directories such as "mlkem-results" or a plain
	// "mlkem", so require "mlkem" followed by at least one digit.
	for dir := filepath.Dir(filePath); dir != "." && dir != "/"; dir = filepath.Dir(dir) {
		if base := filepath.Base(dir); isAlgorithmDir(base) {
			return base
		}
	}
	return filepath.Base(filepath.Dir(filePath))
}

// isAlgorithmDir reports whether name matches mlkem\d+, i.e. "mlkem"
// followed by one or more ASCII digits.
func isAlgorithmDir(name string) bool {
	digits, ok := strings.CutPrefix(name, "mlkem")
	if !ok || digits == "" {
		return false
	}
	for _, r := range digits {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}