// aggregate parses pqc-bench .out files and emits summary statistics as JSON. // // Usage: // // aggregate [--raw] [--out results.json] // // It walks for all *.out files, grouping results by the parent // directory name (algorithm) and the variant inferred from the SLURM header. // Output is a JSON array of result objects, one per (algorithm, variant, // operation) triple. package main import ( "encoding/json" "flag" "fmt" "io/fs" "os" "path/filepath" "slices" "strings" "git.levineuwirth.org/neuwirth/where-simd-helps/analysis/pkg/parse" "git.levineuwirth.org/neuwirth/where-simd-helps/analysis/pkg/stats" ) // Result is one output record: all statistics for a single // (algorithm, variant, operation) group. type Result struct { Algorithm string `json:"algorithm"` Variant string `json:"variant"` Operation string `json:"operation"` Unit string `json:"unit"` NObservations int `json:"n_observations"` NRuns int `json:"n_runs"` Median float64 `json:"median"` Mean float64 `json:"mean"` Std float64 `json:"std"` MAD float64 `json:"mad"` P5 float64 `json:"p5"` P25 float64 `json:"p25"` P75 float64 `json:"p75"` P95 float64 `json:"p95"` P99 float64 `json:"p99"` CI95 [2]float64 `json:"ci95"` Node string `json:"node"` Sources []string `json:"sources"` Raw []int64 `json:"raw,omitempty"` } // groupKey uniquely identifies a (algorithm, variant, operation) combination. type groupKey struct { algorithm, variant, operation string } func main() { rawFlag := flag.Bool("raw", false, "include per-observation cycle counts in output") outFlag := flag.String("out", "", "write JSON output to this file instead of stdout") flag.Usage = func() { fmt.Fprintf(os.Stderr, "Usage: aggregate [--raw] [--out FILE] \n") flag.PrintDefaults() } flag.Parse() if flag.NArg() != 1 { flag.Usage() os.Exit(1) } dataDir := flag.Arg(0) // Collect all .out files. var outFiles []string err := filepath.WalkDir(dataDir, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if !d.IsDir() && strings.HasSuffix(path, ".out") { outFiles = append(outFiles, path) } return nil }) if err != nil { fmt.Fprintf(os.Stderr, "error walking %s: %v\n", dataDir, err) os.Exit(1) } if len(outFiles) == 0 { fmt.Fprintf(os.Stderr, "no .out files found under %s\n", dataDir) os.Exit(1) } // Parse every file and accumulate observations per group. type accumulator struct { values []int64 sources []string node string } groups := make(map[groupKey]*accumulator) for _, path := range outFiles { run, err := parse.ParseFile(path) if err != nil { fmt.Fprintf(os.Stderr, "warning: skipping %s: %v\n", path, err) continue } algorithm := inferAlgorithm(run.Meta, path) variant := parse.InferVariant(run.Meta) for _, spin := range run.Spins { for op, m := range spin { key := groupKey{algorithm, variant, op} acc := groups[key] if acc == nil { acc = &accumulator{node: run.Meta.Node} groups[key] = acc } acc.values = append(acc.values, m.Median) } } // Record sources per group (any key with this algorithm+variant). for key, acc := range groups { if key.algorithm == algorithm && key.variant == variant { if !slices.Contains(acc.sources, path) { acc.sources = append(acc.sources, path) } } } } // Build results. results := make([]Result, 0, len(groups)) for key, acc := range groups { sorted := make([]int64, len(acc.values)) copy(sorted, acc.values) stats.SortInt64(sorted) s := stats.Compute(sorted) r := Result{ Algorithm: key.algorithm, Variant: key.variant, Operation: key.operation, Unit: "cycles", NObservations: s.N, NRuns: len(acc.sources), Median: s.Median, Mean: s.Mean, Std: s.Std, MAD: s.MAD, P5: s.P5, P25: s.P25, P75: s.P75, P95: s.P95, P99: s.P99, CI95: s.CI95, Node: acc.node, Sources: acc.sources, } if *rawFlag { r.Raw = acc.values } results = append(results, r) } // Sort for stable output: algorithm → variant → operation. slices.SortFunc(results, func(a, b Result) int { if a.Algorithm != b.Algorithm { return strings.Compare(a.Algorithm, b.Algorithm) } if a.Variant != b.Variant { return strings.Compare(a.Variant, b.Variant) } return strings.Compare(a.Operation, b.Operation) }) out, err := json.MarshalIndent(results, "", " ") if err != nil { fmt.Fprintf(os.Stderr, "error marshalling JSON: %v\n", err) os.Exit(1) } if *outFlag != "" { if err := os.WriteFile(*outFlag, out, 0o644); err != nil { fmt.Fprintf(os.Stderr, "error writing %s: %v\n", *outFlag, err) os.Exit(1) } fmt.Fprintf(os.Stderr, "wrote %d results to %s\n", len(results), *outFlag) } else { fmt.Println(string(out)) } } // inferAlgorithm returns the algorithm name (e.g. "mlkem512") for a run. // // Priority: // 1. BENCH_PARAM metadata → "mlkem{PARAM}" (new-style runs via submit.sh) // 2. Walk the file path upward for a segment matching "mlkem\d+" (handles // both flat old-style layout and new nested layout transparently) // 3. The immediate parent directory name as a last resort. func inferAlgorithm(meta parse.Meta, filePath string) string { if meta.BenchParam != "" { return "mlkem" + meta.BenchParam } // Walk path components looking for mlkem\d+. dir := filepath.Dir(filePath) for dir != "." && dir != "/" { base := filepath.Base(dir) if strings.HasPrefix(base, "mlkem") { return base } dir = filepath.Dir(dir) } return filepath.Base(filepath.Dir(filePath)) }