where-simd-helps/analysis/pkg/parse/parse.go

190 lines
4.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package parse reads pqc-bench .out files produced by the SLURM harness.
//
// Each file contains a SLURM prolog header followed by 1..N "loop spin" blocks.
// Each spin block reports one median+average pair per benchmarked operation.
package parse
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// Meta is the job metadata collected from the "##"-prefixed SLURM prolog
// lines at the top of a .out file. Each field holds the whitespace-trimmed
// text that followed the key's colon; a key that never appears in the file
// leaves its field empty.
type Meta struct {
	// Values of the SLURM prolog keys "Job ID", "Job Name", "Nodelist",
	// "Job Started" and "Directory", in that order.
	JobID, JobName, Node, StartedAt, Directory string

	// Explicit fields emitted by submit.sh for reliable downstream parsing:
	// the values of "BENCH_VARIANT", "BENCH_PARAM" and "BENCH_NSPINS", in
	// that order.
	BenchVariant, BenchParam, BenchNSpins string
}
// Measurement is the pair of statistics reported for one operation within a
// single loop spin: the integers read from that operation's "median:" and
// "average:" lines.
type Measurement struct {
	Median, Average int64
}
// Run holds everything parsed from one .out file.
//
// ParseFile sets File to the path it was given, fills Meta from the "##"
// prolog lines, and appends one entry to Spins for every "Loop spin:" block,
// in the order the blocks appear in the file.
type Run struct {
// File is the path that was passed to ParseFile.
File string
// Meta is the metadata collected from the SLURM prolog lines.
Meta Meta
// Spins[i] maps operation name to its measurement for loop spin i+1.
Spins []map[string]Measurement
}
// ParseFile reads a single .out file and returns a Run.
//
// The file is processed line by line, with surrounding whitespace trimmed:
//   - a line starting with "##" is handed to parsePrologLine to fill Run.Meta;
//   - a line starting with "Loop spin:" opens a new spin (a fresh map that is
//     appended to Run.Spins even if it stays empty);
//   - inside a spin, a line ending in ':' that does not start with "median" or
//     "average" names the current operation;
//   - a "median:" line stores a pending median for the current operation, and
//     the following "average:" line completes and records its Measurement.
//
// Anything else is ignored, including non-prolog lines before the first spin
// and median/average lines that appear without a current operation. An
// operation that reports a median but no average records nothing, and its
// median is discarded rather than carried over to a later operation. An
// average with no preceding median is recorded with Median 0.
//
// The error from os.Open is returned unchanged. Scanner errors and numbers
// that fail to parse are returned wrapped with the file path; no partial Run
// is returned on error.
func ParseFile(path string) (*Run, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	run := &Run{File: path}

	// Lines are short, so bufio.Scanner's default per-line limit
	// (bufio.MaxScanTokenSize, 64 KiB) is sufficient.
	scanner := bufio.NewScanner(f)

	var (
		currentSpin   map[string]Measurement // nil until the first "Loop spin:" line
		currentOp     string                 // "" while no operation is open
		pendingMedian int64                  // median awaiting its "average:" line
	)

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		switch {
		case strings.HasPrefix(line, "##"):
			// SLURM prolog line.
			parsePrologLine(line, &run.Meta)

		case strings.HasPrefix(line, "Loop spin:"):
			// Close the previous spin, if any, and start a new one.
			if currentSpin != nil {
				run.Spins = append(run.Spins, currentSpin)
			}
			currentSpin = make(map[string]Measurement)
			currentOp = ""
			// Drop any median left over from an incomplete operation so it
			// cannot be attributed to an operation in this spin.
			pendingMedian = 0

		case currentSpin == nil:
			// Not inside a spin yet; nothing to record.

		case strings.HasSuffix(line, ":") &&
			!strings.HasPrefix(line, "median") &&
			!strings.HasPrefix(line, "average"):
			// Operation name line.
			currentOp = strings.TrimSpace(strings.TrimSuffix(line, ":"))
			// A median belongs only to the operation it was reported under.
			pendingMedian = 0

		case currentOp == "":
			// Statistics without an open operation are ignored.

		case strings.HasPrefix(line, "median:"):
			v, err := parseCycles(line)
			if err != nil {
				return nil, fmt.Errorf("%s: %w", path, err)
			}
			pendingMedian = v

		case strings.HasPrefix(line, "average:"):
			avg, err := parseCycles(line)
			if err != nil {
				return nil, fmt.Errorf("%s: %w", path, err)
			}
			currentSpin[currentOp] = Measurement{Median: pendingMedian, Average: avg}
			currentOp = ""
			pendingMedian = 0
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("%s: %w", path, err)
	}

	// Flush the last spin.
	if currentSpin != nil {
		run.Spins = append(run.Spins, currentSpin)
	}
	return run, nil
}
// parseCycles returns the integer in the second whitespace-separated field of
// line, e.g. 25194 for "median: 25194 cycles/ticks".
//
// A line with fewer than two fields yields an "unexpected line format" error;
// a second field that is not a base-10 int64 yields the strconv.ParseInt
// error unchanged.
func parseCycles(line string) (int64, error) {
	// Expected shape: "<label>: <N> cycles/ticks".
	fields := strings.Fields(line)
	if len(fields) >= 2 {
		return strconv.ParseInt(fields[1], 10, 64)
	}
	return 0, fmt.Errorf("unexpected line format: %q", line)
}
// parsePrologLine stores the value of one SLURM header line in meta.
//
// Header lines look like "## Job ID : 11233228". All leading '#' characters
// are stripped, which also turns pure decoration lines ("####...") into empty
// text. The remainder is split at its first ':' into a key and a value, both
// whitespace-trimmed. Lines without a ':' and lines with an unrecognised key
// leave meta untouched.
func parsePrologLine(line string, meta *Meta) {
	body := strings.TrimSpace(strings.TrimLeft(line, "#"))

	sep := strings.Index(body, ":")
	if sep < 0 {
		return
	}
	name := strings.TrimSpace(body[:sep])
	value := strings.TrimSpace(body[sep+1:])

	// Pick the field that corresponds to the key, then assign once.
	var dst *string
	switch name {
	case "Job ID":
		dst = &meta.JobID
	case "Job Name":
		dst = &meta.JobName
	case "Nodelist":
		dst = &meta.Node
	case "Job Started":
		dst = &meta.StartedAt
	case "Directory":
		dst = &meta.Directory
	case "BENCH_VARIANT":
		dst = &meta.BenchVariant
	case "BENCH_PARAM":
		dst = &meta.BenchParam
	case "BENCH_NSPINS":
		dst = &meta.BenchNSpins
	default:
		return
	}
	*dst = value
}
// InferVariant returns the benchmark variant for a run.
//
// Priority:
//  1. Explicit BENCH_VARIANT metadata emitted by submit.sh (most reliable).
//  2. The path segment immediately following the last "kyber/" in the SLURM
//     Directory field (works for old-style runs that ran from inside the
//     kyber tree).
//  3. "unknown" if neither is available, including when "kyber/" is present
//     but followed by an empty segment (e.g. a Directory ending in "kyber/").
func InferVariant(meta Meta) string {
	if meta.BenchVariant != "" {
		return meta.BenchVariant
	}

	const marker = "kyber/"
	idx := strings.LastIndex(meta.Directory, marker)
	if idx < 0 {
		return "unknown"
	}

	variant, _, _ := strings.Cut(meta.Directory[idx+len(marker):], "/")
	if variant == "" {
		// Nothing usable follows the marker; report that explicitly instead
		// of handing callers an empty variant name.
		return "unknown"
	}
	return variant
}