where-simd-helps/analysis/pkg/stats/stats.go

134 lines
3.1 KiB
Go

// Package stats computes summary statistics over slices of cycle counts.
package stats
import (
"cmp"
"math"
"math/rand/v2"
"slices"
)
const bootstrapN = 10_000
// Summary holds all computed statistics for one (algorithm, variant, operation) group.
type Summary struct {
N int
Mean float64
// Median is the sample median (p50).
Median float64
Std float64
MAD float64
P5 float64
P25 float64
P75 float64
P95 float64
P99 float64
// CI95 is the bootstrapped 95% confidence interval for the median.
CI95 [2]float64
}
// Compute derives all statistics from a sorted (ascending) slice of values.
// The caller must sort the slice before passing it in.
func Compute(sorted []int64) Summary {
n := len(sorted)
if n == 0 {
return Summary{}
}
s := Summary{N: n}
s.Mean = mean(sorted)
s.Median = percentileFromSorted(sorted, 50)
s.Std = stddev(sorted, s.Mean)
s.MAD = mad(sorted, s.Median)
s.P5 = percentileFromSorted(sorted, 5)
s.P25 = percentileFromSorted(sorted, 25)
s.P75 = percentileFromSorted(sorted, 75)
s.P95 = percentileFromSorted(sorted, 95)
s.P99 = percentileFromSorted(sorted, 99)
s.CI95 = bootstrapMedianCI(sorted, bootstrapN)
return s
}
func mean(xs []int64) float64 {
var sum float64
for _, x := range xs {
sum += float64(x)
}
return sum / float64(len(xs))
}
func stddev(xs []int64, m float64) float64 {
var variance float64
for _, x := range xs {
d := float64(x) - m
variance += d * d
}
return math.Sqrt(variance / float64(len(xs)))
}
func mad(sorted []int64, median float64) float64 {
devs := make([]float64, len(sorted))
for i, x := range sorted {
devs[i] = math.Abs(float64(x) - median)
}
slices.Sort(devs)
n := len(devs)
if n%2 == 0 {
return (devs[n/2-1] + devs[n/2]) / 2
}
return devs[n/2]
}
// percentileFromSorted uses linear interpolation (same as numpy's default).
func percentileFromSorted(sorted []int64, p float64) float64 {
n := float64(len(sorted))
if n == 1 {
return float64(sorted[0])
}
rank := p / 100 * (n - 1)
lo := int(math.Floor(rank))
hi := int(math.Ceil(rank))
frac := rank - float64(lo)
return float64(sorted[lo])*(1-frac) + float64(sorted[hi])*frac
}
// bootstrapMedianCI resamples the data bootstrapN times and returns the
// [2.5th, 97.5th] percentile of the bootstrap median distribution.
func bootstrapMedianCI(sorted []int64, iters int) [2]float64 {
n := len(sorted)
buf := make([]int64, n)
medians := make([]float64, iters)
for i := range iters {
for j := range n {
buf[j] = sorted[rand.IntN(n)]
}
slices.Sort(buf)
medians[i] = percentileFromSorted(buf, 50)
}
slices.Sort(medians)
return [2]float64{
percentile64(medians, 2.5),
percentile64(medians, 97.5),
}
}
func percentile64(sorted []float64, p float64) float64 {
n := float64(len(sorted))
if n == 1 {
return sorted[0]
}
rank := p / 100 * (n - 1)
lo := int(math.Floor(rank))
hi := int(math.Ceil(rank))
frac := rank - float64(lo)
return sorted[lo]*(1-frac) + sorted[hi]*frac
}
// SortInt64 sorts a slice of int64 in place (ascending).
func SortInt64(xs []int64) {
slices.SortFunc(xs, cmp.Compare)
}