134 lines
3.1 KiB
Go
134 lines
3.1 KiB
Go
// Package stats computes summary statistics over slices of cycle counts.
|
|
package stats
|
|
|
|
import (
|
|
"cmp"
|
|
"math"
|
|
"math/rand/v2"
|
|
"slices"
|
|
)
|
|
|
|
const bootstrapN = 10_000
|
|
|
|
// Summary holds all computed statistics for one (algorithm, variant, operation) group.
|
|
type Summary struct {
|
|
N int
|
|
Mean float64
|
|
// Median is the sample median (p50).
|
|
Median float64
|
|
Std float64
|
|
MAD float64
|
|
P5 float64
|
|
P25 float64
|
|
P75 float64
|
|
P95 float64
|
|
P99 float64
|
|
// CI95 is the bootstrapped 95% confidence interval for the median.
|
|
CI95 [2]float64
|
|
}
|
|
|
|
// Compute derives all statistics from a sorted (ascending) slice of values.
|
|
// The caller must sort the slice before passing it in.
|
|
func Compute(sorted []int64) Summary {
|
|
n := len(sorted)
|
|
if n == 0 {
|
|
return Summary{}
|
|
}
|
|
|
|
s := Summary{N: n}
|
|
s.Mean = mean(sorted)
|
|
s.Median = percentileFromSorted(sorted, 50)
|
|
s.Std = stddev(sorted, s.Mean)
|
|
s.MAD = mad(sorted, s.Median)
|
|
s.P5 = percentileFromSorted(sorted, 5)
|
|
s.P25 = percentileFromSorted(sorted, 25)
|
|
s.P75 = percentileFromSorted(sorted, 75)
|
|
s.P95 = percentileFromSorted(sorted, 95)
|
|
s.P99 = percentileFromSorted(sorted, 99)
|
|
s.CI95 = bootstrapMedianCI(sorted, bootstrapN)
|
|
|
|
return s
|
|
}
|
|
|
|
func mean(xs []int64) float64 {
|
|
var sum float64
|
|
for _, x := range xs {
|
|
sum += float64(x)
|
|
}
|
|
return sum / float64(len(xs))
|
|
}
|
|
|
|
func stddev(xs []int64, m float64) float64 {
|
|
var variance float64
|
|
for _, x := range xs {
|
|
d := float64(x) - m
|
|
variance += d * d
|
|
}
|
|
return math.Sqrt(variance / float64(len(xs)))
|
|
}
|
|
|
|
func mad(sorted []int64, median float64) float64 {
|
|
devs := make([]float64, len(sorted))
|
|
for i, x := range sorted {
|
|
devs[i] = math.Abs(float64(x) - median)
|
|
}
|
|
slices.Sort(devs)
|
|
n := len(devs)
|
|
if n%2 == 0 {
|
|
return (devs[n/2-1] + devs[n/2]) / 2
|
|
}
|
|
return devs[n/2]
|
|
}
|
|
|
|
// percentileFromSorted uses linear interpolation (same as numpy's default).
|
|
func percentileFromSorted(sorted []int64, p float64) float64 {
|
|
n := float64(len(sorted))
|
|
if n == 1 {
|
|
return float64(sorted[0])
|
|
}
|
|
rank := p / 100 * (n - 1)
|
|
lo := int(math.Floor(rank))
|
|
hi := int(math.Ceil(rank))
|
|
frac := rank - float64(lo)
|
|
return float64(sorted[lo])*(1-frac) + float64(sorted[hi])*frac
|
|
}
|
|
|
|
// bootstrapMedianCI resamples the data bootstrapN times and returns the
|
|
// [2.5th, 97.5th] percentile of the bootstrap median distribution.
|
|
func bootstrapMedianCI(sorted []int64, iters int) [2]float64 {
|
|
n := len(sorted)
|
|
buf := make([]int64, n)
|
|
medians := make([]float64, iters)
|
|
|
|
for i := range iters {
|
|
for j := range n {
|
|
buf[j] = sorted[rand.IntN(n)]
|
|
}
|
|
slices.Sort(buf)
|
|
medians[i] = percentileFromSorted(buf, 50)
|
|
}
|
|
|
|
slices.Sort(medians)
|
|
return [2]float64{
|
|
percentile64(medians, 2.5),
|
|
percentile64(medians, 97.5),
|
|
}
|
|
}
|
|
|
|
func percentile64(sorted []float64, p float64) float64 {
|
|
n := float64(len(sorted))
|
|
if n == 1 {
|
|
return sorted[0]
|
|
}
|
|
rank := p / 100 * (n - 1)
|
|
lo := int(math.Floor(rank))
|
|
hi := int(math.Ceil(rank))
|
|
frac := rank - float64(lo)
|
|
return sorted[lo]*(1-frac) + sorted[hi]*frac
|
|
}
|
|
|
|
// SortInt64 sorts a slice of int64 in place (ascending).
|
|
func SortInt64(xs []int64) {
|
|
slices.SortFunc(xs, cmp.Compare)
|
|
}
|