// Package stats computes summary statistics over slices of cycle counts. package stats import ( "cmp" "math" "math/rand/v2" "slices" ) const bootstrapN = 10_000 // Summary holds all computed statistics for one (algorithm, variant, operation) group. type Summary struct { N int Mean float64 // Median is the sample median (p50). Median float64 Std float64 MAD float64 P5 float64 P25 float64 P75 float64 P95 float64 P99 float64 // CI95 is the bootstrapped 95% confidence interval for the median. CI95 [2]float64 } // Compute derives all statistics from a sorted (ascending) slice of values. // The caller must sort the slice before passing it in. func Compute(sorted []int64) Summary { n := len(sorted) if n == 0 { return Summary{} } s := Summary{N: n} s.Mean = mean(sorted) s.Median = percentileFromSorted(sorted, 50) s.Std = stddev(sorted, s.Mean) s.MAD = mad(sorted, s.Median) s.P5 = percentileFromSorted(sorted, 5) s.P25 = percentileFromSorted(sorted, 25) s.P75 = percentileFromSorted(sorted, 75) s.P95 = percentileFromSorted(sorted, 95) s.P99 = percentileFromSorted(sorted, 99) s.CI95 = bootstrapMedianCI(sorted, bootstrapN) return s } func mean(xs []int64) float64 { var sum float64 for _, x := range xs { sum += float64(x) } return sum / float64(len(xs)) } func stddev(xs []int64, m float64) float64 { var variance float64 for _, x := range xs { d := float64(x) - m variance += d * d } return math.Sqrt(variance / float64(len(xs))) } func mad(sorted []int64, median float64) float64 { devs := make([]float64, len(sorted)) for i, x := range sorted { devs[i] = math.Abs(float64(x) - median) } slices.Sort(devs) n := len(devs) if n%2 == 0 { return (devs[n/2-1] + devs[n/2]) / 2 } return devs[n/2] } // percentileFromSorted uses linear interpolation (same as numpy's default). func percentileFromSorted(sorted []int64, p float64) float64 { n := float64(len(sorted)) if n == 1 { return float64(sorted[0]) } rank := p / 100 * (n - 1) lo := int(math.Floor(rank)) hi := int(math.Ceil(rank)) frac := rank - float64(lo) return float64(sorted[lo])*(1-frac) + float64(sorted[hi])*frac } // bootstrapMedianCI resamples the data bootstrapN times and returns the // [2.5th, 97.5th] percentile of the bootstrap median distribution. func bootstrapMedianCI(sorted []int64, iters int) [2]float64 { n := len(sorted) buf := make([]int64, n) medians := make([]float64, iters) for i := range iters { for j := range n { buf[j] = sorted[rand.IntN(n)] } slices.Sort(buf) medians[i] = percentileFromSorted(buf, 50) } slices.Sort(medians) return [2]float64{ percentile64(medians, 2.5), percentile64(medians, 97.5), } } func percentile64(sorted []float64, p float64) float64 { n := float64(len(sorted)) if n == 1 { return sorted[0] } rank := p / 100 * (n - 1) lo := int(math.Floor(rank)) hi := int(math.Ceil(rank)) frac := rank - float64(lo) return sorted[lo]*(1-frac) + sorted[hi]*frac } // SortInt64 sorts a slice of int64 in place (ascending). func SortInt64(xs []int64) { slices.SortFunc(xs, cmp.Compare) }