where-simd-helps/analysis/figures.py

488 lines
19 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Matplotlib draft figures for the PQC SIMD speedup analysis.
Usage:
python3 analysis/figures.py [--json analysis/results.json] [--out figures/]
"""
import argparse
import json
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# Cumulative stages used in Figure 1 (each shows total speedup from refo0)
STAGE_KEYS = ["refo0_to_refnv", "refo0_to_ref", "refo0_to_avx2"]
STAGE_LABELS = ["O3, no auto-vec", "O3 + auto-vec", "O3 + hand SIMD (avx2)"]
STAGE_COLORS = ["#4C72B0", "#55A868", "#C44E52"]
# Ops to show in the primary figures (excludes top-level KEM wrappers)
PRIMARY_OPS = {
"poly_frommsg", "INVNTT", "polyvec_basemul_acc_montgomery", "NTT",
"indcpa_dec", "polyvec_decompress", "poly_decompress",
"poly_compress", "poly_tomsg", "polyvec_compress",
"indcpa_enc", "indcpa_keypair", "gen_a",
"poly_getnoise_eta1", "poly_getnoise_eta2",
}
# Short display names
OP_SHORT = {
"poly_frommsg": "frommsg",
"INVNTT": "INVNTT",
"polyvec_basemul_acc_montgomery": "basemul",
"NTT": "NTT",
"indcpa_dec": "dec",
"polyvec_decompress": "pvec_decomp",
"poly_decompress": "poly_decomp",
"poly_compress": "poly_comp",
"poly_tomsg": "tomsg",
"polyvec_compress": "pvec_comp",
"indcpa_enc": "enc",
"indcpa_keypair": "keypair",
"gen_a": "gen_a",
"poly_getnoise_eta1": "noise_η₁",
"poly_getnoise_eta2": "noise_η₂",
}
ALGORITHMS = ["mlkem512", "mlkem768", "mlkem1024"]
ALG_TITLES = {"mlkem512": "ML-KEM-512", "mlkem768": "ML-KEM-768", "mlkem1024": "ML-KEM-1024"}
# Operations selected to illustrate the distribution figure:
# one high-speedup arithmetic op, one medium SHAKE-bound op, one low-speedup op
DIST_OPS = [
("INVNTT", "INVNTT\n(~55× speedup)"),
("gen_a", "gen_a\n(~4× speedup)"),
("poly_getnoise_eta1","noise η₁\n(~1.3× speedup)"),
]
# Per-polynomial ops whose speedup should be param-independent
CROSS_PARAM_OPS = [
"poly_frommsg",
"INVNTT",
"polyvec_basemul_acc_montgomery",
"NTT",
]
# KEM-level ops for supplementary
KEM_OPS = ["kyber_keypair", "kyber_encaps", "kyber_decaps"]
KEM_SHORT = {"kyber_keypair": "KeyGen", "kyber_encaps": "Encaps", "kyber_decaps": "Decaps"}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def load(json_path: str) -> list[dict]:
with open(json_path) as f:
return json.load(f)
def ops_for_alg(results: list[dict], alg: str) -> list[dict]:
rows = [r for r in results if r["algorithm"] == alg and r["operation"] in PRIMARY_OPS]
rows.sort(key=lambda r: -r["comparisons"].get("ref_to_avx2", {}).get("speedup", 0))
return rows
# ---------------------------------------------------------------------------
# Figure 1: cumulative grouped bars — speedup at each optimisation stage
#
# Each group shows three bars for one operation:
# refo0→refnv total speedup with O3, auto-vec OFF
# refo0→ref total speedup with O3, auto-vec ON
# refo0→avx2 total speedup with O3 + hand-written SIMD
#
# Because all bars share the same baseline (refo0=1), they are directly
# comparable without any additive/multiplicative ambiguity.
# ---------------------------------------------------------------------------
def fig_decomposition(results: list[dict], out_dir: Path) -> None:
fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=False)
for ax, alg in zip(axes, ALGORITHMS):
rows = ops_for_alg(results, alg)
if not rows:
ax.set_visible(False)
continue
ops = [OP_SHORT.get(r["operation"], r["operation"]) for r in rows]
n = len(rows)
group = np.arange(n)
# Three bars per group, evenly spaced within each group slot
bar_w = 0.22
offsets = np.array([-bar_w, 0, bar_w])
for (key, label, color), offset in zip(
zip(STAGE_KEYS, STAGE_LABELS, STAGE_COLORS), offsets
):
vals = np.array([r["comparisons"].get(key, {}).get("speedup", 0.0) for r in rows])
ci_lo = np.array([r["comparisons"].get(key, {}).get("ci95", [0.0, 0.0])[0] for r in rows])
ci_hi = np.array([r["comparisons"].get(key, {}).get("ci95", [0.0, 0.0])[1] for r in rows])
yerr = np.array([vals - ci_lo, ci_hi - vals])
mask = vals > 0
ax.bar(group[mask] + offset, vals[mask], bar_w,
label=label, color=color, alpha=0.88, zorder=3)
ax.errorbar(group[mask] + offset, vals[mask], yerr=yerr[:, mask],
fmt="none", ecolor="black", elinewidth=0.7, capsize=2, zorder=4)
ax.set_yscale("log")
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda v, _: f"{v:g}×"))
ax.set_title(ALG_TITLES[alg], fontsize=12, fontweight="bold")
ax.set_xticks(group)
ax.set_xticklabels(ops, rotation=45, ha="right", fontsize=8)
ax.set_ylabel("Speedup over -O0 (×, log scale)" if alg == "mlkem512" else "")
ax.grid(axis="y", which="both", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0)
ax.set_axisbelow(True)
ax.set_xlim(-0.5, n - 0.5)
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper center", ncol=3,
fontsize=10, frameon=True, bbox_to_anchor=(0.5, 1.02))
fig.suptitle(
"ML-KEM Cumulative Speedup at Each Optimisation Stage "
"(Intel Xeon Platinum 8268, 95% bootstrap CI)",
fontsize=11, y=1.06,
)
fig.tight_layout()
_save(fig, out_dir, "decomposition")
# ---------------------------------------------------------------------------
# Figure 2: hand-SIMD speedup (ref→avx2), all algorithms overlaid, log scale
# ---------------------------------------------------------------------------
def fig_hand_simd(results: list[dict], out_dir: Path) -> None:
all_ops: dict[str, dict] = {}
for r in results:
if r["operation"] in PRIMARY_OPS and "ref_to_avx2" in r["comparisons"]:
all_ops.setdefault(r["operation"], {})
all_ops[r["operation"]][r["algorithm"]] = r["comparisons"]["ref_to_avx2"]
ops_sorted = sorted(
all_ops,
key=lambda op: -all_ops[op].get("mlkem512", {}).get("speedup", 0),
)
short_ops = [OP_SHORT.get(op, op) for op in ops_sorted]
x = np.arange(len(ops_sorted))
bar_w = 0.25
offsets = [-bar_w, 0, bar_w]
colors = ["#4C72B0", "#55A868", "#C44E52"]
fig, ax = plt.subplots(figsize=(14, 5))
for alg, offset, color in zip(ALGORITHMS, offsets, colors):
vals = np.array([all_ops[op].get(alg, {}).get("speedup", 0) for op in ops_sorted])
ci_lo = np.array([all_ops[op].get(alg, {}).get("ci95", [0, 0])[0] for op in ops_sorted])
ci_hi = np.array([all_ops[op].get(alg, {}).get("ci95", [0, 0])[1] for op in ops_sorted])
yerr = np.array([vals - ci_lo, ci_hi - vals])
mask = vals > 0
ax.bar(x[mask] + offset, vals[mask], bar_w,
label=ALG_TITLES[alg], color=color, alpha=0.85, zorder=3)
ax.errorbar(x[mask] + offset, vals[mask], yerr=yerr[:, mask],
fmt="none", ecolor="black", elinewidth=0.7, capsize=2, zorder=4)
ax.set_yscale("log")
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda v, _: f"{v:g}×"))
ax.set_xticks(x)
ax.set_xticklabels(short_ops, rotation=45, ha="right", fontsize=9)
ax.set_ylabel("Speedup ref → avx2 (×, log scale)")
ax.set_title(
"Hand-Written SIMD Speedup over Compiler-Optimised C\n"
"(Intel Xeon Platinum 8268, 95% bootstrap CI, n≥2000 per group)"
)
ax.grid(axis="y", which="both", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0)
ax.set_axisbelow(True)
ax.legend(fontsize=10)
fig.tight_layout()
_save(fig, out_dir, "hand_simd_speedup")
# ---------------------------------------------------------------------------
# Figure 3: Cliff's delta heatmap (ref→avx2)
# ---------------------------------------------------------------------------
def fig_cliffs_heatmap(results: list[dict], out_dir: Path) -> None:
ops_set = sorted(
{r["operation"] for r in results if "ref_to_avx2" in r["comparisons"]},
key=lambda op: -max(
r["comparisons"]["ref_to_avx2"]["cliffs_delta"]
for r in results
if r["operation"] == op and "ref_to_avx2" in r["comparisons"]
),
)
short_ops = [OP_SHORT.get(op, op) for op in ops_set]
data = np.full((len(ALGORITHMS), len(ops_set)), np.nan)
for i, alg in enumerate(ALGORITHMS):
for j, op in enumerate(ops_set):
match = [r for r in results if r["algorithm"] == alg and r["operation"] == op]
if match and "ref_to_avx2" in match[0]["comparisons"]:
data[i, j] = match[0]["comparisons"]["ref_to_avx2"]["cliffs_delta"]
n_ops = len(ops_set)
fig, ax = plt.subplots(figsize=(max(10, n_ops * 0.85), 3.2))
im = ax.imshow(data, aspect="auto", cmap="RdYlGn", vmin=-1, vmax=1)
plt.colorbar(im, ax=ax, label="Cliff's δ", fraction=0.03, pad=0.02)
ax.set_yticks(range(len(ALGORITHMS)))
ax.set_yticklabels([ALG_TITLES[a] for a in ALGORITHMS], fontsize=10)
ax.set_xticks(range(n_ops))
ax.set_xticklabels(short_ops, rotation=45, ha="right", fontsize=9)
ax.set_title(
"Cliff's δ (ref vs. avx2) δ = +1.00: avx2 strictly faster in every observation pair",
fontsize=10,
)
for i in range(len(ALGORITHMS)):
for j in range(n_ops):
if not np.isnan(data[i, j]):
# White text on dark green cells, black elsewhere
text_color = "white" if data[i, j] > 0.85 else "black"
ax.text(j, i, f"{data[i, j]:+.3f}", ha="center", va="center",
fontsize=9, color=text_color, fontweight="bold")
fig.tight_layout()
_save(fig, out_dir, "cliffs_delta_heatmap")
# ---------------------------------------------------------------------------
# Figure 4: cycle distribution overlays (requires raw aggregator JSON)
#
# Three panels: one high-speedup op, one medium, one low.
# Each panel overlays ref and avx2 histograms + KDE for mlkem512.
# Log x-axis exposes the scale difference honestly.
# ---------------------------------------------------------------------------
def fig_distributions(raw_records: list[dict], out_dir: Path, alg: str = "mlkem512") -> None:
from scipy.stats import gaussian_kde
# Build lookup: (alg, variant, op) → raw array
raw: dict[tuple, np.ndarray] = {}
for r in raw_records:
if r.get("raw"):
raw[(r["algorithm"], r["variant"], r["operation"])] = np.array(r["raw"], dtype=np.float64)
n_ops = len(DIST_OPS)
fig, axes = plt.subplots(1, n_ops, figsize=(5 * n_ops, 4))
variant_style = {
"ref": {"color": "#4C72B0", "label": "ref (O3)", "alpha": 0.55, "zorder": 2},
"avx2": {"color": "#C44E52", "label": "avx2", "alpha": 0.65, "zorder": 3},
}
for ax, (op, subtitle) in zip(axes, DIST_OPS):
plotted_any = False
for variant in ("ref", "avx2"):
arr = raw.get((alg, variant, op))
if arr is None:
continue
plotted_any = True
s = variant_style[variant]
# Histogram on log scale
log_arr = np.log10(arr)
lo, hi = np.floor(log_arr.min()), np.ceil(log_arr.max())
bins = np.logspace(lo, hi, 60)
ax.hist(arr, bins=bins, density=True, color=s["color"],
alpha=s["alpha"], zorder=s["zorder"], label=s["label"])
# KDE on log scale, back-transformed
kde = gaussian_kde(log_arr, bw_method=0.25)
xs_log = np.linspace(lo, hi, 400)
xs = 10 ** xs_log
# KDE is in log space; convert density: p(x) = p(log x) / (x ln10)
ys = kde(xs_log) / (xs * np.log(10))
ax.plot(xs, ys, color=s["color"], linewidth=1.8, zorder=s["zorder"] + 1)
# Median line
med = float(np.median(arr))
ax.axvline(med, color=s["color"], linewidth=1.2, linestyle="--", zorder=5)
if not plotted_any:
ax.set_visible(False)
continue
ax.set_xscale("log")
ax.set_xlabel("Cycles (log scale)")
ax.set_ylabel("Density" if op == DIST_OPS[0][0] else "")
ax.set_title(subtitle, fontsize=10)
ax.legend(fontsize=9)
ax.xaxis.set_major_formatter(ticker.LogFormatterSciNotation(labelOnlyBase=False))
ax.grid(axis="x", which="both", linestyle="--", linewidth=0.4, alpha=0.4)
ax.set_axisbelow(True)
fig.suptitle(
f"Cycle Count Distributions — ref vs. avx2 ({ALG_TITLES[alg]})\n"
"Dashed lines show medians. Distributions are right-skewed → nonparametric statistics.",
fontsize=10,
)
fig.tight_layout()
_save(fig, out_dir, "distributions")
# ---------------------------------------------------------------------------
# Figure 5: cross-param speedup consistency
#
# For per-polynomial operations the polynomial dimension is always 256,
# independent of the security parameter k. Speedups should be identical
# across mlkem512/768/1024. This figure verifies that.
# ---------------------------------------------------------------------------
def fig_cross_param(results: list[dict], out_dir: Path) -> None:
ops = CROSS_PARAM_OPS
short = [OP_SHORT.get(op, op) for op in ops]
x = np.arange(len(ops))
bar_w = 0.22
offsets = np.array([-bar_w, 0, bar_w])
colors = ["#4C72B0", "#55A868", "#C44E52"]
fig, ax = plt.subplots(figsize=(8, 4))
for alg, offset, color in zip(ALGORITHMS, offsets, colors):
vals, ci_lo, ci_hi = [], [], []
for op in ops:
match = [r for r in results if r["algorithm"] == alg and r["operation"] == op]
if match and "ref_to_avx2" in match[0]["comparisons"]:
c = match[0]["comparisons"]["ref_to_avx2"]
vals.append(c["speedup"])
ci_lo.append(c["ci95"][0])
ci_hi.append(c["ci95"][1])
else:
vals.append(0); ci_lo.append(0); ci_hi.append(0)
vals = np.array(vals)
ci_lo = np.array(ci_lo)
ci_hi = np.array(ci_hi)
yerr = np.array([vals - ci_lo, ci_hi - vals])
mask = vals > 0
ax.bar(x[mask] + offset, vals[mask], bar_w,
label=ALG_TITLES[alg], color=color, alpha=0.88, zorder=3)
ax.errorbar(x[mask] + offset, vals[mask], yerr=yerr[:, mask],
fmt="none", ecolor="black", elinewidth=0.8, capsize=3, zorder=4)
ax.set_xticks(x)
ax.set_xticklabels(short, fontsize=11)
ax.set_ylabel("Speedup ref → avx2 (×)")
ax.set_title(
"Per-Polynomial Operation Speedup Across Security Parameters\n"
"(polynomial dim = 256 for all; NTT variation attributed to cache-state differences)"
)
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.grid(axis="y", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0)
ax.set_axisbelow(True)
ax.legend(fontsize=10)
fig.tight_layout()
_save(fig, out_dir, "cross_param")
# ---------------------------------------------------------------------------
# Figure S1: KEM-level end-to-end speedup (supplementary)
# ---------------------------------------------------------------------------
def fig_kem_level(results: list[dict], out_dir: Path) -> None:
ops = KEM_OPS
short = [KEM_SHORT[op] for op in ops]
x = np.arange(len(ops))
bar_w = 0.22
offsets = np.array([-bar_w, 0, bar_w])
colors = ["#4C72B0", "#55A868", "#C44E52"]
fig, ax = plt.subplots(figsize=(7, 4))
for alg, offset, color in zip(ALGORITHMS, offsets, colors):
vals, ci_lo, ci_hi = [], [], []
for op in ops:
match = [r for r in results if r["algorithm"] == alg and r["operation"] == op]
if match and "ref_to_avx2" in match[0]["comparisons"]:
c = match[0]["comparisons"]["ref_to_avx2"]
vals.append(c["speedup"])
ci_lo.append(c["ci95"][0])
ci_hi.append(c["ci95"][1])
else:
vals.append(0); ci_lo.append(0); ci_hi.append(0)
vals = np.array(vals)
ci_lo = np.array(ci_lo)
ci_hi = np.array(ci_hi)
yerr = np.array([vals - ci_lo, ci_hi - vals])
mask = vals > 0
ax.bar(x[mask] + offset, vals[mask], bar_w,
label=ALG_TITLES[alg], color=color, alpha=0.88, zorder=3)
ax.errorbar(x[mask] + offset, vals[mask], yerr=yerr[:, mask],
fmt="none", ecolor="black", elinewidth=0.8, capsize=3, zorder=4)
ax.set_xticks(x)
ax.set_xticklabels(short, fontsize=12)
ax.set_ylabel("Speedup ref → avx2 (×)")
ax.set_title(
"End-to-End KEM Speedup (ref → avx2)\n"
"(Intel Xeon Platinum 8268, 95% bootstrap CI)"
)
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.grid(axis="y", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0)
ax.set_axisbelow(True)
ax.legend(fontsize=10)
fig.tight_layout()
_save(fig, out_dir, "kem_level")
# ---------------------------------------------------------------------------
# Shared save helper
# ---------------------------------------------------------------------------
def _save(fig: plt.Figure, out_dir: Path, stem: str) -> None:
fig.savefig(out_dir / f"{stem}.pdf", bbox_inches="tight")
fig.savefig(out_dir / f"{stem}.png", bbox_inches="tight", dpi=150)
print(f"Saved {out_dir}/{stem}.{{pdf,png}}")
plt.close(fig)
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--json", default="analysis/results.json",
help="analyzed results JSON (from analyze.py)")
parser.add_argument("--raw-json", default=None,
help="raw aggregator JSON (from aggregate --raw); required for --distributions")
parser.add_argument("--out", default="analysis/figures")
args = parser.parse_args()
out_dir = Path(args.out)
out_dir.mkdir(parents=True, exist_ok=True)
results = load(args.json)
print(f"Loaded {len(results)} result rows.")
fig_decomposition(results, out_dir)
fig_hand_simd(results, out_dir)
fig_cliffs_heatmap(results, out_dir)
fig_cross_param(results, out_dir)
fig_kem_level(results, out_dir)
if args.raw_json:
raw_records = load(args.raw_json)
print(f"Loaded {len(raw_records)} raw groups for distributions.")
fig_distributions(raw_records, out_dir)
else:
print("Skipping distributions figure (pass --raw-json to enable).")
if __name__ == "__main__":
main()