#!/usr/bin/env python3 """Matplotlib draft figures for the PQC SIMD speedup analysis. Usage: python3 analysis/figures.py [--json analysis/results.json] [--out figures/] """ import argparse import json from pathlib import Path import matplotlib.pyplot as plt import matplotlib.ticker as ticker import numpy as np # --------------------------------------------------------------------------- # Config # --------------------------------------------------------------------------- # Cumulative stages used in Figure 1 (each shows total speedup from refo0) STAGE_KEYS = ["refo0_to_refnv", "refo0_to_ref", "refo0_to_avx2"] STAGE_LABELS = ["O3, no auto-vec", "O3 + auto-vec", "O3 + hand SIMD (avx2)"] STAGE_COLORS = ["#4C72B0", "#55A868", "#C44E52"] # Ops to show in the primary figures (excludes top-level KEM wrappers) PRIMARY_OPS = { "poly_frommsg", "INVNTT", "polyvec_basemul_acc_montgomery", "NTT", "indcpa_dec", "polyvec_decompress", "poly_decompress", "poly_compress", "poly_tomsg", "polyvec_compress", "indcpa_enc", "indcpa_keypair", "gen_a", "poly_getnoise_eta1", "poly_getnoise_eta2", } # Short display names OP_SHORT = { "poly_frommsg": "frommsg", "INVNTT": "INVNTT", "polyvec_basemul_acc_montgomery": "basemul", "NTT": "NTT", "indcpa_dec": "dec", "polyvec_decompress": "pvec_decomp", "poly_decompress": "poly_decomp", "poly_compress": "poly_comp", "poly_tomsg": "tomsg", "polyvec_compress": "pvec_comp", "indcpa_enc": "enc", "indcpa_keypair": "keypair", "gen_a": "gen_a", "poly_getnoise_eta1": "noise_η₁", "poly_getnoise_eta2": "noise_η₂", } ALGORITHMS = ["mlkem512", "mlkem768", "mlkem1024"] ALG_TITLES = {"mlkem512": "ML-KEM-512", "mlkem768": "ML-KEM-768", "mlkem1024": "ML-KEM-1024"} # Operations selected to illustrate the distribution figure: # one high-speedup arithmetic op, one medium SHAKE-bound op, one low-speedup op DIST_OPS = [ ("INVNTT", "INVNTT\n(~55× speedup)"), ("gen_a", "gen_a\n(~4× speedup)"), ("poly_getnoise_eta1","noise η₁\n(~1.3× speedup)"), ] # Per-polynomial ops whose speedup should be param-independent CROSS_PARAM_OPS = [ "poly_frommsg", "INVNTT", "polyvec_basemul_acc_montgomery", "NTT", ] # KEM-level ops for supplementary KEM_OPS = ["kyber_keypair", "kyber_encaps", "kyber_decaps"] KEM_SHORT = {"kyber_keypair": "KeyGen", "kyber_encaps": "Encaps", "kyber_decaps": "Decaps"} # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def load(json_path: str) -> list[dict]: with open(json_path) as f: return json.load(f) def ops_for_alg(results: list[dict], alg: str) -> list[dict]: rows = [r for r in results if r["algorithm"] == alg and r["operation"] in PRIMARY_OPS] rows.sort(key=lambda r: -r["comparisons"].get("ref_to_avx2", {}).get("speedup", 0)) return rows # --------------------------------------------------------------------------- # Figure 1: cumulative grouped bars — speedup at each optimisation stage # # Each group shows three bars for one operation: # refo0→refnv total speedup with O3, auto-vec OFF # refo0→ref total speedup with O3, auto-vec ON # refo0→avx2 total speedup with O3 + hand-written SIMD # # Because all bars share the same baseline (refo0=1), they are directly # comparable without any additive/multiplicative ambiguity. # --------------------------------------------------------------------------- def fig_decomposition(results: list[dict], out_dir: Path) -> None: fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=False) for ax, alg in zip(axes, ALGORITHMS): rows = ops_for_alg(results, alg) if not rows: ax.set_visible(False) continue ops = [OP_SHORT.get(r["operation"], r["operation"]) for r in rows] n = len(rows) group = np.arange(n) # Three bars per group, evenly spaced within each group slot bar_w = 0.22 offsets = np.array([-bar_w, 0, bar_w]) for (key, label, color), offset in zip( zip(STAGE_KEYS, STAGE_LABELS, STAGE_COLORS), offsets ): vals = np.array([r["comparisons"].get(key, {}).get("speedup", 0.0) for r in rows]) ci_lo = np.array([r["comparisons"].get(key, {}).get("ci95", [0.0, 0.0])[0] for r in rows]) ci_hi = np.array([r["comparisons"].get(key, {}).get("ci95", [0.0, 0.0])[1] for r in rows]) yerr = np.array([vals - ci_lo, ci_hi - vals]) mask = vals > 0 ax.bar(group[mask] + offset, vals[mask], bar_w, label=label, color=color, alpha=0.88, zorder=3) ax.errorbar(group[mask] + offset, vals[mask], yerr=yerr[:, mask], fmt="none", ecolor="black", elinewidth=0.7, capsize=2, zorder=4) ax.set_yscale("log") ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda v, _: f"{v:g}×")) ax.set_title(ALG_TITLES[alg], fontsize=12, fontweight="bold") ax.set_xticks(group) ax.set_xticklabels(ops, rotation=45, ha="right", fontsize=8) ax.set_ylabel("Speedup over -O0 (×, log scale)" if alg == "mlkem512" else "") ax.grid(axis="y", which="both", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0) ax.set_axisbelow(True) ax.set_xlim(-0.5, n - 0.5) handles, labels = axes[0].get_legend_handles_labels() fig.legend(handles, labels, loc="upper center", ncol=3, fontsize=10, frameon=True, bbox_to_anchor=(0.5, 1.02)) fig.suptitle( "ML-KEM Cumulative Speedup at Each Optimisation Stage " "(Intel Xeon Platinum 8268, 95% bootstrap CI)", fontsize=11, y=1.06, ) fig.tight_layout() _save(fig, out_dir, "decomposition") # --------------------------------------------------------------------------- # Figure 2: hand-SIMD speedup (ref→avx2), all algorithms overlaid, log scale # --------------------------------------------------------------------------- def fig_hand_simd(results: list[dict], out_dir: Path) -> None: all_ops: dict[str, dict] = {} for r in results: if r["operation"] in PRIMARY_OPS and "ref_to_avx2" in r["comparisons"]: all_ops.setdefault(r["operation"], {}) all_ops[r["operation"]][r["algorithm"]] = r["comparisons"]["ref_to_avx2"] ops_sorted = sorted( all_ops, key=lambda op: -all_ops[op].get("mlkem512", {}).get("speedup", 0), ) short_ops = [OP_SHORT.get(op, op) for op in ops_sorted] x = np.arange(len(ops_sorted)) bar_w = 0.25 offsets = [-bar_w, 0, bar_w] colors = ["#4C72B0", "#55A868", "#C44E52"] fig, ax = plt.subplots(figsize=(14, 5)) for alg, offset, color in zip(ALGORITHMS, offsets, colors): vals = np.array([all_ops[op].get(alg, {}).get("speedup", 0) for op in ops_sorted]) ci_lo = np.array([all_ops[op].get(alg, {}).get("ci95", [0, 0])[0] for op in ops_sorted]) ci_hi = np.array([all_ops[op].get(alg, {}).get("ci95", [0, 0])[1] for op in ops_sorted]) yerr = np.array([vals - ci_lo, ci_hi - vals]) mask = vals > 0 ax.bar(x[mask] + offset, vals[mask], bar_w, label=ALG_TITLES[alg], color=color, alpha=0.85, zorder=3) ax.errorbar(x[mask] + offset, vals[mask], yerr=yerr[:, mask], fmt="none", ecolor="black", elinewidth=0.7, capsize=2, zorder=4) ax.set_yscale("log") ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda v, _: f"{v:g}×")) ax.set_xticks(x) ax.set_xticklabels(short_ops, rotation=45, ha="right", fontsize=9) ax.set_ylabel("Speedup ref → avx2 (×, log scale)") ax.set_title( "Hand-Written SIMD Speedup over Compiler-Optimised C\n" "(Intel Xeon Platinum 8268, 95% bootstrap CI, n≥2000 per group)" ) ax.grid(axis="y", which="both", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0) ax.set_axisbelow(True) ax.legend(fontsize=10) fig.tight_layout() _save(fig, out_dir, "hand_simd_speedup") # --------------------------------------------------------------------------- # Figure 3: Cliff's delta heatmap (ref→avx2) # --------------------------------------------------------------------------- def fig_cliffs_heatmap(results: list[dict], out_dir: Path) -> None: ops_set = sorted( {r["operation"] for r in results if "ref_to_avx2" in r["comparisons"]}, key=lambda op: -max( r["comparisons"]["ref_to_avx2"]["cliffs_delta"] for r in results if r["operation"] == op and "ref_to_avx2" in r["comparisons"] ), ) short_ops = [OP_SHORT.get(op, op) for op in ops_set] data = np.full((len(ALGORITHMS), len(ops_set)), np.nan) for i, alg in enumerate(ALGORITHMS): for j, op in enumerate(ops_set): match = [r for r in results if r["algorithm"] == alg and r["operation"] == op] if match and "ref_to_avx2" in match[0]["comparisons"]: data[i, j] = match[0]["comparisons"]["ref_to_avx2"]["cliffs_delta"] n_ops = len(ops_set) fig, ax = plt.subplots(figsize=(max(10, n_ops * 0.85), 3.2)) im = ax.imshow(data, aspect="auto", cmap="RdYlGn", vmin=-1, vmax=1) plt.colorbar(im, ax=ax, label="Cliff's δ", fraction=0.03, pad=0.02) ax.set_yticks(range(len(ALGORITHMS))) ax.set_yticklabels([ALG_TITLES[a] for a in ALGORITHMS], fontsize=10) ax.set_xticks(range(n_ops)) ax.set_xticklabels(short_ops, rotation=45, ha="right", fontsize=9) ax.set_title( "Cliff's δ (ref vs. avx2) δ = +1.00: avx2 strictly faster in every observation pair", fontsize=10, ) for i in range(len(ALGORITHMS)): for j in range(n_ops): if not np.isnan(data[i, j]): # White text on dark green cells, black elsewhere text_color = "white" if data[i, j] > 0.85 else "black" ax.text(j, i, f"{data[i, j]:+.3f}", ha="center", va="center", fontsize=9, color=text_color, fontweight="bold") fig.tight_layout() _save(fig, out_dir, "cliffs_delta_heatmap") # --------------------------------------------------------------------------- # Figure 4: cycle distribution overlays (requires raw aggregator JSON) # # Three panels: one high-speedup op, one medium, one low. # Each panel overlays ref and avx2 histograms + KDE for mlkem512. # Log x-axis exposes the scale difference honestly. # --------------------------------------------------------------------------- def fig_distributions(raw_records: list[dict], out_dir: Path, alg: str = "mlkem512") -> None: from scipy.stats import gaussian_kde # Build lookup: (alg, variant, op) → raw array raw: dict[tuple, np.ndarray] = {} for r in raw_records: if r.get("raw"): raw[(r["algorithm"], r["variant"], r["operation"])] = np.array(r["raw"], dtype=np.float64) n_ops = len(DIST_OPS) fig, axes = plt.subplots(1, n_ops, figsize=(5 * n_ops, 4)) variant_style = { "ref": {"color": "#4C72B0", "label": "ref (O3)", "alpha": 0.55, "zorder": 2}, "avx2": {"color": "#C44E52", "label": "avx2", "alpha": 0.65, "zorder": 3}, } for ax, (op, subtitle) in zip(axes, DIST_OPS): plotted_any = False for variant in ("ref", "avx2"): arr = raw.get((alg, variant, op)) if arr is None: continue plotted_any = True s = variant_style[variant] # Histogram on log scale log_arr = np.log10(arr) lo, hi = np.floor(log_arr.min()), np.ceil(log_arr.max()) bins = np.logspace(lo, hi, 60) ax.hist(arr, bins=bins, density=True, color=s["color"], alpha=s["alpha"], zorder=s["zorder"], label=s["label"]) # KDE on log scale, back-transformed kde = gaussian_kde(log_arr, bw_method=0.25) xs_log = np.linspace(lo, hi, 400) xs = 10 ** xs_log # KDE is in log space; convert density: p(x) = p(log x) / (x ln10) ys = kde(xs_log) / (xs * np.log(10)) ax.plot(xs, ys, color=s["color"], linewidth=1.8, zorder=s["zorder"] + 1) # Median line med = float(np.median(arr)) ax.axvline(med, color=s["color"], linewidth=1.2, linestyle="--", zorder=5) if not plotted_any: ax.set_visible(False) continue ax.set_xscale("log") ax.set_xlabel("Cycles (log scale)") ax.set_ylabel("Density" if op == DIST_OPS[0][0] else "") ax.set_title(subtitle, fontsize=10) ax.legend(fontsize=9) ax.xaxis.set_major_formatter(ticker.LogFormatterSciNotation(labelOnlyBase=False)) ax.grid(axis="x", which="both", linestyle="--", linewidth=0.4, alpha=0.4) ax.set_axisbelow(True) fig.suptitle( f"Cycle Count Distributions — ref vs. avx2 ({ALG_TITLES[alg]})\n" "Dashed lines show medians. Distributions are right-skewed → nonparametric statistics.", fontsize=10, ) fig.tight_layout() _save(fig, out_dir, "distributions") # --------------------------------------------------------------------------- # Figure 5: cross-param speedup consistency # # For per-polynomial operations the polynomial dimension is always 256, # independent of the security parameter k. Speedups should be identical # across mlkem512/768/1024. This figure verifies that. # --------------------------------------------------------------------------- def fig_cross_param(results: list[dict], out_dir: Path) -> None: ops = CROSS_PARAM_OPS short = [OP_SHORT.get(op, op) for op in ops] x = np.arange(len(ops)) bar_w = 0.22 offsets = np.array([-bar_w, 0, bar_w]) colors = ["#4C72B0", "#55A868", "#C44E52"] fig, ax = plt.subplots(figsize=(8, 4)) for alg, offset, color in zip(ALGORITHMS, offsets, colors): vals, ci_lo, ci_hi = [], [], [] for op in ops: match = [r for r in results if r["algorithm"] == alg and r["operation"] == op] if match and "ref_to_avx2" in match[0]["comparisons"]: c = match[0]["comparisons"]["ref_to_avx2"] vals.append(c["speedup"]) ci_lo.append(c["ci95"][0]) ci_hi.append(c["ci95"][1]) else: vals.append(0); ci_lo.append(0); ci_hi.append(0) vals = np.array(vals) ci_lo = np.array(ci_lo) ci_hi = np.array(ci_hi) yerr = np.array([vals - ci_lo, ci_hi - vals]) mask = vals > 0 ax.bar(x[mask] + offset, vals[mask], bar_w, label=ALG_TITLES[alg], color=color, alpha=0.88, zorder=3) ax.errorbar(x[mask] + offset, vals[mask], yerr=yerr[:, mask], fmt="none", ecolor="black", elinewidth=0.8, capsize=3, zorder=4) ax.set_xticks(x) ax.set_xticklabels(short, fontsize=11) ax.set_ylabel("Speedup ref → avx2 (×)") ax.set_title( "Per-Polynomial Operation Speedup Across Security Parameters\n" "(polynomial dim = 256 for all; NTT variation attributed to cache-state differences)" ) ax.yaxis.set_minor_locator(ticker.AutoMinorLocator()) ax.grid(axis="y", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0) ax.set_axisbelow(True) ax.legend(fontsize=10) fig.tight_layout() _save(fig, out_dir, "cross_param") # --------------------------------------------------------------------------- # Figure S1: KEM-level end-to-end speedup (supplementary) # --------------------------------------------------------------------------- def fig_kem_level(results: list[dict], out_dir: Path) -> None: ops = KEM_OPS short = [KEM_SHORT[op] for op in ops] x = np.arange(len(ops)) bar_w = 0.22 offsets = np.array([-bar_w, 0, bar_w]) colors = ["#4C72B0", "#55A868", "#C44E52"] fig, ax = plt.subplots(figsize=(7, 4)) for alg, offset, color in zip(ALGORITHMS, offsets, colors): vals, ci_lo, ci_hi = [], [], [] for op in ops: match = [r for r in results if r["algorithm"] == alg and r["operation"] == op] if match and "ref_to_avx2" in match[0]["comparisons"]: c = match[0]["comparisons"]["ref_to_avx2"] vals.append(c["speedup"]) ci_lo.append(c["ci95"][0]) ci_hi.append(c["ci95"][1]) else: vals.append(0); ci_lo.append(0); ci_hi.append(0) vals = np.array(vals) ci_lo = np.array(ci_lo) ci_hi = np.array(ci_hi) yerr = np.array([vals - ci_lo, ci_hi - vals]) mask = vals > 0 ax.bar(x[mask] + offset, vals[mask], bar_w, label=ALG_TITLES[alg], color=color, alpha=0.88, zorder=3) ax.errorbar(x[mask] + offset, vals[mask], yerr=yerr[:, mask], fmt="none", ecolor="black", elinewidth=0.8, capsize=3, zorder=4) ax.set_xticks(x) ax.set_xticklabels(short, fontsize=12) ax.set_ylabel("Speedup ref → avx2 (×)") ax.set_title( "End-to-End KEM Speedup (ref → avx2)\n" "(Intel Xeon Platinum 8268, 95% bootstrap CI)" ) ax.yaxis.set_minor_locator(ticker.AutoMinorLocator()) ax.grid(axis="y", linestyle="--", linewidth=0.4, alpha=0.5, zorder=0) ax.set_axisbelow(True) ax.legend(fontsize=10) fig.tight_layout() _save(fig, out_dir, "kem_level") # --------------------------------------------------------------------------- # Shared save helper # --------------------------------------------------------------------------- def _save(fig: plt.Figure, out_dir: Path, stem: str) -> None: fig.savefig(out_dir / f"{stem}.pdf", bbox_inches="tight") fig.savefig(out_dir / f"{stem}.png", bbox_inches="tight", dpi=150) print(f"Saved {out_dir}/{stem}.{{pdf,png}}") plt.close(fig) # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--json", default="analysis/results.json", help="analyzed results JSON (from analyze.py)") parser.add_argument("--raw-json", default=None, help="raw aggregator JSON (from aggregate --raw); required for --distributions") parser.add_argument("--out", default="analysis/figures") args = parser.parse_args() out_dir = Path(args.out) out_dir.mkdir(parents=True, exist_ok=True) results = load(args.json) print(f"Loaded {len(results)} result rows.") fig_decomposition(results, out_dir) fig_hand_simd(results, out_dir) fig_cliffs_heatmap(results, out_dir) fig_cross_param(results, out_dir) fig_kem_level(results, out_dir) if args.raw_json: raw_records = load(args.raw_json) print(f"Loaded {len(raw_records)} raw groups for distributions.") fig_distributions(raw_records, out_dir) else: print("Skipping distributions figure (pass --raw-json to enable).") if __name__ == "__main__": main()