% Paper skeleton: SIMD optimization in post-quantum cryptography (ML-KEM / Kyber).
% Uses the acmart class in its two-column `sigconf` layout; `nonacm` is passed as
% a class option alongside it.
% Every construct sits on its own line so that a `%` comment can never swallow
% the markup that follows it.
\documentclass[sigconf, nonacm]{acmart}

\usepackage{booktabs}   % \toprule / \midrule / \bottomrule in the results table
\usepackage{microtype}
\usepackage{pgfplots}   % loaded for plots; not used yet in this skeleton
\pgfplotsset{compat=1.18}

% ── Metadata (fill in when ready) ────────────────────────────────────────────
\title{SIMD Optimization in Post-Quantum Cryptography:\\
       A Micro-Architecture and Energy Analysis}

\author{Levi Neuwirth}
\affiliation{%
  \institution{Brown University}
  \city{Providence}
  \state{Rhode Island}
  \country{USA}
}
\email{ln@levineuwirth.org}

% NOTE(review): the abstract is kept in the preamble, ahead of \maketitle, as in
% the original source -- confirm this placement against the acmart documentation.
\begin{abstract}
TODO
\end{abstract}

\keywords{post-quantum cryptography, ML-KEM, Kyber, SIMD, AVX2,
          performance analysis, energy efficiency, micro-architecture}

% ─────────────────────────────────────────────────────────────────────────────
\begin{document}
\maketitle

% ── 1. Introduction ──────────────────────────────────────────────────────────
\section{Introduction}
\label{sec:intro}

TODO

% ── 2. Background ────────────────────────────────────────────────────────────
\section{Background}
\label{sec:background}

\subsection{ML-KEM / Kyber}
TODO: Module-LWE, ring structure, NTT.

\subsection{SIMD on x86-64}
TODO: AVX2 register model, relevant instructions for polynomial arithmetic.

\subsection{Hardware Performance Counters and RAPL}
TODO: perf, PAPI, Intel RAPL energy domains.

% ── 3. Methodology ───────────────────────────────────────────────────────────
\section{Methodology}
\label{sec:methodology}

\subsection{Implementation Variants}
TODO: ref (AVX2 intrinsics), refnv (scalar, no vectorization), refo0 (unoptimized baseline).

\subsection{Benchmark Harness}
TODO: cycle counter, iteration count, statistical methodology, OSCAR node spec.

\subsection{Hardware Counter Collection}
TODO: PAPI events selected and why.

\subsection{Energy Measurement}
TODO: RAPL pkg + DRAM domains, joules-per-operation derivation.

% ── 4. Results ───────────────────────────────────────────────────────────────
\section{Results}
\label{sec:results}

\subsection{Cycle Counts}

% Placement is [htbp] rather than a bare [h]: LaTeX turns [h] alone into [ht]
% and emits a warning. Caption precedes the tabular (table captions go above),
% and \label follows \caption so it picks up the table number.
\begin{table}[htbp]
  \caption{Median cycle counts, ML-KEM-512, 10\,000 iterations.}
  \label{tab:cycles512}
  % @{} strips the outer column padding; booktabs rules only, no vertical rules.
  \begin{tabular}{@{}lrrr@{}}
    \toprule
    Operation              & ref (AVX2) & refnv (scalar) & speedup       \\
    \midrule
    NTT                    & TODO       & TODO           & TODO$\times$  \\
    INVNTT                 & TODO       & TODO           & TODO$\times$  \\
    polyvec\_basemul\_acc  & TODO       & TODO           & TODO$\times$  \\
    indcpa\_keypair        & TODO       & TODO           & TODO$\times$  \\
    indcpa\_enc            & TODO       & TODO           & TODO$\times$  \\
    kyber\_encaps          & TODO       & TODO           & TODO$\times$  \\
    kyber\_decaps          & TODO       & TODO           & TODO$\times$  \\
    \bottomrule
  \end{tabular}
\end{table}

\subsection{Hardware Counter Breakdown}
TODO: IPC, cache miss rates, branch mispredictions.

\subsection{Energy Efficiency}
TODO: joules/operation, EDP comparison.

% ── 5. Discussion ────────────────────────────────────────────────────────────
\section{Discussion}
\label{sec:discussion}

TODO: mechanistic explanation of where the speedup comes from.

% ── 6. Related Work ──────────────────────────────────────────────────────────
\section{Related Work}
\label{sec:related}

TODO

% ── 7. Conclusion ────────────────────────────────────────────────────────────
\section{Conclusion}
\label{sec:conclusion}

TODO

% ── References ───────────────────────────────────────────────────────────────
\bibliographystyle{ACM-Reference-Format}
\bibliography{refs}

\end{document}