% where-simd-helps/paper/main.tex
%
% 121 lines
% 4.5 KiB
% TeX

\documentclass[sigconf, nonacm]{acmart}
\usepackage{booktabs}
\usepackage{microtype}
\usepackage{pgfplots}
\pgfplotsset{compat=1.18}
% ── Metadata (fill in when ready) ────────────────────────────────────────────
% Paper title; the explicit \\ breaks it into two lines on the title page.
\title{SIMD Optimization in Post-Quantum Cryptography:\\
  A Micro-Architecture and Energy Analysis}
% Author block: name, then affiliation fields, then contact e-mail.
\author{Levi Neuwirth}
\affiliation{%
  \institution{Brown University}
  \city{Providence}
  \state{Rhode Island}
  \country{USA}
}
\email{ln@levineuwirth.org}
% Abstract is still a placeholder.
\begin{abstract}
  TODO
\end{abstract}
% Comma-separated keyword list.
\keywords{post-quantum cryptography, ML-KEM, Kyber, SIMD, AVX2,
  performance analysis, energy efficiency, micro-architecture}
% ─────────────────────────────────────────────────────────────────────────────
\begin{document}
\maketitle
% ── 1. Introduction ──────────────────────────────────────────────────────────
\section{Introduction}
\label{sec:intro}
TODO
% ── 2. Background ────────────────────────────────────────────────────────────
\section{Background}
\label{sec:background}
\subsection{ML-KEM / Kyber}
TODO: Module-LWE, ring structure, NTT.
\subsection{SIMD on x86-64}
TODO: AVX2 register model, relevant instructions for polynomial arithmetic.
\subsection{Hardware Performance Counters and RAPL}
TODO: perf, PAPI, Intel RAPL energy domains.
% ── 3. Methodology ───────────────────────────────────────────────────────────
\section{Methodology}
\label{sec:methodology}
\subsection{Implementation Variants}
TODO: ref (AVX2 intrinsics), refnv (scalar, no vectorization), refo0 (unoptimized
baseline).
\subsection{Benchmark Harness}
TODO: cycle counter, iteration count, statistical methodology, OSCAR node spec.
\subsection{Hardware Counter Collection}
TODO: PAPI events selected and why.
\subsection{Energy Measurement}
TODO: RAPL pkg + DRAM domains, joules-per-operation derivation.
% ── 4. Results ───────────────────────────────────────────────────────────────
\section{Results}
\label{sec:results}
\subsection{Cycle Counts}
% Table: median cycle counts per operation for the two variants named in the
% column headers, plus the resulting speedup. All data cells are still TODO.
% Placement is [htbp] rather than a bare [h]: when the float does not fit at
% the point of definition, LaTeX rewrites a lone [h] to [ht] and emits a
% warning; listing the alternatives gives the float algorithm more options.
\begin{table}[htbp]
  \caption{Median cycle counts, ML-KEM-512, 10\,000 iterations.}
  \label{tab:cycles512} % kept after \caption so the label picks up the table number
  \begin{tabular}{lrrr}
    \toprule
    Operation             & ref (AVX2) & refnv (scalar) & speedup        \\
    \midrule
    % Low-level polynomial kernels.
    NTT                   & TODO       & TODO           & TODO$\times$   \\
    INVNTT                & TODO       & TODO           & TODO$\times$   \\
    polyvec\_basemul\_acc & TODO       & TODO           & TODO$\times$   \\
    % IND-CPA and KEM-level operations.
    indcpa\_keypair       & TODO       & TODO           & TODO$\times$   \\
    indcpa\_enc           & TODO       & TODO           & TODO$\times$   \\
    kyber\_encaps         & TODO       & TODO           & TODO$\times$   \\
    kyber\_decaps         & TODO       & TODO           & TODO$\times$   \\
    \bottomrule
  \end{tabular}
\end{table}
\subsection{Hardware Counter Breakdown}
TODO: IPC, cache miss rates, branch mispredictions.
\subsection{Energy Efficiency}
TODO: joules/operation, EDP comparison.
% ── 5. Discussion ────────────────────────────────────────────────────────────
\section{Discussion}
\label{sec:discussion}
TODO: mechanistic explanation of where the speedup comes from.
% ── 6. Related Work ──────────────────────────────────────────────────────────
\section{Related Work}
\label{sec:related}
TODO
% ── 7. Conclusion ────────────────────────────────────────────────────────────
\section{Conclusion}
\label{sec:conclusion}
TODO
% ── References ───────────────────────────────────────────────────────────────
\bibliographystyle{ACM-Reference-Format}
\bibliography{refs}
\end{document}