% File-viewer header (not document content): 121 lines, 4.5 KiB, TeX
\documentclass[sigconf, nonacm]{acmart}
|
|
|
|
\usepackage{booktabs}
|
|
\usepackage{microtype}
|
|
\usepackage{pgfplots}
|
|
\pgfplotsset{compat=1.18}
|
|
|
|
% ── Metadata (fill in when ready) ────────────────────────────────────────────
|
|
\title{SIMD Optimization in Post-Quantum Cryptography}
\subtitle{A Micro-Architecture and Energy Analysis}
|
|
|
|
\author{Levi Neuwirth}
|
|
\affiliation{%
|
|
\institution{Brown University}
|
|
\city{Providence}
|
|
\state{Rhode Island}
|
|
\country{USA}
|
|
}
|
|
\email{ln@levineuwirth.org}
|
|
|
|
\begin{abstract}
|
|
TODO
|
|
\end{abstract}
|
|
|
|
\keywords{post-quantum cryptography, ML-KEM, Kyber, SIMD, AVX2, performance
|
|
analysis, energy efficiency, micro-architecture}
|
|
|
|
% ─────────────────────────────────────────────────────────────────────────────
|
|
\begin{document}
|
|
\maketitle
|
|
|
|
% ── 1. Introduction ──────────────────────────────────────────────────────────
|
|
\section{Introduction}
|
|
\label{sec:intro}
|
|
|
|
TODO
|
|
|
|
% ── 2. Background ────────────────────────────────────────────────────────────
|
|
\section{Background}
|
|
\label{sec:background}
|
|
|
|
\subsection{ML-KEM / Kyber}
|
|
TODO: Module-LWE, ring structure, NTT.
|
|
|
|
\subsection{SIMD on x86-64}
|
|
TODO: AVX2 register model, relevant instructions for polynomial arithmetic.
|
|
|
|
\subsection{Hardware Performance Counters and RAPL}
|
|
TODO: perf, PAPI, Intel RAPL energy domains.
|
|
|
|
% ── 3. Methodology ───────────────────────────────────────────────────────────
|
|
\section{Methodology}
|
|
\label{sec:methodology}
|
|
|
|
\subsection{Implementation Variants}
|
|
TODO: ref (AVX2 intrinsics), refnv (scalar, no vectorization), refo0 (unoptimized
|
|
baseline).
|
|
|
|
\subsection{Benchmark Harness}
|
|
TODO: cycle counter, iteration count, statistical methodology, OSCAR node spec.
|
|
|
|
\subsection{Hardware Counter Collection}
|
|
TODO: PAPI events selected and why.
|
|
|
|
\subsection{Energy Measurement}
|
|
TODO: RAPL pkg + DRAM domains, joules-per-operation derivation.
|
|
|
|
% ── 4. Results ───────────────────────────────────────────────────────────────
|
|
\section{Results}
|
|
\label{sec:results}
|
|
|
|
\subsection{Cycle Counts}
|
|
|
|
\begin{table}[htbp]
|
|
\caption{Median cycle counts, ML-KEM-512, 10\,000 iterations.}
|
|
\label{tab:cycles512}
|
|
\begin{tabular}{lrrr}
|
|
\toprule
|
|
Operation & ref (AVX2) & refnv (scalar) & Speedup \\
|
|
\midrule
|
|
NTT & TODO & TODO & TODO$\times$ \\
|
|
INVNTT & TODO & TODO & TODO$\times$ \\
|
|
polyvec\_basemul\_acc & TODO & TODO & TODO$\times$ \\
|
|
indcpa\_keypair & TODO & TODO & TODO$\times$ \\
|
|
indcpa\_enc & TODO & TODO & TODO$\times$ \\
|
|
kyber\_encaps & TODO & TODO & TODO$\times$ \\
|
|
kyber\_decaps & TODO & TODO & TODO$\times$ \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\end{table}
|
|
|
|
\subsection{Hardware Counter Breakdown}
|
|
TODO: IPC, cache miss rates, branch mispredictions.
|
|
|
|
\subsection{Energy Efficiency}
|
|
TODO: joules/operation, energy-delay product (EDP) comparison.
|
|
|
|
% ── 5. Discussion ────────────────────────────────────────────────────────────
|
|
\section{Discussion}
|
|
\label{sec:discussion}
|
|
|
|
TODO: mechanistic explanation of where the speedup comes from.
|
|
|
|
% ── 6. Related Work ──────────────────────────────────────────────────────────
|
|
\section{Related Work}
|
|
\label{sec:related}
|
|
|
|
TODO
|
|
|
|
% ── 7. Conclusion ────────────────────────────────────────────────────────────
|
|
\section{Conclusion}
|
|
\label{sec:conclusion}
|
|
|
|
TODO
|
|
|
|
% ── References ───────────────────────────────────────────────────────────────
|
|
\bibliographystyle{ACM-Reference-Format}
|
|
\bibliography{refs}
|
|
|
|
\end{document}
|