initial move
This commit is contained in:
parent
719e611e39
commit
7750ae3d8c
|
|
@ -0,0 +1,40 @@
|
|||
# Build outputs
|
||||
harness/build/
|
||||
results/
|
||||
|
||||
# Compiled binaries (analysis tools, when written)
|
||||
analysis/target/
|
||||
analysis/build/
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Algorithm submodule build artifacts
|
||||
algorithms/*/build/
|
||||
algorithms/*/Makefile
|
||||
algorithms/**/*.o
|
||||
|
||||
# Generated data (re-runnable)
|
||||
data/objdump/
|
||||
|
||||
# LaTeX build artifacts
|
||||
paper/*.aux
|
||||
paper/*.log
|
||||
paper/*.out
|
||||
paper/*.toc
|
||||
paper/*.bbl
|
||||
paper/*.blg
|
||||
paper/*.fls
|
||||
paper/*.fdb_latexmk
|
||||
paper/*.synctex.gz
|
||||
paper/*.pdf
|
||||
|
||||
# Personal reference docs
|
||||
ROADMAP.md
|
||||
IDEATION.md
|
||||
|
||||
# OS / editor
|
||||
.DS_Store
|
||||
.idea/
|
||||
*.swp
|
||||
*~
|
||||
17
README.md
17
README.md
|
|
@ -1,2 +1,15 @@
|
|||
# cs1952y-final
|
||||
Profiling Kyber: what are the implications of SIMD-optimized cryptography on performance and energy?
|
||||
# pqc-simd-bench
|
||||
|
||||
Empirical study of SIMD optimization in post-quantum cryptography — performance
|
||||
and energy analysis across algorithm variants and security parameter sets.
|
||||
|
||||
```
|
||||
algorithms/ PQC algorithm submodules (ML-KEM, ML-DSA, ...)
|
||||
harness/ Benchmark harness (C, CMake)
|
||||
slurm/ SLURM job templates for OSCAR
|
||||
data/raw/ Raw benchmark output, organized by algorithm and parameter set
|
||||
results/ Processed outputs and figures (generated, gitignored)
|
||||
paper/ LaTeX write-up
|
||||
```
|
||||
|
||||
**Author:** Levi Neuwirth — [ln@levineuwirth.org](mailto:ln@levineuwirth.org)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -1,18 +0,0 @@
|
|||
#!/bin/bash
# SLURM batch script: runs ./test_speed1024 1000 times, printing a
# "Loop spin: <i>" marker line before each run.
#
# TODO: change me!
# -p: which partition do you want to run your workload on? <batch, gpu, bigmem>
# -n: how many CPU cores do you want to run your job?
# --mem: how much memory do you want?
# -t: how long do you want to run the job before it times out
#     (Slurm reads "60:00" as minutes:seconds, i.e. one hour)
# --constraint=intel: required for power monitoring

#SBATCH -p batch
#SBATCH -n 1
#SBATCH --mem=1g
#SBATCH -t 60:00
#SBATCH --constraint=intel

# {1..1000} is a bash brace expansion, hence the bash shebang above: a plain
# POSIX sh would run the loop once with the literal string "{1..1000}".
for i in {1..1000}
do
    echo "Loop spin: $i"
    ./test_speed1024
done
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#!/bin/bash
# SLURM batch script: runs ./test_speed512 1000 times, printing a
# "Loop spin: <i>" marker line before each run.
#
# TODO: change me!
# -p: which partition do you want to run your workload on? <batch, gpu, bigmem>
# -n: how many CPU cores do you want to run your job?
# --mem: how much memory do you want?
# -t: how long do you want to run the job before it times out
#     (Slurm reads "60:00" as minutes:seconds, i.e. one hour)
# --constraint=intel: required for power monitoring

#SBATCH -p batch
#SBATCH -n 1
#SBATCH --mem=1g
#SBATCH -t 60:00
#SBATCH --constraint=intel

# {1..1000} is a bash brace expansion, hence the bash shebang above: a plain
# POSIX sh would run the loop once with the literal string "{1..1000}".
for i in {1..1000}
do
    echo "Loop spin: $i"
    ./test_speed512
done
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#!/bin/bash
# SLURM batch script: runs ./test_speed768 1000 times, printing a
# "Loop spin: <i>" marker line before each run.
#
# TODO: change me!
# -p: which partition do you want to run your workload on? <batch, gpu, bigmem>
# -n: how many CPU cores do you want to run your job?
# --mem: how much memory do you want?
# -t: how long do you want to run the job before it times out
#     (Slurm reads "60:00" as minutes:seconds, i.e. one hour)
# --constraint=intel: required for power monitoring

#SBATCH -p batch
#SBATCH -n 1
#SBATCH --mem=1g
#SBATCH -t 60:00
#SBATCH --constraint=intel

# {1..1000} is a bash brace expansion, hence the bash shebang above: a plain
# POSIX sh would run the loop once with the literal string "{1..1000}".
for i in {1..1000}
do
    echo "Loop spin: $i"
    ./test_speed768
done
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
cmake_minimum_required(VERSION 3.20...3.29)
project(pqc-bench LANGUAGES C)

# ── Language standard ────────────────────────────────────────────────────────
# Require C11 outright; without *_REQUIRED CMake silently falls back to an
# older standard when the compiler cannot provide the requested one.
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)

# ── Compiler flags ──────────────────────────────────────────────────────────
# Release build with full optimization; override on the command line:
#   cmake -DCMAKE_BUILD_TYPE=Debug ..
# CMAKE_BUILD_TYPE is meaningless for multi-config generators, so only default
# it for single-config ones.
get_property(pqc_bench_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT pqc_bench_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING
      "Build type (Debug, Release, RelWithDebInfo, MinSizeRel)" FORCE)
endif()

# Host-specific tuning. The flag is appended to (not substituted for) the
# Release flags so that the toolchain defaults (-O3 -DNDEBUG on GCC/Clang) and
# anything passed via -DCMAKE_C_FLAGS_RELEASE=... are preserved. It is set at
# directory scope on purpose so algorithm libraries added below via
# add_subdirectory()/add_library() are compiled with the same flags as the
# harness. Disable with -DPQC_BENCH_MARCH_NATIVE=OFF when the binaries must run
# on a different CPU than the build host.
option(PQC_BENCH_MARCH_NATIVE "Add -march=native to Release builds" ON)
if(PQC_BENCH_MARCH_NATIVE)
  include(CheckCCompilerFlag)
  check_c_compiler_flag(-march=native PQC_BENCH_HAS_MARCH_NATIVE)
  if(PQC_BENCH_HAS_MARCH_NATIVE)
    string(APPEND CMAKE_C_FLAGS_RELEASE " -march=native")
  else()
    message(WARNING "C compiler does not accept -march=native; building without it")
  endif()
endif()

# ── Algorithm root (submodule) ───────────────────────────────────────────────
# Each target below compiles a variant of test_speed.c against a specific
# algorithm build. Add algorithm libraries as submodule CMake subdirectories
# or via add_library() here as the project grows.
#
# Example layout once kyber submodule is added:
#   algorithms/kyber/ref/   → static lib kyber512_ref, kyber768_ref, kyber1024_ref
#   algorithms/kyber/avx2/  → static lib kyber512_avx2, ...

# ── Harness source ───────────────────────────────────────────────────────────
# Relative path; add_executable() resolves it against this directory.
set(HARNESS_SRC src/test_speed.c)

# ── Build variants ───────────────────────────────────────────────────────────
# Uncomment and adjust as algorithm libraries become available.
#
# NOTE: upstream Kyber's params.h expects KYBER_K to be the module rank
# (2, 3 or 4), not the parameter-set name, hence the lookup table below.
#
# set(PQC_BENCH_KYBER_K_512 2)
# set(PQC_BENCH_KYBER_K_768 3)
# set(PQC_BENCH_KYBER_K_1024 4)
#
# foreach(PARAM 512 768 1024)
#   foreach(VARIANT ref refnv)
#     set(TARGET "bench_mlkem${PARAM}_${VARIANT}")
#     add_executable(${TARGET} ${HARNESS_SRC})
#     target_include_directories(${TARGET} PRIVATE
#       ${CMAKE_CURRENT_SOURCE_DIR}/../algorithms/kyber/${VARIANT})
#     target_link_libraries(${TARGET} PRIVATE kyber${PARAM}_${VARIANT})
#     target_compile_definitions(${TARGET} PRIVATE
#       KYBER_K=${PQC_BENCH_KYBER_K_${PARAM}})
#   endforeach()
# endforeach()

# ── PAPI (hardware performance counters) ─────────────────────────────────────
# Optional; enable with -DWITH_PAPI=ON
option(WITH_PAPI "Link against PAPI for hardware counter collection" OFF)
if(WITH_PAPI)
  find_library(PAPI_LIB papi REQUIRED)
  find_path(PAPI_INCLUDE papi.h REQUIRED)

  # Wrap the raw paths in an imported target so consumers get the library and
  # its include directory from a single, typo-checked name.
  if(NOT TARGET PAPI::papi)
    add_library(PAPI::papi UNKNOWN IMPORTED)
    set_target_properties(PAPI::papi PROPERTIES
      IMPORTED_LOCATION "${PAPI_LIB}"
      INTERFACE_INCLUDE_DIRECTORIES "${PAPI_INCLUDE}")
  endif()

  # Targets that need PAPI:
  #   target_link_libraries(<target> PRIVATE PAPI::papi)
endif()

# ── RAPL energy measurement ──────────────────────────────────────────────────
# Optional; enable with -DWITH_RAPL=ON (requires root or CAP_SYS_RAWIO)
option(WITH_RAPL "Enable Intel RAPL energy measurement" OFF)
if(WITH_RAPL)
  # No targets exist yet; once they do, opt each one in with:
  #   target_compile_definitions(<target> PRIVATE WITH_RAPL)
endif()
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
\documentclass[sigconf, nonacm]{acmart}
|
||||
|
||||
\usepackage{booktabs}
|
||||
\usepackage{microtype}
|
||||
\usepackage{pgfplots}
|
||||
\pgfplotsset{compat=1.18}
|
||||
|
||||
% ── Metadata (fill in when ready) ────────────────────────────────────────────
|
||||
\title{SIMD Optimization in Post-Quantum Cryptography:\\
|
||||
A Micro-Architecture and Energy Analysis}
|
||||
|
||||
\author{Levi Neuwirth}
|
||||
\affiliation{%
|
||||
\institution{Brown University}
|
||||
\city{Providence}
|
||||
\state{Rhode Island}
|
||||
\country{USA}
|
||||
}
|
||||
\email{ln@levineuwirth.org}
|
||||
|
||||
\begin{abstract}
|
||||
TODO
|
||||
\end{abstract}
|
||||
|
||||
\keywords{post-quantum cryptography, ML-KEM, Kyber, SIMD, AVX2, performance
|
||||
analysis, energy efficiency, micro-architecture}
|
||||
|
||||
% ─────────────────────────────────────────────────────────────────────────────
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
% ── 1. Introduction ──────────────────────────────────────────────────────────
|
||||
\section{Introduction}
|
||||
\label{sec:intro}
|
||||
|
||||
TODO
|
||||
|
||||
% ── 2. Background ────────────────────────────────────────────────────────────
|
||||
\section{Background}
|
||||
\label{sec:background}
|
||||
|
||||
\subsection{ML-KEM / Kyber}
|
||||
TODO: Module-LWE, ring structure, NTT.
|
||||
|
||||
\subsection{SIMD on x86-64}
|
||||
TODO: AVX2 register model, relevant instructions for polynomial arithmetic.
|
||||
|
||||
\subsection{Hardware Performance Counters and RAPL}
|
||||
TODO: perf, PAPI, Intel RAPL energy domains.
|
||||
|
||||
% ── 3. Methodology ───────────────────────────────────────────────────────────
|
||||
\section{Methodology}
|
||||
\label{sec:methodology}
|
||||
|
||||
\subsection{Implementation Variants}
|
||||
TODO: ref (AVX2 intrinsics), refnv (scalar, no vectorization), refo0 (unoptimized
|
||||
baseline).
|
||||
|
||||
\subsection{Benchmark Harness}
|
||||
TODO: cycle counter, iteration count, statistical methodology, OSCAR node spec.
|
||||
|
||||
\subsection{Hardware Counter Collection}
|
||||
TODO: PAPI events selected and why.
|
||||
|
||||
\subsection{Energy Measurement}
|
||||
TODO: RAPL pkg + DRAM domains, joules-per-operation derivation.
|
||||
|
||||
% ── 4. Results ───────────────────────────────────────────────────────────────
|
||||
\section{Results}
|
||||
\label{sec:results}
|
||||
|
||||
\subsection{Cycle Counts}
|
||||
|
||||
\begin{table}[h]
|
||||
\caption{Median cycle counts, ML-KEM-512, 10\,000 iterations.}
|
||||
\label{tab:cycles512}
|
||||
\begin{tabular}{lrrr}
|
||||
\toprule
|
||||
Operation & ref (AVX2) & refnv (scalar) & speedup \\
|
||||
\midrule
|
||||
NTT & TODO & TODO & TODO$\times$ \\
|
||||
INVNTT & TODO & TODO & TODO$\times$ \\
|
||||
polyvec\_basemul\_acc & TODO & TODO & TODO$\times$ \\
|
||||
indcpa\_keypair & TODO & TODO & TODO$\times$ \\
|
||||
indcpa\_enc & TODO & TODO & TODO$\times$ \\
|
||||
kyber\_encaps & TODO & TODO & TODO$\times$ \\
|
||||
kyber\_decaps & TODO & TODO & TODO$\times$ \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
\subsection{Hardware Counter Breakdown}
|
||||
TODO: IPC, cache miss rates, branch mispredictions.
|
||||
|
||||
\subsection{Energy Efficiency}
|
||||
TODO: joules/operation, EDP comparison.
|
||||
|
||||
% ── 5. Discussion ────────────────────────────────────────────────────────────
|
||||
\section{Discussion}
|
||||
\label{sec:discussion}
|
||||
|
||||
TODO: mechanistic explanation of where the speedup comes from.
|
||||
|
||||
% ── 6. Related Work ──────────────────────────────────────────────────────────
|
||||
\section{Related Work}
|
||||
\label{sec:related}
|
||||
|
||||
TODO
|
||||
|
||||
% ── 7. Conclusion ────────────────────────────────────────────────────────────
|
||||
\section{Conclusion}
|
||||
\label{sec:conclusion}
|
||||
|
||||
TODO
|
||||
|
||||
% ── References ───────────────────────────────────────────────────────────────
|
||||
\bibliographystyle{ACM-Reference-Format}
|
||||
\bibliography{refs}
|
||||
|
||||
\end{document}
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
% ── Post-Quantum Cryptography Standards ──────────────────────────────────────
|
||||
|
||||
@techreport{fips203,
|
||||
author = {{National Institute of Standards and Technology}},
|
||||
title = {{Module-Lattice-Based Key-Encapsulation Mechanism Standard}},
|
||||
institution = {NIST},
|
||||
year = {2024},
|
||||
number = {FIPS 203},
|
||||
url = {https://doi.org/10.6028/NIST.FIPS.203},
|
||||
}
|
||||
|
||||
@techreport{fips204,
|
||||
author = {{National Institute of Standards and Technology}},
|
||||
title = {{Module-Lattice-Based Digital Signature Standard}},
|
||||
institution = {NIST},
|
||||
year = {2024},
|
||||
number = {FIPS 204},
|
||||
url = {https://doi.org/10.6028/NIST.FIPS.204},
|
||||
}
|
||||
|
||||
@techreport{fips205,
|
||||
author = {{National Institute of Standards and Technology}},
|
||||
title = {{Stateless Hash-Based Digital Signature Standard}},
|
||||
institution = {NIST},
|
||||
year = {2024},
|
||||
number = {FIPS 205},
|
||||
url = {https://doi.org/10.6028/NIST.FIPS.205},
|
||||
}
|
||||
|
||||
% ── Kyber / ML-KEM ───────────────────────────────────────────────────────────
|
||||
|
||||
@inproceedings{kyber2018,
|
||||
author = {Bos, Joppe W. and Ducas, Léo and Kiltz, Eike and Lepoint, Tancrède
|
||||
and Lyubashevsky, Vadim and Schanck, John M. and Schwabe, Peter
|
||||
and Seiler, Gregor and Stehlé, Damien},
|
||||
title = {{CRYSTALS -- Kyber: A CCA-Secure Module-Lattice-Based KEM}},
|
||||
booktitle = {IEEE European Symposium on Security and Privacy (EuroS\&P)},
|
||||
year = {2018},
|
||||
pages = {353--367},
|
||||
doi = {10.1109/EuroSP.2018.00032},
|
||||
}
|
||||
|
||||
@misc{kyber-avx2,
|
||||
author = {Schwabe, Peter and Seiler, Gregor},
|
||||
title = {{CRYSTALS-Kyber: Reference and AVX2 Implementations}},
|
||||
note = {Reference and AVX2 implementations from the pq-crystals project},
|
||||
url = {https://github.com/pq-crystals/kyber},
|
||||
}
|
||||
|
||||
% ── SIMD and Microarchitecture ────────────────────────────────────────────────
|
||||
|
||||
@manual{intel-avx2,
|
||||
author = {{Intel Corporation}},
|
||||
title = {{Intel 64 and IA-32 Architectures Software Developer's Manual}},
|
||||
year = {2024},
|
||||
note = {Volume 2: Instruction Set Reference},
|
||||
}
|
||||
|
||||
@inproceedings{ntt-survey,
|
||||
author = {Longa, Patrick and Naehrig, Michael},
|
||||
title = {{Speeding Up the Number Theoretic Transform for Faster Ideal
|
||||
Lattice-Based Cryptography}},
|
||||
booktitle = {CANS},
|
||||
year = {2016},
|
||||
doi = {10.1007/978-3-319-48965-0_8},
|
||||
}
|
||||
|
||||
% ── Energy Measurement ───────────────────────────────────────────────────────
|
||||
|
||||
@inproceedings{rapl,
|
||||
author = {David, Howard and Gorbatov, Eugene and Hanebutte, Ulf R. and
|
||||
Khanna, Rahul and Le, Christian},
|
||||
title = {{RAPL: Memory Power Estimation and Capping}},
|
||||
booktitle = {ISLPED},
|
||||
year = {2010},
|
||||
doi = {10.1145/1840845.1840883},
|
||||
}
|
||||
|
||||
% ── Related Benchmarking Work ────────────────────────────────────────────────
|
||||
|
||||
@misc{pqclean,
|
||||
author = {{PQClean Contributors}},
|
||||
title = {{PQClean: Clean, portable, tested implementations of post-quantum
|
||||
cryptography}},
|
||||
url = {https://github.com/PQClean/PQClean},
|
||||
}
|
||||
|
||||
@misc{liboqs,
|
||||
author = {{Open Quantum Safe Project}},
|
||||
title = {{liboqs: C library for quantum-safe cryptographic algorithms}},
|
||||
url = {https://github.com/open-quantum-safe/liboqs},
|
||||
}
|
||||
|
||||
@misc{pqm4,
|
||||
author = {Kannwischer, Matthias J. and Rijneveld, Joost and Schwabe, Peter
|
||||
and Stoffelen, Ko},
|
||||
title = {{pqm4: Post-quantum crypto library for the ARM Cortex-M4}},
|
||||
url = {https://github.com/mupq/pqm4},
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
#!/bin/bash
# Template SLURM job for ML-KEM benchmarking.
# Variables filled in by slurm/submit.sh:
#   PARAM   — 512 | 768 | 1024
#   VARIANT — ref | refnv | avx2 | ...
#   NTESTS  — iterations per operation (default 10000)
#   BINARY  — path to compiled benchmark binary
#             (default ./bench_mlkem${PARAM}_${VARIANT})
#
# NOTE(review): sbatch does not expand shell variables inside #SBATCH lines, so
# the ${PARAM}/${VARIANT} placeholders in -J and -o below only take effect if
# submit.sh substitutes them textually before submission (or overrides -J/-o on
# the sbatch command line) — confirm against submit.sh.

#SBATCH -J bench_mlkem${PARAM}_${VARIANT}
#SBATCH -p batch
#SBATCH -n 1
#SBATCH --mem=2G
#SBATCH -t 02:00:00
#SBATCH --constraint=intel
#SBATCH -o %j_mlkem${PARAM}_${VARIANT}.out

# Ask the GNU OpenMP runtime to keep its threads on CPU 0. This is only a hint
# to libgomp: it does not pin a binary that is not linked against it, and it
# does not change frequency scaling. Deterministic measurements still depend on
# the OSCAR allocation (exclusive core, fixed governor) — skip if unavailable.
export GOMP_CPU_AFFINITY="0"

NTESTS=${NTESTS:-10000}

# Fail early with a precise message instead of deriving "./bench_mlkem_" from
# empty variables.
if [[ -z "${BINARY:-}" && ( -z "${PARAM:-}" || -z "${VARIANT:-}" ) ]]; then
    echo "ERROR: PARAM and VARIANT (or BINARY) must be set" >&2
    exit 1
fi
BINARY=${BINARY:-./bench_mlkem${PARAM}_${VARIANT}}

if ! [[ "$NTESTS" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: NTESTS must be a positive integer, got: $NTESTS" >&2
    exit 1
fi

if [[ ! -x "$BINARY" ]]; then
    echo "ERROR: binary not found or not executable: $BINARY" >&2
    exit 1
fi

# Provenance header so every output file is self-describing.
echo "=== bench_mlkem${PARAM}_${VARIANT} ==="
echo "SLURM_JOB_ID: $SLURM_JOB_ID"
echo "SLURM_NODELIST: $SLURM_NODELIST"
echo "NTESTS: $NTESTS"
echo "DATE: $(date -Iseconds)"
echo "UNAME: $(uname -a)"
echo "CPU: $(grep 'model name' /proc/cpuinfo | head -1 | cut -d: -f2 | xargs)"
echo "---"

# Last command: its exit status becomes the job's exit status.
"$BINARY" "$NTESTS"
|
||||
Binary file not shown.
|
|
@ -1,178 +0,0 @@
|
|||
/*
|
||||
*
|
||||
* FILE: analyze_simd.go
|
||||
* LATEST: 10:08 05 May 2025
|
||||
* DESC: find percentage of a dumped amd64 object file's instructions that are SIMD instructions
|
||||
* AUTHOR: Levi Neuwirth <ln@levineuwirth.org>
|
||||
*
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var total int
|
||||
var simd int
|
||||
|
||||
// Since Go doesn't have a hashset, we will use a hashmap and ignore the Value...
|
||||
var simdInstr map[string]bool
|
||||
var digits []string
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 1 {
|
||||
log.Fatal("Usage: ./analyze_simd <path to .txt from objdump>")
|
||||
}
|
||||
|
||||
objDumpRaw, err := os.Open(os.Args[1])
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
} else {
|
||||
fmt.Println("Successfully opened object dump. Investigating...")
|
||||
}
|
||||
defer objDumpRaw.Close()
|
||||
|
||||
initDigits()
|
||||
initSimdInstructions()
|
||||
|
||||
// This regex magic will get us the instructions from an extracted objdump line.
|
||||
instrRegex := regexp.MustCompile(`\b([a-z]{2,6}[a-z]*)\b`)
|
||||
|
||||
scanner := bufio.NewScanner(objDumpRaw)
|
||||
for scanner.Scan() {
|
||||
localLine := scanner.Text()
|
||||
localLineSplit := strings.Fields(localLine)
|
||||
|
||||
if len(localLineSplit) < 2 || !strings.Contains(localLineSplit[0], ":") {
|
||||
continue
|
||||
}
|
||||
|
||||
matches := instrRegex.FindAllString(localLine, -1)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
instr := matches[0]
|
||||
log.Println(instr)
|
||||
if simdInstr[instr] {
|
||||
simd++
|
||||
}
|
||||
total++
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
fmt.Printf("The result is:\n%d SIMD instructions\n%d Total instructions\n", simd, total)
|
||||
}
|
||||
|
||||
func initSimdInstructions() {
|
||||
simdInstr = map[string]bool{
|
||||
// MMX Instructions
|
||||
"packsswb": true, "packssdw": true,
|
||||
"packuswb": true, "paddb": true, "paddw": true, "paddd": true,
|
||||
"paddsb": true, "paddsw": true, "paddusb": true, "paddusw": true,
|
||||
"pand": true, "pandn": true, "pcmpeqb": true, "pcmpeqw": true,
|
||||
"pcmpeqd": true, "pcmpgtb": true, "pcmpgtw": true, "pcmpgtd": true,
|
||||
"pmaddwd": true, "pmulhw": true, "pmullw": true, "por": true,
|
||||
"psllw": true, "pslld": true, "psllq": true, "psraw": true,
|
||||
"psrad": true, "psrlw": true, "psrld": true, "psrlq": true,
|
||||
"psubb": true, "psubw": true, "psubd": true, "psubsb": true,
|
||||
"psubsw": true, "psubusb": true, "psubusw": true, "punpckhbw": true,
|
||||
"punpckhwd": true, "punpckhdq": true, "punpcklbw": true, "punpcklwd": true,
|
||||
"punpckldq": true, "pxor": true,
|
||||
|
||||
// SSE Instructions
|
||||
"addps": true, "addss": true, "andps": true, "andnps": true,
|
||||
"cmpeqps": true, "cmpeqss": true, "cmpgeps": true, "cmpgess": true,
|
||||
"cmpgtps": true, "cmpgtss": true, "cmpleps": true, "cmpless": true,
|
||||
"cmpltps": true, "cmpltss": true, "cmpneqps": true, "cmpneqss": true,
|
||||
"cmpngeps": true, "cmpngess": true, "cmpngtps": true, "cmpngtss": true,
|
||||
"cmpnleps": true, "cmpnless": true, "cmpnltps": true, "cmpnltss": true,
|
||||
"cmpordps": true, "cmpordss": true, "cmpunordps": true, "cmpunordss": true,
|
||||
"divps": true, "divss": true, "maxps": true, "maxss": true,
|
||||
"minps": true, "minss": true, "movaps": true, "movss": true,
|
||||
"movups": true, "mulps": true, "mulss": true, "rcpps": true,
|
||||
"rcpss": true, "rsqrtps": true, "rsqrtss": true, "sqrtps": true,
|
||||
"sqrtss": true, "subps": true, "subss": true, "xorps": true,
|
||||
|
||||
// SSE2 Instructions
|
||||
"addpd": true, "addsd": true, "andpd": true, "andnpd": true,
|
||||
"cmpeqpd": true, "cmpeqsd": true, "cmpgepd": true, "cmpgesd": true,
|
||||
"cmpgtpd": true, "cmpgtsd": true, "cmplepd": true, "cmplesd": true,
|
||||
"cmpltpd": true, "cmpltsd": true, "cmpneqpd": true, "cmpneqsd": true,
|
||||
"cmpngepd": true, "cmpngesd": true, "cmpngtpd": true, "cmpngtsd": true,
|
||||
"cmpnlepd": true, "cmpnlesd": true, "cmpnltpd": true, "cmpnltsd": true,
|
||||
"cmpordpd": true, "cmpordsd": true, "cmpunordpd": true, "cmpunordsd": true,
|
||||
"divpd": true, "divsd": true, "maxpd": true, "maxsd": true,
|
||||
"minpd": true, "minsd": true, "movapd": true, "movsd": true,
|
||||
"movupd": true, "mulpd": true, "mulsd": true, "sqrtpd": true,
|
||||
"subpd": true, "subsd": true, "xorpd": true,
|
||||
|
||||
// SSE3 Instructions
|
||||
"addsubpd": true, "addsubps": true, "haddpd": true, "haddps": true,
|
||||
"hsubpd": true, "hsubps": true, "lddqu": true, "monitor": true,
|
||||
"mwait": true, "movddup": true, "movshdup": true, "movsldup": true,
|
||||
|
||||
// SSSE3 Instructions
|
||||
"pshufb": true, "phaddw": true, "phaddd": true, "phaddsw": true,
|
||||
"pmaddubsw": true, "phsubw": true, "phsubd": true, "phsubsw": true,
|
||||
"psignb": true, "psignw": true, "psignd": true, "pmulhrsw": true,
|
||||
"palignr": true,
|
||||
|
||||
// SSE4.1 Instructions
|
||||
"blendpd": true, "blendps": true, "blendvpd": true, "blendvps": true,
|
||||
"dppd": true, "dpps": true, "extractps": true, "insertps": true,
|
||||
"movntdqa": true, "mpsadbw": true, "packusdw": true, "pblendvb": true,
|
||||
"pblendw": true, "pcmpeqq": true, "pextrb": true, "pextrd": true,
|
||||
"pextrq": true, "phminposuw": true, "pinsrb": true, "pinsrd": true,
|
||||
"pinsrq": true, "pmuldq": true, "pmulld": true, "ptest": true,
|
||||
"roundpd": true, "roundps": true, "roundsd": true, "roundss": true,
|
||||
|
||||
// SSE4.2 Instructions
|
||||
"pcmpestri": true, "pcmpestrm": true, "pcmpistri": true, "pcmpistrm": true,
|
||||
"crc32": true, "popcnt": true,
|
||||
|
||||
// AVX Instructions
|
||||
"vaddpd": true, "vaddps": true, "vaddsd": true, "vaddss": true,
|
||||
"vandpd": true, "vandps": true, "vandnpd": true, "vandnps": true,
|
||||
"vdivpd": true, "vdivps": true, "vdivsd": true, "vdivss": true,
|
||||
"vmaxpd": true, "vmaxps": true, "vmaxsd": true, "vmaxss": true,
|
||||
"vminpd": true, "vminps": true, "vminsd": true, "vminss": true,
|
||||
"vmulpd": true, "vmulps": true, "vmulsd": true, "vmulss": true,
|
||||
"vorpd": true, "vorps": true, "vsqrtpd": true, "vsqrtps": true,
|
||||
"vsqrtsd": true, "vsqrtss": true, "vsubpd": true, "vsubps": true,
|
||||
"vsubsd": true, "vsubss": true, "vxorpd": true, "vxorps": true,
|
||||
|
||||
// AVX2 Instructions
|
||||
"vpabsb": true, "vpabsw": true, "vpabsd": true, "vpaddb": true,
|
||||
"vpaddw": true, "vpaddd": true, "vpaddq": true, "vpaddsb": true,
|
||||
"vpaddsw": true, "vpaddusb": true, "vpaddusw": true, "vpalignr": true,
|
||||
"vpand": true, "vpandn": true, "vpavgb": true, "vpavgw": true,
|
||||
"vpblendd": true, "vpcmpeqb": true, "vpcmpeqw": true, "vpcmpeqd": true,
|
||||
"vpcmpeqq": true, "vpcmpgtb": true, "vpcmpgtw": true, "vpcmpgtd": true,
|
||||
|
||||
// AVX512 not included since Kyber does not use it.
|
||||
}
|
||||
}
|
||||
|
||||
func initDigits() {
|
||||
digits = make([]string, 0)
|
||||
digits = append(digits, "0")
|
||||
digits = append(digits, "1")
|
||||
digits = append(digits, "2")
|
||||
digits = append(digits, "3")
|
||||
digits = append(digits, "4")
|
||||
digits = append(digits, "5")
|
||||
digits = append(digits, "6")
|
||||
digits = append(digits, "7")
|
||||
digits = append(digits, "8")
|
||||
digits = append(digits, "9")
|
||||
}
|
||||
Binary file not shown.
|
|
@ -1,137 +0,0 @@
|
|||
/*
|
||||
*
|
||||
* FILE: testrun_sum_std.go
|
||||
* LATEST: 10:19 05 May 2025
|
||||
* DESC: sum values from iterative Kyber batch jobs.
|
||||
* AUTHOR: Levi Neuwirth <ln@levineuwirth.org>
|
||||
*
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var count float64
|
||||
var testSums map[string]float64
|
||||
var lastTest string
|
||||
var gen_a []float64
|
||||
var indcpa_keypair []float64
|
||||
var indcpa_enc []float64
|
||||
var keypair_derand []float64
|
||||
var keypair []float64
|
||||
var encaps []float64
|
||||
var decaps []float64
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 1 {
|
||||
log.Fatal("Usage: ./testrun_sum_std <path to slurm.OUT file>")
|
||||
}
|
||||
|
||||
outRaw, err := os.Open(os.Args[1])
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
} else {
|
||||
fmt.Println("Successfully opened slurm STDOUT")
|
||||
}
|
||||
defer outRaw.Close()
|
||||
initTestSums()
|
||||
count = 0
|
||||
lastTest = "none"
|
||||
scanner := bufio.NewScanner(outRaw)
|
||||
for scanner.Scan() {
|
||||
localLine := scanner.Text()
|
||||
// Check if we've hit a new test iteration
|
||||
if strings.Contains(localLine, "Loop spin:") {
|
||||
count += 1
|
||||
continue
|
||||
// Otherwise, we might have data from a previously indicated test.
|
||||
} else if strings.Contains(localLine, "average:") {
|
||||
// We split the line and add to the appropriate testSums index.
|
||||
line := localLine[9:]
|
||||
var numberStr strings.Builder
|
||||
for _, ch := range line {
|
||||
if (ch >= '0' && ch <= '9') || ch == '.' {
|
||||
numberStr.WriteRune(ch)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
add, err := strconv.ParseFloat(numberStr.String(), 64)
|
||||
if err != nil {
|
||||
log.Printf("Failed to parse number from line %q: %v", localLine, err)
|
||||
continue
|
||||
}
|
||||
testSums[lastTest] += add
|
||||
// And now for the stddev:
|
||||
switch lastTest {
|
||||
case "gen_a:":
|
||||
gen_a = append(gen_a, add)
|
||||
case "indcpa_keypair:":
|
||||
indcpa_keypair = append(indcpa_keypair, add)
|
||||
case "indcpa_enc:":
|
||||
indcpa_enc = append(indcpa_enc, add)
|
||||
case "kyber_keypair_derand:":
|
||||
keypair_derand = append(keypair_derand, add)
|
||||
case "kyber_keypair:":
|
||||
keypair = append(keypair, add)
|
||||
case "kyber_encaps:":
|
||||
encaps = append(encaps, add)
|
||||
case "kyber_decaps:":
|
||||
decaps = append(decaps, add)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
continue
|
||||
// We aren't concerned with the medians here.
|
||||
} else if strings.Contains(localLine, "median:") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Here, figure out what the test was for the next data.
|
||||
trimmed := strings.TrimSpace(localLine)
|
||||
if strings.HasSuffix(trimmed, ":") && !strings.Contains(trimmed, "average") && !strings.Contains(trimmed, "median") {
|
||||
lastTest = trimmed
|
||||
continue
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Now we take the averages and stddevs.
|
||||
fmt.Printf("gen_a avg: %f\ngen_a stddev: %f\n", testSums["gen_a:"]/count, calcStddev("gen_a:", gen_a))
|
||||
fmt.Printf("indcpa keypair avg: %f\nindcpa_keypair stddev: %f\n", testSums["indcpa_keypair:"]/count, calcStddev("indcpa_keypair:", indcpa_keypair))
|
||||
fmt.Printf("indcpa enc avg: %f\nindcpa_enc stddev: %f\n", testSums["indcpa_enc:"]/count, calcStddev("indcpa_enc:", indcpa_enc))
|
||||
fmt.Printf("keypair_derand avg: %f\nkeypair_derand stddev:: %f\n", testSums["kyber_keypair_derand:"]/count, calcStddev("kyber_keypair_derand:", keypair_derand))
|
||||
fmt.Printf("keypair avg: %f\nkeypair stddev:: %f\n", testSums["kyber_keypair:"]/count, calcStddev("kyber_keypair:", keypair))
|
||||
fmt.Printf("encaps avg: %f\nencaps stddev:: %f\n", testSums["kyber_encaps:"]/count, calcStddev("kyber_encaps:", encaps))
|
||||
fmt.Printf("decaps avg: %f\ndecaps stddev:: %f\n", testSums["kyber_decaps:"]/count, calcStddev("kyber_decaps:", decaps))
|
||||
}
|
||||
|
||||
func initTestSums() {
|
||||
testSums = make(map[string]float64)
|
||||
testSums["gen_a:"] = 0
|
||||
testSums["indcpa_keypair:"] = 0
|
||||
testSums["indcpa_enc:"] = 0
|
||||
testSums["kyber_keypair_derand:"] = 0
|
||||
testSums["kyber_keypair:"] = 0
|
||||
testSums["kyber_encaps:"] = 0
|
||||
testSums["kyber_decaps:"] = 0
|
||||
}
|
||||
|
||||
// calcStddev returns the population standard deviation (divisor N, not N-1)
// of inputs.
//
// The mean is computed from inputs themselves, so the result depends only on
// the samples passed in. The test argument is kept for call-site
// compatibility and is not consulted.
//
// An empty inputs slice yields 0 rather than NaN from a division by zero.
func calcStddev(test string, inputs []float64) (result float64) {
	n := float64(len(inputs))
	if n == 0 {
		return 0
	}

	var sum float64
	for _, value := range inputs {
		sum += value
	}
	mean := sum / n

	var squaredDiffs float64
	for _, value := range inputs {
		diff := value - mean
		squaredDiffs += diff * diff
	}

	return math.Sqrt(squaredDiffs / n)
}
|
||||
Loading…
Reference in New Issue