going public!

This commit is contained in:
Levi Neuwirth 2025-05-05 18:07:03 -04:00
parent 5b7926de49
commit 719e611e39
31 changed files with 1035315 additions and 0 deletions

File diff suppressed because it is too large Load Diff

8968
benchmark/objdump/ref512.txt Normal file

File diff suppressed because it is too large Load Diff

8969
benchmark/objdump/ref768.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

18
benchmark/test_1024.sh Normal file
View File

@ -0,0 +1,18 @@
#!/bin/sh
# TODO: change me!
# -p: which partition do you want to run your workload on? <batch, gpu, bigmem>
# -n: how many CPU cores do you want to run your job?
# --mem: how much memory do you want?
# -t: how long do you want to run the job before it times out <hh:mm:ss>
# --constraint=intel: required for power monitoring
#SBATCH -p batch
#SBATCH -n 1
#SBATCH --mem=1g
#SBATCH -t 60:00
#SBATCH --constraint=intel
# Run the benchmark binary 1000 times, printing a "Loop spin: N" marker line
# before each run. A POSIX counter loop is used because this script declares
# #!/bin/sh: the {1..1000} brace range is a bash extension, and a strictly
# POSIX sh would iterate once over the literal string "{1..1000}".
i=1
while [ "$i" -le 1000 ]
do
    echo "Loop spin:" "$i"
    ./test_speed1024
    i=$((i + 1))
done

18
benchmark/test_512.sh Normal file
View File

@ -0,0 +1,18 @@
#!/bin/sh
# TODO: change me!
# -p: which partition do you want to run your workload on? <batch, gpu, bigmem>
# -n: how many CPU cores do you want to run your job?
# --mem: how much memory do you want?
# -t: how long do you want to run the job before it times out <hh:mm:ss>
# --constraint=intel: required for power monitoring
#SBATCH -p batch
#SBATCH -n 1
#SBATCH --mem=1g
#SBATCH -t 60:00
#SBATCH --constraint=intel
# Run the benchmark binary 1000 times, printing a "Loop spin: N" marker line
# before each run. A POSIX counter loop is used because this script declares
# #!/bin/sh: the {1..1000} brace range is a bash extension, and a strictly
# POSIX sh would iterate once over the literal string "{1..1000}".
i=1
while [ "$i" -le 1000 ]
do
    echo "Loop spin:" "$i"
    ./test_speed512
    i=$((i + 1))
done

18
benchmark/test_768.sh Normal file
View File

@ -0,0 +1,18 @@
#!/bin/sh
# TODO: change me!
# -p: which partition do you want to run your workload on? <batch, gpu, bigmem>
# -n: how many CPU cores do you want to run your job?
# --mem: how much memory do you want?
# -t: how long do you want to run the job before it times out <hh:mm:ss>
# --constraint=intel: required for power monitoring
#SBATCH -p batch
#SBATCH -n 1
#SBATCH --mem=1g
#SBATCH -t 60:00
#SBATCH --constraint=intel
# Run the benchmark binary 1000 times, printing a "Loop spin: N" marker line
# before each run. A POSIX counter loop is used because this script declares
# #!/bin/sh: the {1..1000} brace range is a bash extension, and a strictly
# POSIX sh would iterate once over the literal string "{1..1000}".
i=1
while [ "$i" -le 1000 ]
do
    echo "Loop spin:" "$i"
    ./test_speed768
    i=$((i + 1))
done

159
benchmark/test_speed.c Normal file
View File

@ -0,0 +1,159 @@
/*
* This file comes from the Kyber repo; see the files in kyber/avx2/test or kyber/ref/test for further details.
*/
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "../kem.h"
#include "../params.h"
#include "../indcpa.h"
#include "../polyvec.h"
#include "../poly.h"
#include "../randombytes.h"
#include "cpucycles.h"
#include "speed_print.h"
#define NTESTS 1000

/* Cycle-counter samples for whichever benchmark is currently running. */
uint64_t t[NTESTS];
/* All-zero seed passed to the seeded routines below. */
uint8_t seed[KYBER_SYMBYTES] = {0};

/*
 * BENCH(label, call): run `call` NTESTS times. Before every invocation the
 * current cpucycles() value is stored in t[i]; afterwards the label and the
 * NTESTS samples are handed to print_results().
 * Relies on the loop counter `i` declared in the enclosing function.
 */
#define BENCH(label, call)               \
  do {                                   \
    for(i = 0; i < NTESTS; i++) {        \
      t[i] = cpucycles();                \
      call;                              \
    }                                    \
    print_results(label, t, NTESTS);     \
  } while(0)

/*
 * Benchmark driver: times each primitive in turn, in a fixed order.
 * Later benchmarks reuse the buffers (ap, matrix, ct, pk, sk, key) left
 * behind by earlier ones, so the order of the BENCH lines must not change.
 */
int main(void)
{
  unsigned int i;
  uint8_t pk[CRYPTO_PUBLICKEYBYTES];
  uint8_t sk[CRYPTO_SECRETKEYBYTES];
  uint8_t ct[CRYPTO_CIPHERTEXTBYTES];
  uint8_t key[CRYPTO_BYTES];
  uint8_t coins32[KYBER_SYMBYTES];
  uint8_t coins64[2*KYBER_SYMBYTES];
  polyvec matrix[KYBER_K];
  poly ap;

  /* Random coins for the derandomized key generation / encapsulation calls. */
  randombytes(coins32, KYBER_SYMBYTES);
  randombytes(coins64, 2*KYBER_SYMBYTES);

  /* Matrix generation and noise sampling. */
  BENCH("gen_a: ", gen_matrix(matrix, seed, 0));
  BENCH("poly_getnoise_eta1: ", poly_getnoise_eta1(&ap, seed, 0));
  BENCH("poly_getnoise_eta2: ", poly_getnoise_eta2(&ap, seed, 0));

  /* Polynomial arithmetic. */
  BENCH("NTT: ", poly_ntt(&ap));
  BENCH("INVNTT: ", poly_invntt_tomont(&ap));
  BENCH("polyvec_basemul_acc_montgomery: ",
        polyvec_basemul_acc_montgomery(&ap, &matrix[0], &matrix[1]));

  /* Message and (de)compression conversions. */
  BENCH("poly_tomsg: ", poly_tomsg(ct,&ap));
  BENCH("poly_frommsg: ", poly_frommsg(&ap,ct));
  BENCH("poly_compress: ", poly_compress(ct,&ap));
  BENCH("poly_decompress: ", poly_decompress(&ap,ct));
  BENCH("polyvec_compress: ", polyvec_compress(ct,&matrix[0]));
  BENCH("polyvec_decompress: ", polyvec_decompress(&matrix[0],ct));

  /* IND-CPA layer. */
  BENCH("indcpa_keypair: ", indcpa_keypair_derand(pk, sk, coins32));
  BENCH("indcpa_enc: ", indcpa_enc(ct, key, pk, seed));
  BENCH("indcpa_dec: ", indcpa_dec(key, ct, sk));

  /* KEM layer. */
  BENCH("kyber_keypair_derand: ", crypto_kem_keypair_derand(pk, sk, coins64));
  BENCH("kyber_keypair: ", crypto_kem_keypair(pk, sk));
  BENCH("kyber_encaps_derand: ", crypto_kem_enc_derand(ct, key, pk, coins32));
  BENCH("kyber_encaps: ", crypto_kem_enc(ct, key, pk));
  BENCH("kyber_decaps: ", crypto_kem_dec(key, ct, sk));

  return 0;
}

1
kyber Submodule

@ -0,0 +1 @@
Subproject commit 4768bd37c02f9c40a46cb49d4d1f4d5e612bb882

BIN
util/analyze_simd Executable file

Binary file not shown.

178
util/analyze_simd.go Normal file
View File

@ -0,0 +1,178 @@
/*
*
* FILE: analyze_simd.go
* LATEST: 10:08 05 May 2025
* DESC: find percentage of a dumped amd64 object file's instructions that are SIMD instructions
* AUTHOR: Levi Neuwirth <ln@levineuwirth.org>
*
*/
package main
import (
"bufio"
"fmt"
"log"
"os"
"regexp"
"strings"
)
// Package-level state shared by main and the init helpers.
var (
	// total counts every instruction seen; simd counts those found in simdInstr.
	total int
	simd  int

	// simdInstr is used as a set: Go has no built-in hash set, so a map is
	// used and only key presence (value true) matters.
	simdInstr map[string]bool

	// digits is filled by initDigits with the strings "0" through "9".
	digits []string
)
// instrLineRegex matches one instruction line of an objdump disassembly:
// leading whitespace, a hex address followed by ':', optional raw instruction
// bytes (two hex digits each, separated by whitespace), then the mnemonic,
// which is the only capture group. The capture is optional so that a line
// holding nothing but raw bytes (the continuation of a long instruction)
// matches with an empty capture instead of having one of its bytes mistaken
// for a mnemonic.
var instrLineRegex = regexp.MustCompile(`^\s*[0-9a-f]+:\s+(?:[0-9a-f]{2}(?:\s+|$))*([a-z][a-z0-9]*)?`)

// extractMnemonic returns the instruction mnemonic found on an objdump line.
// ok is false for anything that is not an instruction line: blank lines,
// the "file format" header, section and symbol headers, and byte-only
// continuation lines.
func extractMnemonic(line string) (mnemonic string, ok bool) {
	// Cheap rejection: every instruction line carries an "address:" prefix.
	if !strings.Contains(line, ":") {
		return "", false
	}
	m := instrLineRegex.FindStringSubmatch(line)
	if m == nil || m[1] == "" {
		return "", false
	}
	return m[1], true
}

// main reads the objdump text file named by the first command-line argument,
// counts its instructions and how many of them appear in simdInstr, and
// prints both counts.
func main() {
	// os.Args[0] is the program name, so the path argument needs len >= 2.
	if len(os.Args) < 2 {
		log.Fatal("Usage: ./analyze_simd <path to .txt from objdump>")
	}

	objDumpRaw, err := os.Open(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	defer objDumpRaw.Close()
	fmt.Println("Successfully opened object dump. Investigating...")

	initDigits()
	initSimdInstructions()

	scanner := bufio.NewScanner(objDumpRaw)
	for scanner.Scan() {
		instr, ok := extractMnemonic(scanner.Text())
		if !ok {
			continue
		}
		// Trace every recognised mnemonic on the log stream (stderr by default).
		log.Println(instr)
		if simdInstr[instr] {
			simd++
		}
		total++
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	fmt.Printf("The result is:\n%d SIMD instructions\n%d Total instructions\n", simd, total)
}
// initSimdInstructions (re)builds simdInstr, the set of mnemonics that main
// counts as SIMD. Lookups are exact string matches on the mnemonic.
//
// The map literal is grouped by instruction-set extension. It is followed by
// a list of VEX-encoded ("v"-prefixed) AVX/AVX2 mnemonics: loads/stores,
// integer subtract/multiply/shift, shuffles, permutes, packs and broadcasts.
// Without these, AVX2 integer code is heavily undercounted. They are inserted
// with a loop rather than in the literal so an accidental repeat cannot
// become a duplicate-key compile error.
func initSimdInstructions() {
	simdInstr = map[string]bool{
		// MMX Instructions
		"packsswb": true, "packssdw": true,
		"packuswb": true, "paddb": true, "paddw": true, "paddd": true,
		"paddsb": true, "paddsw": true, "paddusb": true, "paddusw": true,
		"pand": true, "pandn": true, "pcmpeqb": true, "pcmpeqw": true,
		"pcmpeqd": true, "pcmpgtb": true, "pcmpgtw": true, "pcmpgtd": true,
		"pmaddwd": true, "pmulhw": true, "pmullw": true, "por": true,
		"psllw": true, "pslld": true, "psllq": true, "psraw": true,
		"psrad": true, "psrlw": true, "psrld": true, "psrlq": true,
		"psubb": true, "psubw": true, "psubd": true, "psubsb": true,
		"psubsw": true, "psubusb": true, "psubusw": true, "punpckhbw": true,
		"punpckhwd": true, "punpckhdq": true, "punpcklbw": true, "punpcklwd": true,
		"punpckldq": true, "pxor": true,
		// SSE Instructions
		"addps": true, "addss": true, "andps": true, "andnps": true,
		"cmpeqps": true, "cmpeqss": true, "cmpgeps": true, "cmpgess": true,
		"cmpgtps": true, "cmpgtss": true, "cmpleps": true, "cmpless": true,
		"cmpltps": true, "cmpltss": true, "cmpneqps": true, "cmpneqss": true,
		"cmpngeps": true, "cmpngess": true, "cmpngtps": true, "cmpngtss": true,
		"cmpnleps": true, "cmpnless": true, "cmpnltps": true, "cmpnltss": true,
		"cmpordps": true, "cmpordss": true, "cmpunordps": true, "cmpunordss": true,
		"divps": true, "divss": true, "maxps": true, "maxss": true,
		"minps": true, "minss": true, "movaps": true, "movss": true,
		"movups": true, "mulps": true, "mulss": true, "rcpps": true,
		"rcpss": true, "rsqrtps": true, "rsqrtss": true, "sqrtps": true,
		"sqrtss": true, "subps": true, "subss": true, "xorps": true,
		// SSE2 Instructions
		"addpd": true, "addsd": true, "andpd": true, "andnpd": true,
		"cmpeqpd": true, "cmpeqsd": true, "cmpgepd": true, "cmpgesd": true,
		"cmpgtpd": true, "cmpgtsd": true, "cmplepd": true, "cmplesd": true,
		"cmpltpd": true, "cmpltsd": true, "cmpneqpd": true, "cmpneqsd": true,
		"cmpngepd": true, "cmpngesd": true, "cmpngtpd": true, "cmpngtsd": true,
		"cmpnlepd": true, "cmpnlesd": true, "cmpnltpd": true, "cmpnltsd": true,
		"cmpordpd": true, "cmpordsd": true, "cmpunordpd": true, "cmpunordsd": true,
		"divpd": true, "divsd": true, "maxpd": true, "maxsd": true,
		"minpd": true, "minsd": true, "movapd": true, "movsd": true,
		"movupd": true, "mulpd": true, "mulsd": true, "sqrtpd": true,
		"subpd": true, "subsd": true, "xorpd": true,
		// SSE3 Instructions
		"addsubpd": true, "addsubps": true, "haddpd": true, "haddps": true,
		"hsubpd": true, "hsubps": true, "lddqu": true, "monitor": true,
		"mwait": true, "movddup": true, "movshdup": true, "movsldup": true,
		// SSSE3 Instructions
		"pshufb": true, "phaddw": true, "phaddd": true, "phaddsw": true,
		"pmaddubsw": true, "phsubw": true, "phsubd": true, "phsubsw": true,
		"psignb": true, "psignw": true, "psignd": true, "pmulhrsw": true,
		"palignr": true,
		// SSE4.1 Instructions
		"blendpd": true, "blendps": true, "blendvpd": true, "blendvps": true,
		"dppd": true, "dpps": true, "extractps": true, "insertps": true,
		"movntdqa": true, "mpsadbw": true, "packusdw": true, "pblendvb": true,
		"pblendw": true, "pcmpeqq": true, "pextrb": true, "pextrd": true,
		"pextrq": true, "phminposuw": true, "pinsrb": true, "pinsrd": true,
		"pinsrq": true, "pmuldq": true, "pmulld": true, "ptest": true,
		"roundpd": true, "roundps": true, "roundsd": true, "roundss": true,
		// SSE4.2 Instructions
		"pcmpestri": true, "pcmpestrm": true, "pcmpistri": true, "pcmpistrm": true,
		"crc32": true, "popcnt": true,
		// AVX Instructions
		"vaddpd": true, "vaddps": true, "vaddsd": true, "vaddss": true,
		"vandpd": true, "vandps": true, "vandnpd": true, "vandnps": true,
		"vdivpd": true, "vdivps": true, "vdivsd": true, "vdivss": true,
		"vmaxpd": true, "vmaxps": true, "vmaxsd": true, "vmaxss": true,
		"vminpd": true, "vminps": true, "vminsd": true, "vminss": true,
		"vmulpd": true, "vmulps": true, "vmulsd": true, "vmulss": true,
		"vorpd": true, "vorps": true, "vsqrtpd": true, "vsqrtps": true,
		"vsqrtsd": true, "vsqrtss": true, "vsubpd": true, "vsubps": true,
		"vsubsd": true, "vsubss": true, "vxorpd": true, "vxorps": true,
		// AVX2 Instructions
		"vpabsb": true, "vpabsw": true, "vpabsd": true, "vpaddb": true,
		"vpaddw": true, "vpaddd": true, "vpaddq": true, "vpaddsb": true,
		"vpaddsw": true, "vpaddusb": true, "vpaddusw": true, "vpalignr": true,
		"vpand": true, "vpandn": true, "vpavgb": true, "vpavgw": true,
		"vpblendd": true, "vpcmpeqb": true, "vpcmpeqw": true, "vpcmpeqd": true,
		"vpcmpeqq": true, "vpcmpgtb": true, "vpcmpgtw": true, "vpcmpgtd": true,
		// AVX512 not included since Kyber does not use it.
	}

	// Additional VEX-encoded mnemonics. Only "v"-prefixed names are added
	// here: they cannot be confused with a general-purpose instruction that
	// happens to carry an AT&T size suffix.
	vexExtra := []string{
		// AVX/AVX2 data movement, broadcast, insert/extract, permute, blend
		"vmovaps", "vmovapd", "vmovups", "vmovupd", "vmovdqa", "vmovdqu",
		"vmovss", "vmovsd", "vmovd", "vmovq", "vlddqu",
		"vmovntdq", "vmovntdqa", "vmovntps", "vmovntpd",
		"vmovmskps", "vmovmskpd", "vpmovmskb",
		"vbroadcastss", "vbroadcastsd", "vbroadcastf128", "vbroadcasti128",
		"vpbroadcastb", "vpbroadcastw", "vpbroadcastd", "vpbroadcastq",
		"vinsertf128", "vinserti128", "vextractf128", "vextracti128",
		"vperm2f128", "vperm2i128", "vpermilps", "vpermilpd",
		"vpermd", "vpermq", "vpermps", "vpermpd",
		"vshufps", "vshufpd", "vunpckhps", "vunpcklps", "vunpckhpd", "vunpcklpd",
		"vblendps", "vblendpd", "vblendvps", "vblendvpd",
		"vpblendw", "vpblendvb",
		"vptest", "vzeroupper", "vzeroall",
		// AVX2 integer arithmetic
		"vpsubb", "vpsubw", "vpsubd", "vpsubq",
		"vpsubsb", "vpsubsw", "vpsubusb", "vpsubusw",
		"vpmullw", "vpmulld", "vpmulhw", "vpmulhuw", "vpmulhrsw",
		"vpmuldq", "vpmuludq", "vpmaddwd", "vpmaddubsw", "vpsadbw",
		"vphaddw", "vphaddd", "vphaddsw", "vphsubw", "vphsubd", "vphsubsw",
		"vpsignb", "vpsignw", "vpsignd",
		"vpmaxsb", "vpmaxsw", "vpmaxsd", "vpmaxub", "vpmaxuw", "vpmaxud",
		"vpminsb", "vpminsw", "vpminsd", "vpminub", "vpminuw", "vpminud",
		"vpcmpgtq",
		// AVX2 logic and shifts
		"vpor", "vpxor",
		"vpsllw", "vpslld", "vpsllq", "vpslldq",
		"vpsrlw", "vpsrld", "vpsrlq", "vpsrldq",
		"vpsraw", "vpsrad",
		"vpsllvd", "vpsllvq", "vpsrlvd", "vpsrlvq", "vpsravd",
		// AVX2 shuffle, unpack, pack, widen, element insert/extract
		"vpshufb", "vpshufd", "vpshufhw", "vpshuflw",
		"vpunpckhbw", "vpunpckhwd", "vpunpckhdq", "vpunpckhqdq",
		"vpunpcklbw", "vpunpcklwd", "vpunpckldq", "vpunpcklqdq",
		"vpacksswb", "vpackssdw", "vpackuswb", "vpackusdw",
		"vpmovsxbw", "vpmovsxbd", "vpmovsxbq", "vpmovsxwd", "vpmovsxwq", "vpmovsxdq",
		"vpmovzxbw", "vpmovzxbd", "vpmovzxbq", "vpmovzxwd", "vpmovzxwq", "vpmovzxdq",
		"vpextrb", "vpextrw", "vpextrd", "vpextrq",
		"vpinsrb", "vpinsrw", "vpinsrd", "vpinsrq",
	}
	for _, mnemonic := range vexExtra {
		simdInstr[mnemonic] = true
	}
}
// initDigits resets digits to the ten one-character strings "0" through "9",
// in ascending order.
func initDigits() {
	digits = make([]string, 0)
	for d := '0'; d <= '9'; d++ {
		digits = append(digits, string(d))
	}
}

BIN
util/testrun_sum_std Executable file

Binary file not shown.

137
util/testrun_sum_std.go Normal file
View File

@ -0,0 +1,137 @@
/*
*
* FILE: testrun_sum_std.go
* LATEST: 10:19 05 May 2025
* DESC: sum values from iterative Kyber batch jobs.
* AUTHOR: Levi Neuwirth <ln@levineuwirth.org>
*
*/
package main
import (
"bufio"
"fmt"
"log"
"math"
"os"
"strconv"
"strings"
)
// Package-level state filled in by main while scanning the job output.
var (
	// count is the number of "Loop spin:" marker lines seen.
	count float64
	// testSums accumulates the reported averages, keyed by test label.
	testSums map[string]float64
	// lastTest is the most recently seen test label.
	lastTest string

	// Individual reported averages per test, kept for the standard deviation.
	gen_a          []float64
	indcpa_keypair []float64
	indcpa_enc     []float64
	keypair_derand []float64
	keypair        []float64
	encaps         []float64
	decaps         []float64
)
// parseAverage extracts the number that follows the "average:" marker on a
// line. Whitespace after the marker is skipped, then the leading run of
// digits and '.' characters is parsed; anything after it (for example a unit)
// is ignored. An error is returned when the marker is missing or no number
// follows it.
func parseAverage(line string) (float64, error) {
	const marker = "average:"
	idx := strings.Index(line, marker)
	if idx < 0 {
		return 0, fmt.Errorf("no %q marker in line", marker)
	}
	rest := strings.TrimSpace(line[idx+len(marker):])
	end := 0
	for end < len(rest) && ((rest[end] >= '0' && rest[end] <= '9') || rest[end] == '.') {
		end++
	}
	return strconv.ParseFloat(rest[:end], 64)
}

// main reads the job output file named by the first command-line argument.
// It sums each "average:" value under the test label that precedes it, keeps
// the individual values for seven selected tests, and prints the mean and
// standard deviation of each.
func main() {
	// os.Args[0] is the program name, so the path argument needs len >= 2.
	if len(os.Args) < 2 {
		log.Fatal("Usage: ./testrun_sum_std <path to slurm.OUT file>")
	}

	outRaw, err := os.Open(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	defer outRaw.Close()
	fmt.Println("Successfully opened slurm STDOUT")

	initTestSums()
	count = 0
	lastTest = "none"

	scanner := bufio.NewScanner(outRaw)
	for scanner.Scan() {
		localLine := scanner.Text()
		switch {
		case strings.Contains(localLine, "Loop spin:"):
			// Marker printed before each benchmark run.
			count++

		case strings.Contains(localLine, "average:"):
			// Belongs to the most recently seen test label.
			add, err := parseAverage(localLine)
			if err != nil {
				log.Printf("Failed to parse number from line %q: %v", localLine, err)
				continue
			}
			testSums[lastTest] += add
			// Keep the individual samples for the tests we report a stddev for.
			switch lastTest {
			case "gen_a:":
				gen_a = append(gen_a, add)
			case "indcpa_keypair:":
				indcpa_keypair = append(indcpa_keypair, add)
			case "indcpa_enc:":
				indcpa_enc = append(indcpa_enc, add)
			case "kyber_keypair_derand:":
				keypair_derand = append(keypair_derand, add)
			case "kyber_keypair:":
				keypair = append(keypair, add)
			case "kyber_encaps:":
				encaps = append(encaps, add)
			case "kyber_decaps:":
				decaps = append(decaps, add)
			}

		case strings.Contains(localLine, "median:"):
			// Medians are not used.

		default:
			// A line ending in ':' names the test the following data belongs to.
			trimmed := strings.TrimSpace(localLine)
			if strings.HasSuffix(trimmed, ":") && !strings.Contains(trimmed, "average") && !strings.Contains(trimmed, "median") {
				lastTest = trimmed
			}
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	// Each mean divides by the number of samples actually collected for that
	// test, the same denominator calcStddev uses. Dividing by the loop count
	// would skew the mean whenever a run was cut short or a line was skipped.
	// With no samples the division yields NaN.
	mean := func(key string, samples []float64) float64 {
		return testSums[key] / float64(len(samples))
	}

	fmt.Printf("gen_a avg: %f\ngen_a stddev: %f\n", mean("gen_a:", gen_a), calcStddev("gen_a:", gen_a))
	fmt.Printf("indcpa keypair avg: %f\nindcpa_keypair stddev: %f\n", mean("indcpa_keypair:", indcpa_keypair), calcStddev("indcpa_keypair:", indcpa_keypair))
	fmt.Printf("indcpa enc avg: %f\nindcpa_enc stddev: %f\n", mean("indcpa_enc:", indcpa_enc), calcStddev("indcpa_enc:", indcpa_enc))
	fmt.Printf("keypair_derand avg: %f\nkeypair_derand stddev:: %f\n", mean("kyber_keypair_derand:", keypair_derand), calcStddev("kyber_keypair_derand:", keypair_derand))
	fmt.Printf("keypair avg: %f\nkeypair stddev:: %f\n", mean("kyber_keypair:", keypair), calcStddev("kyber_keypair:", keypair))
	fmt.Printf("encaps avg: %f\nencaps stddev:: %f\n", mean("kyber_encaps:", encaps), calcStddev("kyber_encaps:", encaps))
	fmt.Printf("decaps avg: %f\ndecaps stddev:: %f\n", mean("kyber_decaps:", decaps), calcStddev("kyber_decaps:", decaps))
}
// initTestSums replaces testSums with a fresh map holding a zero sum for each
// of the seven test labels reported by main.
func initTestSums() {
	testSums = map[string]float64{
		"gen_a:":                0,
		"indcpa_keypair:":       0,
		"indcpa_enc:":           0,
		"kyber_keypair_derand:": 0,
		"kyber_keypair:":        0,
		"kyber_encaps:":         0,
		"kyber_decaps:":         0,
	}
}
// calcStddev returns the population standard deviation of inputs (it divides
// by len(inputs), not len(inputs)-1). The mean is not recomputed from inputs:
// it is taken as testSums[test] divided by len(inputs), so testSums[test]
// must hold the sum of those same values. With empty inputs the divisions
// are 0/0 and the result is NaN.
func calcStddev(test string, inputs []float64) (result float64) {
	n := float64(len(inputs))
	mean := testSums[test] / n

	sumSq := 0.0
	for i := 0; i < len(inputs); i++ {
		delta := inputs[i] - mean
		sumSq += delta * delta
	}

	result = math.Sqrt(sumSq / n)
	return result
}