diff --git a/Makefile b/Makefile index a067f48..9d38c05 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: build deploy sign download-model download-pdfjs convert-images pdf-thumbs pdfs watch clean dev +.PHONY: build deploy sign download-model download-pdfjs compress-assets convert-images pdf-thumbs pdfs watch clean dev # Source .env for GITHUB_TOKEN and GITHUB_REPO if it exists. # .env format: KEY=value (one per line, no `export` prefix, no quotes needed). @@ -25,6 +25,7 @@ build: else \ echo "Embedding skipped: run 'uv sync' to enable similar-links (build continues)"; \ fi + @./tools/compress-assets.sh _site @BUILD_END=$$(date +%s); \ BUILD_START=$$(cat data/build-start.txt); \ @@ -44,6 +45,12 @@ download-model: download-pdfjs: @./tools/download-pdfjs.sh +# Generate .gz and .br sidecars for compressible text assets in _site/. +# Runs automatically as part of `build`. Pairs with `gzip_static` / +# `brotli_static` in the nginx vhost (see nginx/static-assets.conf). +compress-assets: + @./tools/compress-assets.sh _site + # Convert JPEG/PNG images to WebP companions (also runs automatically in build). # Requires cwebp: pacman -S libwebp / apt install webp convert-images: diff --git a/nginx/static-assets.conf b/nginx/static-assets.conf new file mode 100644 index 0000000..f44b69f --- /dev/null +++ b/nginx/static-assets.conf @@ -0,0 +1,78 @@ +# static-assets.conf — Compression + long-lived cache headers for static assets. +# +# Place at /etc/nginx/snippets/static-assets.conf and `include` it inside +# the server { } block of the levineuwirth.org vhost: +# +# server { +# server_name levineuwirth.org; +# root /var/www/levineuwirth.org; +# ... +# include snippets/static-assets.conf; +# include snippets/popup-proxy.conf; +# } +# +# Pairs with tools/compress-assets.sh, which pre-generates .gz and .br +# sidecars at build time so nginx never pays the compression cost at +# request time. 
+ +# ── On-the-fly gzip (fallback) ─────────────────────────────────────── +# Covers dynamically generated responses and any file for which a .gz +# sidecar was not produced (e.g. files smaller than compress-assets.sh's +# MIN_SIZE threshold, or extensions not on its allow-list). +gzip on; +gzip_vary on; +gzip_comp_level 6; +gzip_min_length 256; +gzip_proxied any; +gzip_types text/plain + text/css + text/xml + application/javascript + text/javascript + application/json + application/xml + application/xml+rss + application/wasm + image/svg+xml; + +# ── Pre-compressed sidecars ────────────────────────────────────────── +# Serve .gz / .br when the client advertises a matching +# Accept-Encoding. Zero request-time CPU; maximum compression ratio +# because the sidecars were produced with gzip -9 / brotli -Z. +gzip_static on; + +# brotli_static requires the ngx_brotli module: +# Arch: pacman -S nginx-mod-brotli (or build nginx-mainline with the module) +# Debian/Ubuntu: apt install libnginx-mod-brotli +# If the module is absent, comment out the two brotli lines below; gzip_static +# will still cover every modern browser. Chromium/Firefox/Safari all accept gzip. +brotli_static on; +brotli off; # we ship pre-compressed sidecars only, no on-the-fly brotli + +# ── Cache headers ──────────────────────────────────────────────────── +# PDF.js viewer is version-pinned in tools/download-pdfjs.sh — bumping +# the pin is a deploy, so `immutable` is safe and makes repeat visits +# instantaneous. Same reasoning applies to fingerprinted fonts and the +# locally vendored ML model files. 
+location ^~ /pdfjs/ { + add_header Cache-Control "public, max-age=31536000, immutable" always; + access_log off; +} + +location ^~ /fonts/ { + add_header Cache-Control "public, max-age=31536000, immutable" always; + access_log off; +} + +location ^~ /models/ { + add_header Cache-Control "public, max-age=31536000, immutable" always; + access_log off; +} + +# Per-extension caching for assets that live alongside HTML. CSS and JS +# in this repo are not fingerprinted, so a 1-day cache with must-revalidate +# keeps them responsive to deploys without forcing a fetch on every page. +location ~* \.(?:css|js|mjs|woff2?|svg|webp|png|jpg|jpeg|ico)$ { + add_header Cache-Control "public, max-age=86400, must-revalidate" always; + access_log off; +} diff --git a/tools/compress-assets.sh b/tools/compress-assets.sh new file mode 100755 index 0000000..f76dea8 --- /dev/null +++ b/tools/compress-assets.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# compress-assets.sh — Generate .gz (and .br, if brotli is installed) sidecars +# for compressible text assets in _site/. +# +# Pairs with nginx `gzip_static on` / `brotli_static on`: nginx serves the +# pre-compressed file when the client advertises a matching Accept-Encoding, +# so each build pays the compression cost once (at brotli -q 11) instead of +# the server paying it on every request. +# +# Only files >= MIN_SIZE bytes are compressed — below that, the compression +# framing overhead can exceed the savings. Sidecars are regenerated only +# when the source is newer than the existing sidecar, so re-runs are cheap. +# +# Usage: +# ./tools/compress-assets.sh # compress _site/ +# ./tools/compress-assets.sh path/to/dir # compress a specific directory + +set -euo pipefail + +SITE_DIR="${1:-_site}" +MIN_SIZE="${MIN_SIZE:-1024}" # bytes + +if [ ! 
-d "$SITE_DIR" ]; then + echo "compress-assets: directory '$SITE_DIR' not found" >&2 + exit 1 +fi + +have_brotli=0 +if command -v brotli >/dev/null 2>&1; then + have_brotli=1 +else + echo "compress-assets: brotli not found — generating gzip only" >&2 + echo " (install: pacman -S brotli / apt install brotli)" >&2 +fi + +# Export for subshells invoked by xargs. +export MIN_SIZE +export have_brotli + +compress_one() { + local src="$1" + local size + size=$(stat -c '%s' "$src" 2>/dev/null || stat -f '%z' "$src") + if [ "$size" -lt "$MIN_SIZE" ]; then + return + fi + + # gzip sidecar — -9 max ratio, -n strips filename/mtime for reproducible output. + if [ ! -f "$src.gz" ] || [ "$src" -nt "$src.gz" ]; then + gzip -9 -n -c "$src" > "$src.gz.tmp" && mv "$src.gz.tmp" "$src.gz" + fi + + # brotli sidecar — -Z is the max quality (level 11); slow but cached. + if [ "$have_brotli" = "1" ]; then + if [ ! -f "$src.br" ] || [ "$src" -nt "$src.br" ]; then + brotli -Z -f -o "$src.br.tmp" "$src" && mv "$src.br.tmp" "$src.br" + fi + fi +} +export -f compress_one + +# Extensions worth compressing. Images (png/jpg/webp) and PDFs are already +# compressed; fonts (woff2) are zstd/brotli internally — don't re-wrap. +find "$SITE_DIR" -type f \( \ + -name '*.html' -o \ + -name '*.css' -o \ + -name '*.js' -o \ + -name '*.mjs' -o \ + -name '*.json' -o \ + -name '*.svg' -o \ + -name '*.xml' -o \ + -name '*.txt' -o \ + -name '*.wasm' \ + \) \ + -not -name '*.gz' \ + -not -name '*.br' \ + -print0 \ + | xargs -0 -P "$(nproc 2>/dev/null || echo 4)" -I {} bash -c 'compress_one "$@"' _ {} + +echo "compress-assets: sidecars written under $SITE_DIR/" diff --git a/tools/download-pdfjs.sh b/tools/download-pdfjs.sh index 03e1d33..97da7b0 100755 --- a/tools/download-pdfjs.sh +++ b/tools/download-pdfjs.sh @@ -60,5 +60,15 @@ mkdir -p "$PDFJS_DIR" echo "pdfjs: extracting to $PDFJS_DIR" unzip -q -o "$tmpdir/$ARCHIVE" -d "$PDFJS_DIR" +# Strip artifacts that are never needed by site users. 
Saves ~11 MB on +# disk and in rsync; none are referenced by viewer.html at runtime. +# *.map sourcemaps (devtools-only) +# web/debugger.mjs, debugger.css PDF.js developer panel +# web/compressed.tracemonkey-*.pdf demo PDF shipped as the viewer's default +echo "pdfjs: stripping unused artifacts" +find "$PDFJS_DIR" -type f -name '*.map' -delete +rm -f "$PDFJS_DIR/web/debugger.mjs" "$PDFJS_DIR/web/debugger.css" +rm -f "$PDFJS_DIR"/web/compressed.tracemonkey-*.pdf + echo "pdfjs: done. static/pdfjs/web/viewer.html is ready." echo " Run 'make build' to include it in _site/."