PDF compression
This commit is contained in:
parent
3a95a05284
commit
6d2f9d12ae
9
Makefile
9
Makefile
|
|
@ -1,4 +1,4 @@
|
|||
.PHONY: build deploy sign download-model download-pdfjs convert-images pdf-thumbs pdfs watch clean dev
|
||||
.PHONY: build deploy sign download-model download-pdfjs compress-assets convert-images pdf-thumbs pdfs watch clean dev
|
||||
|
||||
# Source .env for GITHUB_TOKEN and GITHUB_REPO if it exists.
|
||||
# .env format: KEY=value (one per line, no `export` prefix, no quotes needed).
|
||||
|
|
@ -25,6 +25,7 @@ build:
|
|||
else \
|
||||
echo "Embedding skipped: run 'uv sync' to enable similar-links (build continues)"; \
|
||||
fi
|
||||
@./tools/compress-assets.sh _site
|
||||
> IGNORE.txt
|
||||
@BUILD_END=$$(date +%s); \
|
||||
BUILD_START=$$(cat data/build-start.txt); \
|
||||
|
|
@ -44,6 +45,12 @@ download-model:
|
|||
download-pdfjs:
|
||||
@./tools/download-pdfjs.sh
|
||||
|
||||
# Generate .gz and .br sidecars for compressible text assets in _site/.
|
||||
# Runs automatically as part of `build`. Pairs with `gzip_static` /
|
||||
# `brotli_static` in the nginx vhost (see nginx/static-assets.conf).
|
||||
compress-assets:
|
||||
@./tools/compress-assets.sh _site
|
||||
|
||||
# Convert JPEG/PNG images to WebP companions (also runs automatically in build).
|
||||
# Requires cwebp: pacman -S libwebp / apt install webp
|
||||
convert-images:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,78 @@
|
|||
# static-assets.conf — Compression + long-lived cache headers for static assets.
|
||||
#
|
||||
# Place at /etc/nginx/snippets/static-assets.conf and `include` it inside
|
||||
# the server { } block of the levineuwirth.org vhost:
|
||||
#
|
||||
# server {
|
||||
# server_name levineuwirth.org;
|
||||
# root /var/www/levineuwirth.org;
|
||||
# ...
|
||||
# include snippets/static-assets.conf;
|
||||
# include snippets/popup-proxy.conf;
|
||||
# }
|
||||
#
|
||||
# Pairs with tools/compress-assets.sh, which pre-generates .gz and .br
|
||||
# sidecars at build time so nginx never pays the compression cost at
|
||||
# request time.
|
||||
|
||||
# ── On-the-fly gzip (fallback) ───────────────────────────────────────
|
||||
# Covers dynamically generated responses and any file for which a .gz
|
||||
# sidecar was not produced (e.g. files smaller than compress-assets.sh's
|
||||
# MIN_SIZE threshold, or extensions not on its allow-list).
|
||||
gzip on;
|
||||
gzip_vary on;
|
||||
gzip_comp_level 6;
|
||||
gzip_min_length 256;
|
||||
gzip_proxied any;
|
||||
# MIME types eligible for on-the-fly gzip (text/html is always compressed
# and must not be listed). Feed types use the registered names
# application/rss+xml and application/atom+xml; "application/xml+rss" is
# not a real media type and would never match a response.
gzip_types text/plain
           text/css
           text/xml
           application/javascript
           text/javascript
           application/json
           application/xml
           application/rss+xml
           application/atom+xml
           application/wasm
           image/svg+xml;
|
||||
|
||||
# ── Pre-compressed sidecars ──────────────────────────────────────────
|
||||
# Serve <file>.gz / <file>.br when the client advertises a matching
|
||||
# Accept-Encoding. Zero request-time CPU; maximum compression ratio
|
||||
# because the sidecars were produced with gzip -9 / brotli -Z.
|
||||
gzip_static on;
|
||||
|
||||
# brotli_static requires the ngx_brotli module:
|
||||
# Arch: pacman -S nginx-mod-brotli (or build nginx-mainline with the module)
|
||||
# Debian/Ubuntu: apt install libnginx-mod-brotli
|
||||
# If the module is absent, comment out the two brotli lines below; gzip_static
|
||||
# will still cover every modern browser. Chromium/Firefox/Safari all accept gzip.
|
||||
brotli_static on;
|
||||
brotli off; # we ship pre-compressed sidecars only, no on-the-fly brotli
|
||||
|
||||
# ── Cache headers ────────────────────────────────────────────────────
|
||||
# PDF.js viewer is version-pinned in tools/download-pdfjs.sh — bumping
|
||||
# the pin is a deploy, so `immutable` is safe and makes repeat visits
|
||||
# instantaneous. Same reasoning applies to fingerprinted fonts and the
|
||||
# locally vendored ML model files.
|
||||
location ^~ /pdfjs/ {
|
||||
add_header Cache-Control "public, max-age=31536000, immutable" always;
|
||||
access_log off;
|
||||
}
|
||||
|
||||
location ^~ /fonts/ {
|
||||
add_header Cache-Control "public, max-age=31536000, immutable" always;
|
||||
access_log off;
|
||||
}
|
||||
|
||||
location ^~ /models/ {
|
||||
add_header Cache-Control "public, max-age=31536000, immutable" always;
|
||||
access_log off;
|
||||
}
|
||||
|
||||
# Per-extension caching for assets that live alongside HTML. CSS and JS
|
||||
# in this repo are not fingerprinted, so a 1-day cache with must-revalidate
|
||||
# keeps them responsive to deploys without forcing a fetch on every page.
|
||||
location ~* \.(?:css|js|mjs|woff2?|svg|webp|png|jpg|jpeg|ico)$ {
|
||||
add_header Cache-Control "public, max-age=86400, must-revalidate" always;
|
||||
access_log off;
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/env bash
|
||||
# compress-assets.sh — Generate .gz (and .br, if brotli is installed) sidecars
|
||||
# for compressible text assets in _site/.
|
||||
#
|
||||
# Pairs with nginx `gzip_static on` / `brotli_static on`: nginx serves the
|
||||
# pre-compressed file when the client advertises a matching Accept-Encoding,
|
||||
# so each build pays the compression cost once (at brotli -q 11) instead of
|
||||
# the server paying it on every request.
|
||||
#
|
||||
# Only files >= MIN_SIZE bytes are compressed — below that, the compression
|
||||
# framing overhead can exceed the savings. Sidecars are regenerated only
|
||||
# when the source is newer than the existing sidecar, so re-runs are cheap.
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/compress-assets.sh # compress _site/
|
||||
# ./tools/compress-assets.sh path/to/dir # compress a specific directory
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SITE_DIR="${1:-_site}"
|
||||
MIN_SIZE="${MIN_SIZE:-1024}" # bytes
|
||||
|
||||
if [ ! -d "$SITE_DIR" ]; then
|
||||
echo "compress-assets: directory '$SITE_DIR' not found" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
have_brotli=0
|
||||
if command -v brotli >/dev/null 2>&1; then
|
||||
have_brotli=1
|
||||
else
|
||||
echo "compress-assets: brotli not found — generating gzip only" >&2
|
||||
echo " (install: pacman -S brotli / apt install brotli)" >&2
|
||||
fi
|
||||
|
||||
# Export for subshells invoked by xargs.
|
||||
export MIN_SIZE
|
||||
export have_brotli
|
||||
|
||||
# compress_one SRC — maintain the SRC.gz / SRC.br sidecars for one file.
#
# Environment (exported by the caller so xargs-spawned shells see them):
#   MIN_SIZE     minimum source size in bytes worth compressing
#   have_brotli  "1" when the brotli CLI is available, anything else otherwise
#
# Returns 0 on success (including "nothing to do"), non-zero if the size
# lookup or a compressor/rename step failed. Sidecars are written to a
# .tmp file first and renamed into place so a failed or interrupted run
# never leaves a truncated sidecar for nginx to serve.
compress_one() {
    local src="$1"
    local size
    # GNU stat first, BSD/macOS stat as fallback.
    size=$(stat -c '%s' "$src" 2>/dev/null || stat -f '%z' "$src") || return 1

    if [ "$size" -lt "$MIN_SIZE" ]; then
        # The source is too small to be worth compressing. Remove any
        # sidecar left over from when it was larger: gzip_static /
        # brotli_static serve a sidecar whenever it exists, so a stale one
        # would ship outdated content.
        rm -f "$src.gz" "$src.br"
        return 0
    fi

    # gzip sidecar — -9 max ratio, -n strips filename/mtime for reproducible output.
    if [ ! -f "$src.gz" ] || [ "$src" -nt "$src.gz" ]; then
        if ! gzip -9 -n -c "$src" > "$src.gz.tmp" \
            || ! mv "$src.gz.tmp" "$src.gz"; then
            rm -f "$src.gz.tmp"
            return 1
        fi
    fi

    # brotli sidecar — -Z is the max quality (level 11); slow but cached.
    if [ "$have_brotli" = "1" ]; then
        if [ ! -f "$src.br" ] || [ "$src" -nt "$src.br" ]; then
            if ! brotli -Z -f -o "$src.br.tmp" "$src" \
                || ! mv "$src.br.tmp" "$src.br"; then
                rm -f "$src.br.tmp"
                return 1
            fi
        fi
    elif [ -f "$src.br" ] && [ "$src" -nt "$src.br" ]; then
        # brotli is unavailable, so an outdated .br cannot be refreshed;
        # delete it rather than let it be served in place of the new source.
        rm -f "$src.br"
    fi

    return 0
}
|
||||
export -f compress_one
|
||||
|
||||
# Extensions worth compressing. Images (png/jpg/webp) and PDFs are already
|
||||
# compressed; fonts (woff2) are already Brotli-compressed internally — don't re-wrap.
|
||||
# Walk SITE_DIR for compressible text assets and hand each path to
# compress_one in a parallel worker shell.
#
# - Paths are NUL-delimited (-print0 / -0), so any filename is safe.
# - Paths are passed as positional arguments rather than through
#   `-I {}`: BSD/macOS xargs caps the -I replacement at 255 bytes, which
#   would break long paths.
# - The `for` loop makes an invocation with no arguments (GNU xargs on
#   empty input) a harmless no-op.
# - Existing sidecars need no explicit exclusion: a name ending in one of
#   the listed extensions cannot also end in .gz or .br.
# - A failing compress_one makes the worker exit non-zero, which xargs
#   reports and `set -o pipefail` turns into a script failure.
find "$SITE_DIR" -type f \( \
        -name '*.html' -o \
        -name '*.css'  -o \
        -name '*.js'   -o \
        -name '*.mjs'  -o \
        -name '*.json' -o \
        -name '*.svg'  -o \
        -name '*.xml'  -o \
        -name '*.txt'  -o \
        -name '*.wasm' \
    \) -print0 \
    | xargs -0 -n 1 -P "$(nproc 2>/dev/null || echo 4)" \
        bash -c 'for f in "$@"; do compress_one "$f" || exit 1; done' _
|
||||
|
||||
echo "compress-assets: sidecars written under $SITE_DIR/"
|
||||
|
|
@ -60,5 +60,15 @@ mkdir -p "$PDFJS_DIR"
|
|||
echo "pdfjs: extracting to $PDFJS_DIR"
|
||||
unzip -q -o "$tmpdir/$ARCHIVE" -d "$PDFJS_DIR"
|
||||
|
||||
# Strip artifacts that are never needed by site users. Saves ~11 MB on
|
||||
# disk and in rsync; none are referenced by viewer.html at runtime.
|
||||
# *.map sourcemaps (devtools-only)
|
||||
# web/debugger.mjs, debugger.css PDF.js developer panel
|
||||
# web/compressed.tracemonkey-*.pdf demo PDF shipped as the viewer's default
|
||||
echo "pdfjs: stripping unused artifacts"
|
||||
find "$PDFJS_DIR" -type f -name '*.map' -delete
|
||||
rm -f "$PDFJS_DIR/web/debugger.mjs" "$PDFJS_DIR/web/debugger.css"
|
||||
rm -f "$PDFJS_DIR"/web/compressed.tracemonkey-*.pdf
|
||||
|
||||
echo "pdfjs: done. static/pdfjs/web/viewer.html is ready."
|
||||
echo " Run 'make build' to include it in _site/."
|
||||
|
|
|
|||
Loading…
Reference in New Issue