PDF compression
This commit is contained in:
parent
3a95a05284
commit
6d2f9d12ae
9
Makefile
9
Makefile
|
|
@ -1,4 +1,4 @@
|
||||||
.PHONY: build deploy sign download-model download-pdfjs convert-images pdf-thumbs pdfs watch clean dev
|
.PHONY: build deploy sign download-model download-pdfjs compress-assets convert-images pdf-thumbs pdfs watch clean dev
|
||||||
|
|
||||||
# Source .env for GITHUB_TOKEN and GITHUB_REPO if it exists.
|
# Source .env for GITHUB_TOKEN and GITHUB_REPO if it exists.
|
||||||
# .env format: KEY=value (one per line, no `export` prefix, no quotes needed).
|
# .env format: KEY=value (one per line, no `export` prefix, no quotes needed).
|
||||||
|
|
@ -25,6 +25,7 @@ build:
|
||||||
else \
|
else \
|
||||||
echo "Embedding skipped: run 'uv sync' to enable similar-links (build continues)"; \
|
echo "Embedding skipped: run 'uv sync' to enable similar-links (build continues)"; \
|
||||||
fi
|
fi
|
||||||
|
@./tools/compress-assets.sh _site
|
||||||
> IGNORE.txt
|
> IGNORE.txt
|
||||||
@BUILD_END=$$(date +%s); \
|
@BUILD_END=$$(date +%s); \
|
||||||
BUILD_START=$$(cat data/build-start.txt); \
|
BUILD_START=$$(cat data/build-start.txt); \
|
||||||
|
|
@ -44,6 +45,12 @@ download-model:
|
||||||
download-pdfjs:
|
download-pdfjs:
|
||||||
@./tools/download-pdfjs.sh
|
@./tools/download-pdfjs.sh
|
||||||
|
|
||||||
|
# Generate .gz and .br sidecars for compressible text assets in _site/.
|
||||||
|
# Runs automatically as part of `build`. Pairs with `gzip_static` /
|
||||||
|
# `brotli_static` in the nginx vhost (see nginx/static-assets.conf).
|
||||||
|
# Standalone entry point for the same script call the `build` recipe makes.
# The target is listed in .PHONY, so it runs on every invocation. `_site` is
# passed explicitly even though the script also defaults to it.
compress-assets:
|
||||||
|
@./tools/compress-assets.sh _site
|
||||||
|
|
||||||
# Convert JPEG/PNG images to WebP companions (also runs automatically in build).
|
# Convert JPEG/PNG images to WebP companions (also runs automatically in build).
|
||||||
# Requires cwebp: pacman -S libwebp / apt install webp
|
# Requires cwebp: pacman -S libwebp / apt install webp
|
||||||
convert-images:
|
convert-images:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
# static-assets.conf — Compression + long-lived cache headers for static assets.
|
||||||
|
#
|
||||||
|
# Place at /etc/nginx/snippets/static-assets.conf and `include` it inside
|
||||||
|
# the server { } block of the levineuwirth.org vhost:
|
||||||
|
#
|
||||||
|
# server {
|
||||||
|
# server_name levineuwirth.org;
|
||||||
|
# root /var/www/levineuwirth.org;
|
||||||
|
# ...
|
||||||
|
# include snippets/static-assets.conf;
|
||||||
|
# include snippets/popup-proxy.conf;
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# Pairs with tools/compress-assets.sh, which pre-generates .gz and .br
|
||||||
|
# sidecars at build time so nginx never pays the compression cost at
|
||||||
|
# request time.
|
||||||
|
|
||||||
|
# ── On-the-fly gzip (fallback) ───────────────────────────────────────
# Covers dynamically generated responses and any file for which a .gz
# sidecar was not produced (e.g. files smaller than compress-assets.sh's
# MIN_SIZE threshold, or extensions not on its allow-list).
gzip on;
gzip_vary on;
gzip_comp_level 6;
gzip_min_length 256;
gzip_proxied any;
# text/html is always compressed by nginx and need not be listed.
# Feed types use the names nginx's mime.types actually emits
# (application/rss+xml, application/atom+xml); "application/xml+rss"
# matches no response and was a no-op.
gzip_types text/plain
           text/css
           text/xml
           application/javascript
           text/javascript
           application/json
           application/xml
           application/rss+xml
           application/atom+xml
           application/wasm
           image/svg+xml;

# ── Pre-compressed sidecars ──────────────────────────────────────────
# Serve <file>.gz / <file>.br when the client advertises a matching
# Accept-Encoding. Zero request-time CPU; maximum compression ratio
# because the sidecars were produced with gzip -9 / brotli -Z.
gzip_static on;

# brotli_static requires the ngx_brotli module:
#   Arch: pacman -S nginx-mod-brotli (or build nginx-mainline with the module)
#   Debian/Ubuntu: apt install libnginx-mod-brotli
# If the module is absent, comment out the two brotli lines below; gzip_static
# will still cover every modern browser. Chromium/Firefox/Safari all accept gzip.
brotli_static on;
brotli off; # we ship pre-compressed sidecars only, no on-the-fly brotli
|
||||||
|
|
||||||
|
# ── Cache headers ────────────────────────────────────────────────────
|
||||||
|
# PDF.js viewer is version-pinned in tools/download-pdfjs.sh — bumping
|
||||||
|
# the pin is a deploy, so `immutable` is safe and makes repeat visits
|
||||||
|
# instantaneous. Same reasoning applies to fingerprinted fonts and the
|
||||||
|
# locally vendored ML model files.
|
||||||
|
# `^~` prefix locations: when one of these is the longest matching prefix,
# nginx does not evaluate regex locations, so the extension-based block
# further down never applies to URIs under /pdfjs/, /fonts/ or /models/.
# `always` attaches the header to every response code, not only 2xx/3xx.
# NOTE(review): add_header inside a location replaces (does not extend)
# headers inherited from the server level — confirm the vhost sets none
# that these paths still need.
location ^~ /pdfjs/ {
|
||||||
|
add_header Cache-Control "public, max-age=31536000, immutable" always;
|
||||||
|
access_log off;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ^~ /fonts/ {
|
||||||
|
add_header Cache-Control "public, max-age=31536000, immutable" always;
|
||||||
|
access_log off;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ^~ /models/ {
|
||||||
|
add_header Cache-Control "public, max-age=31536000, immutable" always;
|
||||||
|
access_log off;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Per-extension caching for assets that live alongside HTML. CSS and JS
|
||||||
|
# in this repo are not fingerprinted, so they are cached for 1 day; a deploy
|
||||||
|
# can take up to 24 h to reach returning visitors, after which must-revalidate
# makes the browser recheck with the server before reusing its copy.
|
||||||
|
# Case-insensitive (`~*`) match on the URI's file extension.
location ~* \.(?:css|js|mjs|woff2?|svg|webp|png|jpg|jpeg|ico)$ {
|
||||||
|
add_header Cache-Control "public, max-age=86400, must-revalidate" always;
|
||||||
|
access_log off;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# compress-assets.sh — Generate .gz (and .br, if brotli is installed) sidecars
|
||||||
|
# for compressible text assets in _site/.
|
||||||
|
#
|
||||||
|
# Pairs with nginx `gzip_static on` / `brotli_static on`: nginx serves the
|
||||||
|
# pre-compressed file when the client advertises a matching Accept-Encoding,
|
||||||
|
# so each build pays the compression cost once (at brotli -q 11) instead of
|
||||||
|
# the server paying it on every request.
|
||||||
|
#
|
||||||
|
# Only files >= MIN_SIZE bytes are compressed — below that, the compression
|
||||||
|
# framing overhead can exceed the savings. Sidecars are regenerated only
|
||||||
|
# when the source is newer than the existing sidecar, so re-runs are cheap.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ./tools/compress-assets.sh # compress _site/
|
||||||
|
# ./tools/compress-assets.sh path/to/dir # compress a specific directory
|
||||||
|
|
||||||
|
set -euo pipefail

# SITE_DIR: directory to scan (first argument, default _site).
# MIN_SIZE: smallest file, in bytes, worth compressing; may be overridden
#           through the environment.
SITE_DIR="${1:-_site}"
MIN_SIZE="${MIN_SIZE:-1024}"   # bytes

# MIN_SIZE is compared with `[ ... -lt ... ]` for every file. A non-numeric
# override would make that test error out (and be treated as false) per file,
# silently compressing everything — reject it once, up front.
case "$MIN_SIZE" in
    *[!0-9]*)
        echo "compress-assets: MIN_SIZE must be a non-negative integer (got '$MIN_SIZE')" >&2
        exit 1
        ;;
esac

if [ ! -d "$SITE_DIR" ]; then
    echo "compress-assets: directory '$SITE_DIR' not found" >&2
    exit 1
fi

# brotli is optional: without it only .gz sidecars are produced.
have_brotli=0
if command -v brotli >/dev/null 2>&1; then
    have_brotli=1
else
    echo "compress-assets: brotli not found — generating gzip only" >&2
    echo " (install: pacman -S brotli / apt install brotli)" >&2
fi

# Export for subshells invoked by xargs.
export MIN_SIZE
export have_brotli
|
||||||
|
|
||||||
|
# compress_one SRC — write SRC.gz (and SRC.br when brotli is available).
#
# Reads MIN_SIZE and have_brotli from the environment; both are exported by
# the script so the function also works inside the `bash -c` children that
# xargs spawns. Files smaller than MIN_SIZE bytes are skipped. A sidecar is
# rebuilt only when it is missing or older than SRC. Each sidecar is written
# under a temporary name and renamed into place, so a partially written file
# is never visible under the final name.
#
# Returns 0 on success or skip, 1 if stat or any compressor failed. The
# temporary file of a failed step is removed. Previously a gzip failure was
# masked by the exit status of the brotli step that follows it.
compress_one() {
    local src="$1"
    local size
    local status=0

    # GNU stat first, BSD stat as the fallback.
    if ! size=$(stat -c '%s' "$src" 2>/dev/null || stat -f '%z' "$src"); then
        echo "compress-assets: cannot stat '$src'" >&2
        return 1
    fi
    if [ "$size" -lt "$MIN_SIZE" ]; then
        return 0
    fi

    # gzip sidecar — -9 max ratio, -n strips filename/mtime for reproducible output.
    if [ ! -f "$src.gz" ] || [ "$src" -nt "$src.gz" ]; then
        if ! { gzip -9 -n -c "$src" > "$src.gz.tmp" && mv -f "$src.gz.tmp" "$src.gz"; }; then
            rm -f "$src.gz.tmp"
            echo "compress-assets: gzip failed for '$src'" >&2
            status=1
        fi
    fi

    # brotli sidecar — -Z is the max quality (level 11); slow but cached.
    if [ "$have_brotli" = "1" ]; then
        if [ ! -f "$src.br" ] || [ "$src" -nt "$src.br" ]; then
            if ! { brotli -Z -f -o "$src.br.tmp" "$src" && mv -f "$src.br.tmp" "$src.br"; }; then
                rm -f "$src.br.tmp"
                echo "compress-assets: brotli failed for '$src'" >&2
                status=1
            fi
        fi
    fi

    return "$status"
}
export -f compress_one
|
||||||
|
|
||||||
|
# File extensions that are worth compressing. Raster images (png/jpg/webp)
# and PDFs are already compressed, and woff2 fonts are Brotli-compressed
# internally, so none of those are listed.
compressible=(html css js mjs json svg xml txt wasm)

# Turn the list into a find expression: -name '*.a' -o -name '*.b' ...
name_expr=()
for ext in "${compressible[@]}"; do
    name_expr+=(-o -name "*.${ext}")
done
name_expr=("${name_expr[@]:1}")   # drop the leading -o

# One worker per CPU when nproc is available, otherwise 4.
n_jobs="$(nproc 2>/dev/null || echo 4)"

# NUL-delimited hand-off keeps unusual filenames intact; each file is
# processed by compress_one in its own bash child.
find "$SITE_DIR" -type f \( "${name_expr[@]}" \) \
        -not -name '*.gz' \
        -not -name '*.br' \
        -print0 \
    | xargs -0 -P "$n_jobs" -I {} bash -c 'compress_one "$@"' _ {}

echo "compress-assets: sidecars written under $SITE_DIR/"
|
||||||
|
|
@ -60,5 +60,15 @@ mkdir -p "$PDFJS_DIR"
|
||||||
echo "pdfjs: extracting to $PDFJS_DIR"
|
echo "pdfjs: extracting to $PDFJS_DIR"
|
||||||
unzip -q -o "$tmpdir/$ARCHIVE" -d "$PDFJS_DIR"
|
unzip -q -o "$tmpdir/$ARCHIVE" -d "$PDFJS_DIR"
|
||||||
|
|
||||||
|
# Prune files from the extracted PDF.js tree that site visitors never need
# (about 11 MB less on disk and in rsync). Per the original note, none of
# them are referenced by viewer.html at runtime:
#   *.map                               sourcemaps (devtools-only)
#   web/debugger.mjs, web/debugger.css  PDF.js developer panel
#   web/compressed.tracemonkey-*.pdf    demo PDF shipped as the viewer's default
echo "pdfjs: stripping unused artifacts"
find "$PDFJS_DIR" -type f -name '*.map' -delete
rm -f \
    "$PDFJS_DIR/web/debugger.mjs" \
    "$PDFJS_DIR/web/debugger.css" \
    "$PDFJS_DIR"/web/compressed.tracemonkey-*.pdf
|
||||||
|
|
||||||
echo "pdfjs: done. static/pdfjs/web/viewer.html is ready."
|
echo "pdfjs: done. static/pdfjs/web/viewer.html is ready."
|
||||||
echo " Run 'make build' to include it in _site/."
|
echo " Run 'make build' to include it in _site/."
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue