# popup-proxy.conf — same-origin reverse proxy for the popups.js
# providers whose upstream APIs send no CORS headers (arXiv, NCBI/PubMed,
# Internet Archive). The proxied metadata is immutable or changes only
# rarely, so cache TTLs are generous and a manual `proxy_cache_purge`
# is unnecessary.
#
# Install as /etc/nginx/snippets/popup-proxy.conf and `include` it from
# the server { } block of the levineuwirth.org vhost. `proxy_cache_path`
# is only valid in the http { } context, so declare it in nginx.conf or
# the relevant conf.d/ file:
#
#   http {
#       proxy_cache_path /var/cache/nginx/popup-proxy
#                        levels=1:2 keys_zone=popup_proxy:16m
#                        max_size=512m inactive=60d use_temp_path=off;
#       ...
#   }
#
#   server {
#       server_name levineuwirth.org;
#       ...
#       include snippets/popup-proxy.conf;
#   }

# Every location here is a `^~` prefix match. A plain prefix match would
# lose to the regex location in static-assets.conf (per-extension cache
# headers), which would capture any proxied URL ending in an image
# extension — e.g. an arXiv figure .png — and answer with a local 404
# instead of proxying. `^~` short-circuits regex evaluation for the
# subtree.

# Shared resolver. Required because proxy_pass is given a variable
# upstream: literal upstreams are resolved once at startup, whereas a
# variable defers DNS to request time. That lets nginx start while an
# upstream is unreachable and lets it follow upstream IP changes.
resolver         1.1.1.1 8.8.8.8 ipv6=off valid=300s;
resolver_timeout 5s;

# ── arXiv ────────────────────────────────────────────────────────────
# Atom feed of paper metadata. Abstracts never change after publication
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
location ^~ /proxy/arxiv/ {
    # `set` must precede the rewrite: `break` ends rewrite-module
    # processing for this request.
    set $upstream_arxiv export.arxiv.org;

    # With a VARIABLE upstream, a URI part on proxy_pass is sent to the
    # upstream literally — "proxy_pass https://$up/;" would route every
    # request to the upstream's homepage rather than prefix-stripping.
    # So the prefix is stripped explicitly; `break` keeps args intact.
    rewrite ^/proxy/arxiv/(.*)$ /$1 break;
    proxy_pass https://$upstream_arxiv;

    # Upstream request identity and TLS SNI.
    proxy_ssl_server_name on;
    proxy_set_header Host $upstream_arxiv;
    proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";

    # Caching.
    proxy_cache           popup_proxy;
    proxy_cache_lock      on;
    proxy_cache_valid     200 30d;
    proxy_cache_valid     any 5m;
    proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;

    # Security baseline. The add_header directives at the end of this
    # block would otherwise drop the baseline for /proxy/ responses
    # (same pattern as archive.conf), so it is re-included here. The
    # upstream's own security headers are hidden first: browsers honor
    # only the FIRST Strict-Transport-Security header (RFC 6797 §8.1),
    # so an upstream's short max-age passing through ahead of ours would
    # downgrade the domain's cached HSTS policy on every popup fetch.
    proxy_hide_header X-Frame-Options;
    proxy_hide_header Content-Security-Policy;
    proxy_hide_header Strict-Transport-Security;
    include snippets/security-headers.conf;

    add_header X-Cache-Status $upstream_cache_status always;

    # Belt-and-suspenders: same-origin needs no CORS today, but a future
    # move of popups.js to a worker or a different origin would.
    add_header Access-Control-Allow-Origin "$scheme://$host" always;
}

# ── arXiv HTML renditions (lead figures) ─────────────────────────────
# The arXiv enrich step in popups.js fetches the LaTeXML HTML page to
# find the paper's lead figure, then loads the figure image itself —
# both through this location. Upstream is arxiv.org proper, NOT
# export.arxiv.org: the export host serves the Atom API fine but
# rate-limits the /html/ asset tree (429s on figures). Pages can be
# large (hundreds of KB), which is exactly why they are cached here.
# A 404 is the common no-HTML-rendition case (pre-2024 papers,
# unconvertible sources) — cached briefly so hovers don't hammer it.
location ^~ /proxy/arxiv-html/ {
    set $upstream_arxiv_site arxiv.org;

    # Map /proxy/arxiv-html/<rest> to /html/<rest> on the upstream.
    # The proxy_pass target is a variable and carries no URI part, so
    # the rewritten URI (plus the original args) is what gets sent.
    rewrite ^/proxy/arxiv-html/(.*)$ /html/$1 break;
    proxy_pass https://$upstream_arxiv_site;
    proxy_set_header Host $upstream_arxiv_site;
    proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
    proxy_ssl_server_name on;

    # Keep the security baseline: the add_header directives below
    # would otherwise drop it for /proxy/ responses (same pattern
    # as archive.conf). The upstream's own security headers are hidden
    # first — browsers honor only the FIRST Strict-Transport-Security
    # header (RFC 6797 §8.1), so an upstream's short max-age passing
    # through ahead of ours would downgrade the domain's cached HSTS
    # policy on every popup fetch.
    proxy_hide_header Strict-Transport-Security;
    proxy_hide_header Content-Security-Policy;
    proxy_hide_header X-Frame-Options;
    include snippets/security-headers.conf;

    proxy_cache popup_proxy;
    proxy_cache_valid 200 30d;
    # Missing HTML renditions are cached for a day.
    proxy_cache_valid 404 1d;
    proxy_cache_valid any 5m;
    proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
    proxy_cache_lock on;

    add_header X-Cache-Status $upstream_cache_status always;
    add_header Access-Control-Allow-Origin "$scheme://$host" always;
}

# ── Internet Archive ─────────────────────────────────────────────────
# Item metadata JSON. Item descriptions are author-edited and could
# change, but rarely; 7d strikes a reasonable balance.
location ^~ /proxy/archive/ {
    set $upstream_archive archive.org;

    # Prefix-strip explicitly — see the arXiv block for why a URI part
    # on a variable proxy_pass would break this.
    rewrite ^/proxy/archive/(.*)$ /$1 break;
    proxy_pass https://$upstream_archive;
    proxy_set_header Host $upstream_archive;
    proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
    proxy_ssl_server_name on;

    # Keep the security baseline: the add_header directives below
    # would otherwise drop it for /proxy/ responses (same pattern
    # as archive.conf). The upstream's own security headers are hidden
    # first — browsers honor only the FIRST Strict-Transport-Security
    # header (RFC 6797 §8.1), so an upstream's short max-age passing
    # through ahead of ours would downgrade the domain's cached HSTS
    # policy on every popup fetch.
    proxy_hide_header Strict-Transport-Security;
    proxy_hide_header Content-Security-Policy;
    proxy_hide_header X-Frame-Options;
    include snippets/security-headers.conf;

    proxy_cache popup_proxy;
    proxy_cache_valid 200 7d;
    proxy_cache_valid any 5m;
    proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
    proxy_cache_lock on;

    add_header X-Cache-Status $upstream_cache_status always;
    add_header Access-Control-Allow-Origin "$scheme://$host" always;
}

# ── PubMed (NCBI E-utilities) ────────────────────────────────────────
# Article summaries. NCBI requests a tool=/email= identifier on every
# request (https://www.ncbi.nlm.nih.gov/books/NBK25497/); we inject
# them server-side so popups.js stays focused on rendering.
location ^~ /proxy/pubmed/ {
    set $upstream_pubmed eutils.ncbi.nlm.nih.gov;

    # Prefix-strip explicitly — see the arXiv block for why a URI part
    # on a variable proxy_pass would break this. The replacement also
    # carries the NCBI tool=/email= identifiers: when a rewrite
    # replacement contains arguments, nginx appends the original
    # request arguments after them, so the client's query string
    # (db=, id=, ...) is preserved.
    # NOTE(review): if popups.js ever sends its own tool=/email=, the
    # upstream request will carry both copies — keep them out of the
    # client.
    rewrite ^/proxy/pubmed/(.*)$ /$1?tool=levineuwirth-popup-proxy&email=ln@levineuwirth.org break;
    proxy_pass https://$upstream_pubmed;
    proxy_set_header Host $upstream_pubmed;
    proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
    proxy_ssl_server_name on;

    # NCBI etiquette: at most 3 req/s without an API key. limit_req is
    # checked before the cache is consulted, so cache hits count
    # against the limit too, and the companion zone is keyed on the
    # client address — this caps each visitor, not the aggregate rate
    # toward NCBI. The burst absorbs a hot page firing several hovers
    # at once.
    limit_req zone=pubmed burst=3 nodelay;

    # Keep the security baseline: the add_header directives below
    # would otherwise drop it for /proxy/ responses (same pattern
    # as archive.conf). The upstream's own security headers are hidden
    # first — browsers honor only the FIRST Strict-Transport-Security
    # header (RFC 6797 §8.1), so an upstream's short max-age passing
    # through ahead of ours would downgrade the domain's cached HSTS
    # policy on every popup fetch.
    proxy_hide_header Strict-Transport-Security;
    proxy_hide_header Content-Security-Policy;
    proxy_hide_header X-Frame-Options;
    include snippets/security-headers.conf;

    proxy_cache popup_proxy;
    proxy_cache_valid 200 30d;
    proxy_cache_valid any 5m;
    proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
    proxy_cache_lock on;

    add_header X-Cache-Status $upstream_cache_status always;
    add_header Access-Control-Allow-Origin "$scheme://$host" always;
}

# Companion directive for the limit_req above. Place in http { } context:
#
#   http {
#       limit_req_zone $binary_remote_addr zone=pubmed:1m rate=3r/s;
#       ...
#   }