Fix popup previews: proxy prefix-strip bug, arXiv IDs, Wikipedia images

The root cause of 'PDF/arXiv previews simply do not work' was twofold:

1. nginx/popup-proxy.conf was never installed on the VPS — every
   /proxy/* request (arXiv, PubMed, Internet Archive) returned nginx's
   default 404. Now installed (snippets + http{}-context cache/limit
   zones in conf.d, included in the vhost, nginx -t verified, reloaded).
2. The snippet itself had a latent bug that only surfaced once
   installed: with a VARIABLE upstream, a URI part on proxy_pass is
   passed literally — every request hit the upstream's homepage
   (archive.org HTML where JSON was expected, arXiv 429s, NCBI doc-page
   redirects). Fixed with explicit prefix-strip rewrites; bad cached
   responses purged. All three proxies verified returning real data,
   including a live arXiv title resolve.

Client-side improvements:
- arXiv match covers old-style IDs (cs/9901002, math.GT/0309136,
  cond-mat/...v1) alongside new-style, and .pdf-suffixed /pdf/ URLs
  (regex verified against six forms)
- Wikipedia popups show the article's lead image: pageimages rides
  along the existing extracts call (pithumbsize=320), rendered via a
  new https-only image slot in renderPopup with float styling;
  upload.wikimedia.org added to the CSP's img-src
- pdf-thumbs now walks all of static/ (pdfjs pruned), so /cv.pdf and
  /resume.pdf — the most-linked internal PDFs, previously thumbnail-less
  and therefore popup-less — get hover previews

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Levi Neuwirth 2026-06-10 12:06:13 -04:00
parent 5d344f940e
commit 23250d8782
7 changed files with 56 additions and 9 deletions

View File

@ -122,8 +122,11 @@ pdf-thumbs:
# A failing pdftoppm must at least warn: the `find | while` pipeline's # A failing pdftoppm must at least warn: the `find | while` pipeline's
# exit status is the last iteration's, so without the `||` a corrupt # exit status is the last iteration's, so without the `||` a corrupt
# PDF would silently ship without a thumbnail. # PDF would silently ship without a thumbnail.
# Walk ALL of static/ (not just papers/): /cv.pdf and /resume.pdf are
# the most-linked PDFs on the site and need hover thumbnails too.
# pdfjs/ is pruned — the vendored viewer ships sample PDFs.
@if command -v pdftoppm >/dev/null 2>&1; then \ @if command -v pdftoppm >/dev/null 2>&1; then \
find static/papers -name '*.pdf' 2>/dev/null | while read pdf; do \ find static -path static/pdfjs -prune -o -name '*.pdf' -print 2>/dev/null | while read pdf; do \
thumb="$${pdf%.pdf}.thumb"; \ thumb="$${pdf%.pdf}.thumb"; \
if [ ! -f "$${thumb}.png" ] || [ "$$pdf" -nt "$${thumb}.png" ]; then \ if [ ! -f "$${thumb}.png" ] || [ "$$pdf" -nt "$${thumb}.png" ]; then \
echo " pdf-thumb $$pdf"; \ echo " pdf-thumb $$pdf"; \

View File

@ -33,7 +33,12 @@ resolver_timeout 5s;
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe. # (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
location /proxy/arxiv/ { location /proxy/arxiv/ {
set $upstream_arxiv export.arxiv.org; set $upstream_arxiv export.arxiv.org;
proxy_pass https://$upstream_arxiv/; # With a VARIABLE upstream, a URI part on proxy_pass is passed to
# the upstream literally — "proxy_pass https://$up/;" sends every
# request to the upstream's homepage instead of prefix-stripping.
# Strip the prefix explicitly; `break` keeps args intact.
rewrite ^/proxy/arxiv/(.*)$ /$1 break;
proxy_pass https://$upstream_arxiv;
proxy_set_header Host $upstream_arxiv; proxy_set_header Host $upstream_arxiv;
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)"; proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
proxy_ssl_server_name on; proxy_ssl_server_name on;
@ -55,7 +60,10 @@ location /proxy/arxiv/ {
# change, but rarely; 7d strikes a reasonable balance. # change, but rarely; 7d strikes a reasonable balance.
location /proxy/archive/ { location /proxy/archive/ {
set $upstream_archive archive.org; set $upstream_archive archive.org;
proxy_pass https://$upstream_archive/; # Prefix-strip explicitly — see the arXiv block for why a URI part
# on a variable proxy_pass would break this.
rewrite ^/proxy/archive/(.*)$ /$1 break;
proxy_pass https://$upstream_archive;
proxy_set_header Host $upstream_archive; proxy_set_header Host $upstream_archive;
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)"; proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
proxy_ssl_server_name on; proxy_ssl_server_name on;
@ -76,7 +84,10 @@ location /proxy/archive/ {
# them server-side so popups.js stays focused on rendering. # them server-side so popups.js stays focused on rendering.
location /proxy/pubmed/ { location /proxy/pubmed/ {
set $upstream_pubmed eutils.ncbi.nlm.nih.gov; set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
proxy_pass https://$upstream_pubmed/; # Prefix-strip explicitly — see the arXiv block for why a URI part
# on a variable proxy_pass would break this.
rewrite ^/proxy/pubmed/(.*)$ /$1 break;
proxy_pass https://$upstream_pubmed;
proxy_set_header Host $upstream_pubmed; proxy_set_header Host $upstream_pubmed;
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)"; proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
proxy_ssl_server_name on; proxy_ssl_server_name on;

View File

@ -75,4 +75,4 @@ add_header Permissions-Policy
# #
# To collect violation reports, set up a `report-uri` endpoint and add # To collect violation reports, set up a `report-uri` endpoint and add
# `report-uri /csp-report;` (and/or `report-to <group>;`) below. # `report-uri /csp-report;` (and/or `report-to <group>;`) below.
add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com; font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always; add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com https://upload.wikimedia.org;font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always;

View File

@ -78,6 +78,18 @@
line-height: 1.35; line-height: 1.35;
} }
/* Optional lead image (Wikipedia pageimages thumbnail, etc.) floats
beside the title/extract so text wraps around it; contained by the
popup's own overflow box. */
.popup-image {
    /* Float to the right edge so the title/extract text wraps around it. */
    float: right;

    /* Spacing only on the sides that face text: below and to the left. */
    margin-top: 0;
    margin-right: 0;
    margin-bottom: 0.4rem;
    margin-left: 0.6rem;

    /* Cap both dimensions so the image cannot exceed this box. */
    max-height: 120px;
    max-width: 96px;

    /* Thin rounded frame using the theme's muted border colour. */
    border: 1px solid var(--border-muted);
    border-radius: 4px;
}
.popup-abstract, .popup-abstract,
.popup-extract { .popup-extract {
font-size: 0.78rem; font-size: 0.78rem;

BIN
static/cv.thumb.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

View File

@ -453,6 +453,14 @@
var html = '<div class="popup-' + p.name + '">' var html = '<div class="popup-' + p.name + '">'
+ srcHtml(iconKey, p.label); + srcHtml(iconKey, p.label);
/* Optional lead image (e.g. Wikipedia pageimages thumbnail).
https-only: the URL comes from the provider's API response,
and anything else (protocol-relative, data:, …) is dropped
rather than guessed at. esc() handles attribute safety. */
if (fields.image && /^https:\/\//.test(fields.image)) {
html += '<img class="popup-image" src="' + esc(fields.image)
+ '" alt="" loading="lazy">';
}
if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>'; if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>';
html += '<div class="popup-title">' + esc(fields.title) + '</div>'; html += '<div class="popup-title">' + esc(fields.title) + '</div>';
if (authors) html += '<div class="popup-authors">' + esc(authors) + '</div>'; if (authors) html += '<div class="popup-authors">' + esc(authors) + '</div>';
@ -515,8 +523,14 @@
var hostMatch = ctx.href.match(/\/\/([a-z0-9-]+)\.wikipedia\.org\//i); var hostMatch = ctx.href.match(/\/\/([a-z0-9-]+)\.wikipedia\.org\//i);
var sub = hostMatch ? hostMatch[1].toLowerCase() : 'en'; var sub = hostMatch ? hostMatch[1].toLowerCase() : 'en';
if (sub === 'www') sub = 'en'; if (sub === 'www') sub = 'en';
/* pageimages|extracts in one call: the article's lead
image thumbnail rides along with the intro text.
Thumbnails come from upload.wikimedia.org — that host
must stay in the CSP's img-src. */
return 'https://' + sub + '.wikipedia.org/w/api.php' return 'https://' + sub + '.wikipedia.org/w/api.php'
+ '?action=query&prop=extracts&exintro=1&format=json&redirects=1' + '?action=query&prop=extracts%7Cpageimages&exintro=1'
+ '&piprop=thumbnail&pithumbsize=320'
+ '&format=json&redirects=1'
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1])) + '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
+ '&origin=*'; + '&origin=*';
}, },
@ -533,14 +547,21 @@
}); });
var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim(); var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
if (!text) return null; if (!text) return null;
return { title: page.title, extract: text }; return {
title: page.title,
extract: text,
image: page.thumbnail && page.thumbnail.source
};
} }
}, },
/* arXiv — Atom API (CORS-broken upstream, proxied). */ /* arXiv — Atom API (CORS-broken upstream, proxied).
ID forms: new-style 2403.12345(v2), and old-style
archive/0211159 or archive.SC/0211159 (pre-2007); /pdf/ URLs
may carry a trailing .pdf, which stays outside the capture. */
{ {
name: 'arxiv', label: 'arXiv', name: 'arxiv', label: 'arXiv',
match: /arxiv\.org\/(?:abs|pdf)\/(\d{4}\.\d{4,5}(?:v\d+)?)/, match: /arxiv\.org\/(?:abs|pdf)\/((?:\d{4}\.\d{4,5}|[a-z-]+(?:\.[A-Z]{2})?\/\d{7})(?:v\d+)?)/,
fetchType: 'xml', fetchType: 'xml',
url: function (ctx) { url: function (ctx) {
return '/proxy/arxiv/api/query?id_list=' return '/proxy/arxiv/api/query?id_list='

BIN
static/resume.thumb.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 270 KiB