Fix popup previews: proxy prefix-strip bug, arXiv IDs, Wikipedia images

The root cause of 'PDF/arXiv previews simply do not work' was twofold: 1. nginx/popup-proxy.conf was never installed on the VPS — every /proxy/* request (arXiv, PubMed, Internet Archive) returned nginx's default 404. Now installed (snippets + http{}-context cache/limit zones in conf.d, included in the vhost, nginx -t verified, reloaded). 2. The snippet itself had a latent bug that only surfaced once installed: with a VARIABLE upstream, a URI part on proxy_pass is passed literally — every request hit the upstream's homepage (archive.org HTML where JSON was expected, arXiv 429s, NCBI doc-page redirects). Fixed with explicit prefix-strip rewrites; bad cached responses purged. All three proxies verified returning real data, including a live arXiv title resolve. Client-side improvements: - arXiv match covers old-style IDs (cs/9901002, math.GT/0309136, cond-mat/...v1) alongside new-style, and .pdf-suffixed /pdf/ URLs (regex verified against six forms) - Wikipedia popups show the article's lead image: pageimages rides along the existing extracts call (pithumbsize=320), rendered via a new https-only image slot in renderPopup with float styling; upload.wikimedia.org added to the CSP's img-src - pdf-thumbs now walks all of static/ (pdfjs pruned), so /cv.pdf and /resume.pdf — the most-linked internal PDFs, previously thumbnail-less and therefore popup-less — get hover previews Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:06:13 -04:00 · 2026-06-10 12:06:13 -04:00 · 23250d8782
parent 5d344f940e
commit 23250d8782
7 changed files with 56 additions and 9 deletions
--- a/5
+++ b/5
@@ -122,8 +122,11 @@ pdf-thumbs:
 	# A failing pdftoppm must at least warn: the `find | while` pipeline's
 	# exit status is the last iteration's, so without the `||` a corrupt
 	# PDF would silently ship without a thumbnail.
+	# Walk ALL of static/ (not just papers/): /cv.pdf and /resume.pdf are
+	# the most-linked PDFs on the site and need hover thumbnails too.
+	# pdfjs/ is pruned — the vendored viewer ships sample PDFs.
 	@if command -v pdftoppm >/dev/null 2>&1; then \
-	  find static/papers -name '*.pdf' 2>/dev/null | while read pdf; do \
+	  find static -path static/pdfjs -prune -o -name '*.pdf' -print 2>/dev/null | while read pdf; do \
 	    thumb="$${pdf%.pdf}.thumb"; \
 	    if [ ! -f "$${thumb}.png" ] || [ "$$pdf" -nt "$${thumb}.png" ]; then \
 	      echo "  pdf-thumb $$pdf"; \
--- a/nginx/popup-proxy.conf
+++ b/nginx/popup-proxy.conf
@@ -33,7 +33,12 @@ resolver_timeout 5s;
 # (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
 location /proxy/arxiv/ {
    set $upstream_arxiv export.arxiv.org;
-    proxy_pass https://$upstream_arxiv/;
+    # With a VARIABLE upstream, a URI part on proxy_pass is passed to
+    # the upstream literally — "proxy_pass https://$up/;" sends every
+    # request to the upstream's homepage instead of prefix-stripping.
+    # Strip the prefix explicitly: rewrite re-appends the query string by default, and `break` stops rewrite processing so the request stays in this location.
+    rewrite ^/proxy/arxiv/(.*)$ /$1 break;
+    proxy_pass https://$upstream_arxiv;
    proxy_set_header Host $upstream_arxiv;
    proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
    proxy_ssl_server_name on;
@@ -55,7 +60,10 @@ location /proxy/arxiv/ {
 # change, but rarely; 7d strikes a reasonable balance.
 location /proxy/archive/ {
    set $upstream_archive archive.org;
-    proxy_pass https://$upstream_archive/;
+    # Prefix-strip explicitly — see the arXiv block for why a URI part
+    # on a variable proxy_pass would break this.
+    rewrite ^/proxy/archive/(.*)$ /$1 break;
+    proxy_pass https://$upstream_archive;
    proxy_set_header Host $upstream_archive;
    proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
    proxy_ssl_server_name on;
@@ -76,7 +84,10 @@ location /proxy/archive/ {
 # them server-side so popups.js stays focused on rendering.
 location /proxy/pubmed/ {
    set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
-    proxy_pass https://$upstream_pubmed/;
+    # Prefix-strip explicitly — see the arXiv block for why a URI part
+    # on a variable proxy_pass would break this.
+    rewrite ^/proxy/pubmed/(.*)$ /$1 break;
+    proxy_pass https://$upstream_pubmed;
    proxy_set_header Host $upstream_pubmed;
    proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
    proxy_ssl_server_name on;
--- a/nginx/security-headers.conf
+++ b/nginx/security-headers.conf
@@ -75,4 +75,4 @@ add_header Permissions-Policy
 #
 # To collect violation reports, set up a `report-uri` endpoint and add
 # `report-uri /csp-report;` (and/or `report-to <group>;`) below.
-add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com; font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always;
+add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com https://upload.wikimedia.org; font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always;
--- a/static/css/popups.css
+++ b/static/css/popups.css
@@ -78,6 +78,18 @@
    line-height: 1.35;
 }

+/* Optional lead image (Wikipedia pageimages thumbnail, etc.) — floats
+   beside the title/extract so text wraps around it; contained by the
+   popup's own overflow box. */
+.popup-image {
+    float: right;
+    max-width: 96px;
+    max-height: 120px;
+    margin: 0 0 0.4rem 0.6rem;
+    border-radius: 4px;
+    border: 1px solid var(--border-muted);
+}
+
 .popup-abstract,
 .popup-extract {
    font-size: 0.78rem;
--- a/static/cv.thumb.png
+++ b/static/cv.thumb.png
--- a/static/js/popups.js
+++ b/static/js/popups.js
@@ -453,6 +453,14 @@

        var html = '<div class="popup-' + p.name + '">'
                 + srcHtml(iconKey, p.label);
+        /* Optional lead image (e.g. Wikipedia pageimages thumbnail).
+           https-only: the URL comes from the provider's API response,
+           and anything else (protocol-relative, data:, …) is dropped
+           rather than guessed at. esc() handles attribute safety. */
+        if (fields.image && /^https:\/\//.test(fields.image)) {
+            html += '<img class="popup-image" src="' + esc(fields.image)
+                  + '" alt="" loading="lazy">';
+        }
        if (fields.tags)    html += '<div class="popup-tags">'    + esc(fields.tags)  + '</div>';
        html               += '<div class="popup-title">'         + esc(fields.title) + '</div>';
        if (authors)        html += '<div class="popup-authors">' + esc(authors)      + '</div>';
@@ -515,8 +523,14 @@
                var hostMatch = ctx.href.match(/\/\/([a-z0-9-]+)\.wikipedia\.org\//i);
                var sub       = hostMatch ? hostMatch[1].toLowerCase() : 'en';
                if (sub === 'www') sub = 'en';
+                /* pageimages|extracts in one call: the article's lead
+                   image thumbnail rides along with the intro text.
+                   Thumbnails come from upload.wikimedia.org — that host
+                   must stay in the CSP's img-src. */
                return 'https://' + sub + '.wikipedia.org/w/api.php'
-                     + '?action=query&prop=extracts&exintro=1&format=json&redirects=1'
+                     + '?action=query&prop=extracts%7Cpageimages&exintro=1'
+                     + '&piprop=thumbnail&pithumbsize=320'
+                     + '&format=json&redirects=1'
                     + '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
                     + '&origin=*';
            },
@@ -533,14 +547,21 @@
                });
                var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
                if (!text) return null;
-                return { title: page.title, extract: text };
+                return {
+                    title:   page.title,
+                    extract: text,
+                    image:   page.thumbnail && page.thumbnail.source
+                };
            }
        },

-        /* arXiv — Atom API (CORS-broken upstream, proxied). */
+        /* arXiv — Atom API (CORS-broken upstream, proxied).
+           ID forms: new-style 2403.12345(v2), and old-style
+           archive/0211159 or archive.SC/0211159 (pre-2007); /pdf/ URLs
+           may carry a trailing .pdf, which stays outside the capture. */
        {
            name: 'arxiv', label: 'arXiv',
-            match: /arxiv\.org\/(?:abs|pdf)\/(\d{4}\.\d{4,5}(?:v\d+)?)/,
+            match: /arxiv\.org\/(?:abs|pdf)\/((?:\d{4}\.\d{4,5}|[a-z-]+(?:\.[A-Z]{2})?\/\d{7})(?:v\d+)?)/,
            fetchType: 'xml',
            url: function (ctx) {
                return '/proxy/arxiv/api/query?id_list='
--- a/static/resume.thumb.png
+++ b/static/resume.thumb.png