Fix popup previews: proxy prefix-strip bug, arXiv IDs, Wikipedia images
The root cause of 'PDF/arXiv previews simply do not work' was twofold:
1. nginx/popup-proxy.conf was never installed on the VPS — every
/proxy/* request (arXiv, PubMed, Internet Archive) returned nginx's
default 404. Now installed (snippets + http{}-context cache/limit
zones in conf.d, included in the vhost, nginx -t verified, reloaded).
2. The snippet itself had a latent bug that only surfaced once
installed: with a VARIABLE upstream, a URI part on proxy_pass is
passed literally — every request hit the upstream's homepage
(archive.org HTML where JSON was expected, arXiv 429s, NCBI doc-page
redirects). Fixed with explicit prefix-strip rewrites; bad cached
responses purged. All three proxies verified returning real data,
including a live arXiv title resolve.
Client-side improvements:
- arXiv match covers old-style IDs (cs/9901002, math.GT/0309136,
cond-mat/...v1) alongside new-style, and .pdf-suffixed /pdf/ URLs
(regex verified against six forms)
- Wikipedia popups show the article's lead image: pageimages rides
along with the existing extracts call (pithumbsize=320), rendered via a
new https-only image slot in renderPopup with float styling;
upload.wikimedia.org added to the CSP's img-src
- pdf-thumbs now walks all of static/ (pdfjs pruned), so /cv.pdf and
/resume.pdf — the most-linked internal PDFs, previously thumbnail-less
and therefore popup-less — get hover previews
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
5d344f940e
commit
23250d8782
5
Makefile
5
Makefile
|
|
@ -122,8 +122,11 @@ pdf-thumbs:
|
|||
# A failing pdftoppm must at least warn: the `find | while` pipeline's
|
||||
# exit status is the last iteration's, so without the `||` a corrupt
|
||||
# PDF would silently ship without a thumbnail.
|
||||
# Walk ALL of static/ (not just papers/): /cv.pdf and /resume.pdf are
|
||||
# the most-linked PDFs on the site and need hover thumbnails too.
|
||||
# pdfjs/ is pruned — the vendored viewer ships sample PDFs.
|
||||
@if command -v pdftoppm >/dev/null 2>&1; then \
|
||||
find static/papers -name '*.pdf' 2>/dev/null | while read pdf; do \
|
||||
find static -path static/pdfjs -prune -o -name '*.pdf' -print 2>/dev/null | while read pdf; do \
|
||||
thumb="$${pdf%.pdf}.thumb"; \
|
||||
if [ ! -f "$${thumb}.png" ] || [ "$$pdf" -nt "$${thumb}.png" ]; then \
|
||||
echo " pdf-thumb $$pdf"; \
|
||||
|
|
|
|||
|
|
@ -33,7 +33,12 @@ resolver_timeout 5s;
|
|||
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
|
||||
location /proxy/arxiv/ {
|
||||
set $upstream_arxiv export.arxiv.org;
|
||||
proxy_pass https://$upstream_arxiv/;
|
||||
# With a VARIABLE upstream, a URI part on proxy_pass is passed to
|
||||
# the upstream literally — "proxy_pass https://$up/;" sends every
|
||||
# request to the upstream's homepage instead of prefix-stripping.
|
||||
# Strip the prefix explicitly; `rewrite` re-appends the query string itself, and `break` keeps the request in this location.
|
||||
rewrite ^/proxy/arxiv/(.*)$ /$1 break;
|
||||
proxy_pass https://$upstream_arxiv;
|
||||
proxy_set_header Host $upstream_arxiv;
|
||||
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||
proxy_ssl_server_name on;
|
||||
|
|
@ -55,7 +60,10 @@ location /proxy/arxiv/ {
|
|||
# change, but rarely; 7d strikes a reasonable balance.
|
||||
location /proxy/archive/ {
|
||||
set $upstream_archive archive.org;
|
||||
proxy_pass https://$upstream_archive/;
|
||||
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||
# on a variable proxy_pass would break this.
|
||||
rewrite ^/proxy/archive/(.*)$ /$1 break;
|
||||
proxy_pass https://$upstream_archive;
|
||||
proxy_set_header Host $upstream_archive;
|
||||
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||
proxy_ssl_server_name on;
|
||||
|
|
@ -76,7 +84,10 @@ location /proxy/archive/ {
|
|||
# them server-side so popups.js stays focused on rendering.
|
||||
location /proxy/pubmed/ {
|
||||
set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
|
||||
proxy_pass https://$upstream_pubmed/;
|
||||
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||
# on a variable proxy_pass would break this.
|
||||
rewrite ^/proxy/pubmed/(.*)$ /$1 break;
|
||||
proxy_pass https://$upstream_pubmed;
|
||||
proxy_set_header Host $upstream_pubmed;
|
||||
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||
proxy_ssl_server_name on;
|
||||
|
|
|
|||
|
|
@ -75,4 +75,4 @@ add_header Permissions-Policy
|
|||
#
|
||||
# To collect violation reports, set up a `report-uri` endpoint and add
|
||||
# `report-uri /csp-report;` (and/or `report-to <group>;`) below.
|
||||
add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com; font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always;
|
||||
add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com https://upload.wikimedia.org;font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always;
|
||||
|
|
|
|||
|
|
@ -78,6 +78,18 @@
|
|||
line-height: 1.35;
|
||||
}
|
||||
|
||||
/* Optional lead image (Wikipedia pageimages thumbnail, etc.) — floats
|
||||
beside the title/extract so text wraps around it; contained by the
|
||||
popup's own overflow box. */
|
||||
.popup-image {
|
||||
float: right;
|
||||
max-width: 96px;
|
||||
max-height: 120px;
|
||||
margin: 0 0 0.4rem 0.6rem;
|
||||
border-radius: 4px;
|
||||
border: 1px solid var(--border-muted);
|
||||
}
|
||||
|
||||
.popup-abstract,
|
||||
.popup-extract {
|
||||
font-size: 0.78rem;
|
||||
|
|
|
|||
Binary file not shown.
|
After Width: | Height: | Size: 192 KiB |
|
|
@ -453,6 +453,14 @@
|
|||
|
||||
var html = '<div class="popup-' + p.name + '">'
|
||||
+ srcHtml(iconKey, p.label);
|
||||
/* Optional lead image (e.g. Wikipedia pageimages thumbnail).
|
||||
https-only: the URL comes from the provider's API response,
|
||||
and anything else (protocol-relative, data:, …) is dropped
|
||||
rather than guessed at. esc() handles attribute safety. */
|
||||
if (fields.image && /^https:\/\//.test(fields.image)) {
|
||||
html += '<img class="popup-image" src="' + esc(fields.image)
|
||||
+ '" alt="" loading="lazy">';
|
||||
}
|
||||
if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>';
|
||||
html += '<div class="popup-title">' + esc(fields.title) + '</div>';
|
||||
if (authors) html += '<div class="popup-authors">' + esc(authors) + '</div>';
|
||||
|
|
@ -515,8 +523,14 @@
|
|||
var hostMatch = ctx.href.match(/\/\/([a-z0-9-]+)\.wikipedia\.org\//i);
|
||||
var sub = hostMatch ? hostMatch[1].toLowerCase() : 'en';
|
||||
if (sub === 'www') sub = 'en';
|
||||
/* pageimages|extracts in one call: the article's lead
|
||||
image thumbnail rides along with the intro text.
|
||||
Thumbnails come from upload.wikimedia.org — that host
|
||||
must stay in the CSP's img-src. */
|
||||
return 'https://' + sub + '.wikipedia.org/w/api.php'
|
||||
+ '?action=query&prop=extracts&exintro=1&format=json&redirects=1'
|
||||
+ '?action=query&prop=extracts%7Cpageimages&exintro=1'
|
||||
+ '&piprop=thumbnail&pithumbsize=320'
|
||||
+ '&format=json&redirects=1'
|
||||
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
|
||||
+ '&origin=*';
|
||||
},
|
||||
|
|
@ -533,14 +547,21 @@
|
|||
});
|
||||
var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
|
||||
if (!text) return null;
|
||||
return { title: page.title, extract: text };
|
||||
return {
|
||||
title: page.title,
|
||||
extract: text,
|
||||
image: page.thumbnail && page.thumbnail.source
|
||||
};
|
||||
}
|
||||
},
|
||||
|
||||
/* arXiv — Atom API (CORS-broken upstream, proxied). */
|
||||
/* arXiv — Atom API (CORS-broken upstream, proxied).
|
||||
ID forms: new-style 2403.12345(v2), and old-style
|
||||
archive/0211159 or archive.SC/0211159 (pre-April 2007); /pdf/ URLs
|
||||
may carry a trailing .pdf, which stays outside the capture. */
|
||||
{
|
||||
name: 'arxiv', label: 'arXiv',
|
||||
match: /arxiv\.org\/(?:abs|pdf)\/(\d{4}\.\d{4,5}(?:v\d+)?)/,
|
||||
match: /arxiv\.org\/(?:abs|pdf)\/((?:\d{4}\.\d{4,5}|[a-z-]+(?:\.[A-Z]{2})?\/\d{7})(?:v\d+)?)/,
|
||||
fetchType: 'xml',
|
||||
url: function (ctx) {
|
||||
return '/proxy/arxiv/api/query?id_list='
|
||||
|
|
|
|||
Binary file not shown.
|
After Width: | Height: | Size: 270 KiB |
Loading…
Reference in New Issue