Rich reference popups: arXiv lead figures, prominent Wikipedia images
Reference popups (provider-rendered: arXiv, Wikipedia, CrossRef, …) get a glanceable layout: wider container (560px), larger title and body type, and a full-width image banner under the source label. Internal page previews and item-card popups (new/library pages) keep the compact layout — the shared popup element toggles .link-popup--rich per show based on the rendered content.

- arXiv: a new best-effort enrich step fetches the paper's LaTeXML HTML rendition and pulls the first figure as a lead image. Enrich is time-boxed (1.8s) so the metadata popup is never held hostage; late results refresh the cache for the next hover. Figures letterbox with object-fit: contain (plots must not crop); Wikipedia photos cover-crop with an upper focal point. width/height attrs reserve aspect ratio so positioning is stable before the image loads.
- Wikipedia thumbnails request 480px for the banner width.
- nginx: new ^~ /proxy/arxiv-html/ location backed by arxiv.org proper (export.arxiv.org serves the Atom API but 429s the /html/ asset tree); 404s cached 1d (the common no-HTML-rendition case). All four proxy locations switched to ^~ — without it, static-assets.conf's per-extension regex location outranks plain prefixes and serves a local 404 for any proxied URL ending in an image extension, which is exactly how the first figure fetch failed.

Installed and verified live: proxied page (200, 298KB), figure (200 image/png), API unchanged, no-rendition 404 path; the full client resolution chain (relative src -> proxy path -> guard -> image) validated against production.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
59fcc15ca6
commit
1027b88429
|
|
@ -21,6 +21,12 @@
|
|||
# include snippets/popup-proxy.conf;
|
||||
# }
|
||||
|
||||
# All locations use `^~` prefix matching: without it, the regex
|
||||
# location in static-assets.conf (per-extension cache headers) outranks
|
||||
# a plain prefix match and captures any proxied URL ending in an image
|
||||
# extension — e.g. an arXiv figure .png — serving a local 404 instead
|
||||
# of proxying. `^~` short-circuits regex evaluation for this subtree.
|
||||
|
||||
# Shared resolver — needed because proxy_pass uses a variable upstream
|
||||
# (literal upstreams are resolved once at startup; variables defer DNS
|
||||
# to request time, which lets nginx start without the upstream being
|
||||
|
|
@ -31,7 +37,7 @@ resolver_timeout 5s;
|
|||
# ── arXiv ────────────────────────────────────────────────────────────
|
||||
# Atom feed of paper metadata. Abstracts never change after publication
|
||||
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
|
||||
location /proxy/arxiv/ {
|
||||
location ^~ /proxy/arxiv/ {
|
||||
set $upstream_arxiv export.arxiv.org;
|
||||
# With a VARIABLE upstream, a URI part on proxy_pass is passed to
|
||||
# the upstream literally — "proxy_pass https://$up/;" sends every
|
||||
|
|
@ -67,10 +73,46 @@ location /proxy/arxiv/ {
|
|||
add_header Access-Control-Allow-Origin "$scheme://$host" always;
|
||||
}
|
||||
|
||||
# ── arXiv HTML renditions (lead figures) ─────────────────────────────
|
||||
# popups.js's arXiv enrich step fetches the LaTeXML HTML page to find
|
||||
# the paper's lead figure, then loads the figure image itself — both
|
||||
# through this location. Upstream is arxiv.org proper, NOT
|
||||
# export.arxiv.org: the export host serves the Atom API fine but
|
||||
# rate-limits the /html/ asset tree (429s on figures). Pages can be
|
||||
# large (hundreds of KB), which is exactly why they're cached here.
|
||||
# A 404 is the common no-HTML-rendition case (pre-2024 papers,
|
||||
# unconvertible sources) — cached for 1d so hovers don't hammer it.
|
||||
location ^~ /proxy/arxiv-html/ {
|
||||
set $upstream_arxiv_site arxiv.org;
|
||||
rewrite ^/proxy/arxiv-html/(.*)$ /html/$1 break;
|
||||
proxy_pass https://$upstream_arxiv_site;
|
||||
proxy_set_header Host $upstream_arxiv_site;
|
||||
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||
proxy_ssl_server_name on;
|
||||
|
||||
# Keep the security baseline: the add_header directives below
|
||||
# would otherwise drop it for /proxy/ responses (same pattern
|
||||
# as archive.conf).
|
||||
proxy_hide_header Strict-Transport-Security;
|
||||
proxy_hide_header Content-Security-Policy;
|
||||
proxy_hide_header X-Frame-Options;
|
||||
include snippets/security-headers.conf;
|
||||
|
||||
proxy_cache popup_proxy;
|
||||
proxy_cache_valid 200 30d;
|
||||
proxy_cache_valid 404 1d;
|
||||
proxy_cache_valid any 5m;
|
||||
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
|
||||
proxy_cache_lock on;
|
||||
add_header X-Cache-Status $upstream_cache_status always;
|
||||
|
||||
add_header Access-Control-Allow-Origin "$scheme://$host" always;
|
||||
}
|
||||
|
||||
# ── Internet Archive ─────────────────────────────────────────────────
|
||||
# Item metadata JSON. Item descriptions are author-edited and could
|
||||
# change, but rarely; 7d strikes a reasonable balance.
|
||||
location /proxy/archive/ {
|
||||
location ^~ /proxy/archive/ {
|
||||
set $upstream_archive archive.org;
|
||||
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||
# on a variable proxy_pass would break this.
|
||||
|
|
@ -106,7 +148,7 @@ location /proxy/archive/ {
|
|||
# Article summaries. NCBI requests a tool=/email= identifier on every
|
||||
# request (https://www.ncbi.nlm.nih.gov/books/NBK25497/); we inject
|
||||
# them server-side so popups.js stays focused on rendering.
|
||||
location /proxy/pubmed/ {
|
||||
location ^~ /proxy/pubmed/ {
|
||||
set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
|
||||
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||
# on a variable proxy_pass would break this.
|
||||
|
|
|
|||
|
|
@ -26,6 +26,32 @@
|
|||
transition: opacity 0.14s ease, visibility 0.14s ease;
|
||||
}
|
||||
|
||||
/* Rich layout for reference popups (provider-rendered: arXiv,
|
||||
Wikipedia, CrossRef, …) — wider, larger type, image banner. Internal
|
||||
page previews and item-card popups (new/library pages) deliberately
|
||||
keep the compact base layout. Toggled by popups.js per show. */
|
||||
.link-popup--rich {
|
||||
max-width: 560px;
|
||||
padding: 0.85rem 1.05rem;
|
||||
}
|
||||
|
||||
.link-popup--rich .popup-title {
|
||||
font-size: 0.98rem;
|
||||
line-height: 1.3;
|
||||
margin-bottom: 0.35rem;
|
||||
}
|
||||
|
||||
.link-popup--rich .popup-authors,
|
||||
.link-popup--rich .popup-meta {
|
||||
font-size: 0.78rem;
|
||||
}
|
||||
|
||||
.link-popup--rich .popup-abstract,
|
||||
.link-popup--rich .popup-extract {
|
||||
font-size: 0.84rem;
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
.link-popup.is-visible {
|
||||
opacity: 1;
|
||||
visibility: visible;
|
||||
|
|
@ -78,16 +104,29 @@
|
|||
line-height: 1.35;
|
||||
}
|
||||
|
||||
/* Optional lead image (Wikipedia pageimages thumbnail, etc.) — floats
|
||||
beside the title/extract so text wraps around it; contained by the
|
||||
popup's own overflow box. */
|
||||
.popup-image {
|
||||
float: right;
|
||||
max-width: 96px;
|
||||
max-height: 120px;
|
||||
margin: 0 0 0.4rem 0.6rem;
|
||||
border-radius: 4px;
|
||||
/* Lead image (Wikipedia pageimages thumbnail, arXiv lead figure) — a
|
||||
full-width banner between the source label and the title, sized for
|
||||
the rich provider layout. Photos crop to fill (faces usually sit in
|
||||
the upper portion, hence the 30% focal point); figures — plots,
|
||||
architecture diagrams — must never be cropped, so .is-figure
|
||||
letterboxes with contain on a muted backdrop instead. width/height
|
||||
attrs from the provider reserve the aspect ratio, so the popup is
|
||||
measured and positioned correctly before the image arrives. */
|
||||
.popup-image-banner {
|
||||
display: block;
|
||||
width: 100%;
|
||||
height: auto;
|
||||
max-height: 220px;
|
||||
object-fit: cover;
|
||||
object-position: center 30%;
|
||||
border-radius: 5px;
|
||||
border: 1px solid var(--border-muted);
|
||||
margin: 0.4rem 0 0.55rem;
|
||||
}
|
||||
|
||||
.popup-image-banner.is-figure {
|
||||
object-fit: contain;
|
||||
background: color-mix(in srgb, var(--text-faint) 7%, transparent);
|
||||
}
|
||||
|
||||
.popup-abstract,
|
||||
|
|
|
|||
|
|
@ -206,6 +206,12 @@
|
|||
} else {
|
||||
return;
|
||||
}
|
||||
/* Reference popups (provider-rendered: arXiv, Wikipedia,
|
||||
…) get the larger glanceable layout; internal page
|
||||
previews keep the compact one. Toggled per show since
|
||||
the popup element is shared. */
|
||||
popup.classList.toggle('link-popup--rich',
|
||||
!!popup.querySelector('.popup-provider'));
|
||||
positionPopup(target);
|
||||
popup.classList.add('is-visible');
|
||||
popup.setAttribute('aria-hidden', 'false');
|
||||
|
|
@ -451,15 +457,31 @@
|
|||
var bodyKey = fields.extract !== undefined ? 'extract' : 'abstract';
|
||||
var body = truncate(fields[bodyKey], p.bodyLimit || 500);
|
||||
|
||||
var html = '<div class="popup-' + p.name + '">'
|
||||
/* The shared popup-provider class is what scheduleShow keys the
|
||||
larger .link-popup--rich layout on — reference popups (arXiv,
|
||||
Wikipedia, …) get the roomier glanceable treatment; internal
|
||||
page previews and item cards keep the compact one. */
|
||||
var html = '<div class="popup-' + p.name + ' popup-provider">'
|
||||
+ srcHtml(iconKey, p.label);
|
||||
/* Optional lead image (e.g. Wikipedia pageimages thumbnail).
|
||||
https-only: the URL comes from the provider's API response,
|
||||
and anything else (protocol-relative, data:, …) is dropped
|
||||
rather than guessed at. esc() handles attribute safety. */
|
||||
if (fields.image && /^https:\/\//.test(fields.image)) {
|
||||
html += '<img class="popup-image" src="' + esc(fields.image)
|
||||
+ '" alt="" loading="lazy">';
|
||||
/* Optional lead image (Wikipedia pageimages thumbnail, arXiv
|
||||
lead figure) as a full-width banner under the source label.
|
||||
Accepted srcs: https://… (API-supplied) or root-relative
|
||||
/proxy/… (same-origin proxied figures); anything else
|
||||
(protocol-relative, data:, …) is dropped rather than guessed
|
||||
at. width/height attrs reserve the aspect ratio so the popup
|
||||
is positioned correctly before the image arrives. Photos
|
||||
cover-crop; figures (plots, diagrams) must never be cropped,
|
||||
so imageKind 'figure' letterboxes with object-fit: contain. */
|
||||
var img = fields.image;
|
||||
if (img && img.src &&
|
||||
(/^https:\/\//.test(img.src) || /^\/(?!\/)/.test(img.src))) {
|
||||
html += '<img class="popup-image-banner'
|
||||
+ (fields.imageKind === 'figure' ? ' is-figure' : '')
|
||||
+ '" src="' + esc(img.src) + '"'
|
||||
+ (img.width && img.height
|
||||
? ' width="' + (+img.width) + '" height="' + (+img.height) + '"'
|
||||
: '')
|
||||
+ ' alt="" loading="lazy">';
|
||||
}
|
||||
if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>';
|
||||
html += '<div class="popup-title">' + esc(fields.title) + '</div>';
|
||||
|
|
@ -490,8 +512,31 @@
|
|||
|
||||
return fetcher(url, p.fetchInit).then(function (data) {
|
||||
if (!data) return null;
|
||||
var html = renderPopup(p, p.parse(data, ctx));
|
||||
return html ? store(href, html) : null;
|
||||
var fields = p.parse(data, ctx);
|
||||
if (!fields) return null;
|
||||
var finish = function (f) {
|
||||
var html = renderPopup(p, f);
|
||||
return html ? store(href, html) : null;
|
||||
};
|
||||
if (!p.enrich) return finish(fields);
|
||||
/* enrich() is best-effort decoration (e.g. the arXiv lead
|
||||
figure needs a second, potentially large fetch). Time-box
|
||||
it so the metadata popup is never held hostage; if the
|
||||
enrichment lands late, refresh the cache so the NEXT
|
||||
hover gets the decorated version. enrich must return a
|
||||
new fields object, leaving the original untouched for
|
||||
the timeout path. */
|
||||
var enriched = Promise.resolve()
|
||||
.then(function () { return p.enrich(fields, ctx); })
|
||||
.catch(function () { return null; });
|
||||
var timeout = new Promise(function (resolve) {
|
||||
setTimeout(resolve, 1800, undefined);
|
||||
});
|
||||
return Promise.race([enriched, timeout]).then(function (f) {
|
||||
if (f !== undefined) return finish(f || fields);
|
||||
enriched.then(function (late) { if (late) finish(late); });
|
||||
return finish(fields);
|
||||
});
|
||||
}).catch(function () { return null; });
|
||||
}
|
||||
|
||||
|
|
@ -526,10 +571,11 @@
|
|||
/* pageimages|extracts in one call: the article's lead
|
||||
image thumbnail rides along with the intro text.
|
||||
Thumbnails come from upload.wikimedia.org — that host
|
||||
must stay in the CSP's img-src. */
|
||||
must stay in the CSP's img-src. 480px because the
|
||||
banner spans the rich popup's full width. */
|
||||
return 'https://' + sub + '.wikipedia.org/w/api.php'
|
||||
+ '?action=query&prop=extracts%7Cpageimages&exintro=1'
|
||||
+ '&piprop=thumbnail&pithumbsize=320'
|
||||
+ '&piprop=thumbnail&pithumbsize=480'
|
||||
+ '&format=json&redirects=1'
|
||||
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
|
||||
+ '&origin=*';
|
||||
|
|
@ -547,10 +593,16 @@
|
|||
});
|
||||
var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
|
||||
if (!text) return null;
|
||||
var thumb = page.thumbnail;
|
||||
return {
|
||||
title: page.title,
|
||||
extract: text,
|
||||
image: page.thumbnail && page.thumbnail.source
|
||||
title: page.title,
|
||||
extract: text,
|
||||
image: thumb && thumb.source
|
||||
? { src: thumb.source,
|
||||
width: thumb.width,
|
||||
height: thumb.height }
|
||||
: null,
|
||||
imageKind: 'photo'
|
||||
};
|
||||
}
|
||||
},
|
||||
|
|
@ -578,6 +630,47 @@
|
|||
.map(function (el) { return el.textContent.trim(); }),
|
||||
abstract: summaryEl.textContent.trim().replace(/\s+/g, ' ')
|
||||
};
|
||||
},
|
||||
/* Lead figure, best-effort: arXiv's LaTeXML HTML rendition
|
||||
(when one exists — roughly 2024+ papers with convertible
|
||||
sources) carries the paper's figures. The first
|
||||
figure.ltx_figure img is almost always the teaser /
|
||||
architecture figure. Page and image both ride through
|
||||
/proxy/arxiv-html/ (arxiv.org upstream — export.arxiv.org
|
||||
rate-limits the /html/ asset tree), so img-src 'self'
|
||||
covers them and nginx caches the heavy page fetch. */
|
||||
enrich: function (fields, ctx) {
|
||||
var id = ctx.match[1].replace(/v\d+$/, '');
|
||||
return fetch('/proxy/arxiv-html/' + encodeURIComponent(id))
|
||||
.then(function (r) {
|
||||
if (!r.ok) return fields;
|
||||
return r.text().then(function (text) {
|
||||
var doc = new DOMParser().parseFromString(text, 'text/html');
|
||||
var img = doc.querySelector('figure.ltx_figure img.ltx_graphics');
|
||||
var src = img && img.getAttribute('src');
|
||||
if (!src || /^(?:data|javascript):/i.test(src)) return fields;
|
||||
/* Resolve against the page URL (r.url keeps any
|
||||
redirect) — relative srcs like
|
||||
"2410.21276v1/assets/x.png" become siblings
|
||||
under /proxy/arxiv-html/. An upstream-absolute
|
||||
"/html/…" src maps back into proxy space; any
|
||||
other host is dropped. */
|
||||
var resolved = new URL(src, r.url);
|
||||
if (resolved.origin !== location.origin) return fields;
|
||||
var path = resolved.pathname;
|
||||
if (path.indexOf('/html/') === 0) {
|
||||
path = '/proxy/arxiv-html/' + path.slice('/html/'.length);
|
||||
}
|
||||
if (path.indexOf('/proxy/arxiv-html/') !== 0) return fields;
|
||||
return Object.assign({}, fields, {
|
||||
image: { src: path,
|
||||
width: img.getAttribute('width'),
|
||||
height: img.getAttribute('height') },
|
||||
imageKind: 'figure'
|
||||
});
|
||||
});
|
||||
})
|
||||
.catch(function () { return fields; });
|
||||
}
|
||||
},
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue