Rich reference popups: arXiv lead figures, prominent Wikipedia images
Reference popups (provider-rendered: arXiv, Wikipedia, CrossRef, …) get a glanceable layout: wider container (560px), larger title and body type, and a full-width image banner under the source label. Internal page previews and item-card popups (new/library pages) keep the compact layout — the shared popup element toggles .link-popup--rich per show based on the rendered content.

- arXiv: a new best-effort enrich step fetches the paper's LaTeXML HTML rendition and pulls the first figure as a lead image. Enrich is time-boxed (1.8s) so the metadata popup is never held hostage; late results refresh the cache for the next hover. Figures letterbox with object-fit: contain (plots must not crop); Wikipedia photos cover-crop with an upper focal point. width/height attrs reserve aspect ratio so positioning is stable before the image loads.
- Wikipedia thumbnails request 480px for the banner width.
- nginx: new ^~ /proxy/arxiv-html/ location backed by arxiv.org proper (export.arxiv.org serves the Atom API but 429s the /html/ asset tree); 404s cached 1d (the common no-HTML-rendition case). All four proxy locations switched to ^~ — without it, static-assets.conf's per-extension regex location outranks plain prefixes and serves a local 404 for any proxied URL ending in an image extension, which is exactly how the first figure fetch failed.

Installed and verified live: proxied page (200, 298KB), figure (200 image/png), API unchanged, no-rendition 404 path; the full client resolution chain (relative src -> proxy path -> guard -> image) validated against production.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
59fcc15ca6
commit
1027b88429
|
|
@ -21,6 +21,12 @@
|
||||||
# include snippets/popup-proxy.conf;
|
# include snippets/popup-proxy.conf;
|
||||||
# }
|
# }
|
||||||
|
|
||||||
|
# All locations use `^~` prefix matching: without it, the regex
|
||||||
|
# location in static-assets.conf (per-extension cache headers) outranks
|
||||||
|
# a plain prefix match and captures any proxied URL ending in an image
|
||||||
|
# extension — e.g. an arXiv figure .png — serving a local 404 instead
|
||||||
|
# of proxying. `^~` short-circuits regex evaluation for this subtree.
|
||||||
|
|
||||||
# Shared resolver — needed because proxy_pass uses a variable upstream
|
# Shared resolver — needed because proxy_pass uses a variable upstream
|
||||||
# (literal upstreams are resolved once at startup; variables defer DNS
|
# (literal upstreams are resolved once at startup; variables defer DNS
|
||||||
# to request time, which lets nginx start without the upstream being
|
# to request time, which lets nginx start without the upstream being
|
||||||
|
|
@ -31,7 +37,7 @@ resolver_timeout 5s;
|
||||||
# ── arXiv ────────────────────────────────────────────────────────────
|
# ── arXiv ────────────────────────────────────────────────────────────
|
||||||
# Atom feed of paper metadata. Abstracts never change after publication
|
# Atom feed of paper metadata. Abstracts never change after publication
|
||||||
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
|
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
|
||||||
location /proxy/arxiv/ {
|
location ^~ /proxy/arxiv/ {
|
||||||
set $upstream_arxiv export.arxiv.org;
|
set $upstream_arxiv export.arxiv.org;
|
||||||
# With a VARIABLE upstream, a URI part on proxy_pass is passed to
|
# With a VARIABLE upstream, a URI part on proxy_pass is passed to
|
||||||
# the upstream literally — "proxy_pass https://$up/;" sends every
|
# the upstream literally — "proxy_pass https://$up/;" sends every
|
||||||
|
|
@ -67,10 +73,46 @@ location /proxy/arxiv/ {
|
||||||
add_header Access-Control-Allow-Origin "$scheme://$host" always;
|
add_header Access-Control-Allow-Origin "$scheme://$host" always;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── arXiv HTML renditions (lead figures) ─────────────────────────────
|
||||||
|
# popups.js's arXiv enrich step fetches the LaTeXML HTML page to find
|
||||||
|
# the paper's lead figure, then loads the figure image itself — both
|
||||||
|
# through this location. Upstream is arxiv.org proper, NOT
|
||||||
|
# export.arxiv.org: the export host serves the Atom API fine but
|
||||||
|
# rate-limits the /html/ asset tree (429s on figures). Pages can be
|
||||||
|
# large (hundreds of KB), which is exactly why they're cached here.
|
||||||
|
# A 404 is the common no-HTML-rendition case (pre-2024 papers,
|
||||||
|
# unconvertible sources) — cached for 1d so repeat hovers don't hammer it.
|
||||||
|
location ^~ /proxy/arxiv-html/ {
|
||||||
|
set $upstream_arxiv_site arxiv.org;
|
||||||
|
rewrite ^/proxy/arxiv-html/(.*)$ /html/$1 break;
|
||||||
|
proxy_pass https://$upstream_arxiv_site;
|
||||||
|
proxy_set_header Host $upstream_arxiv_site;
|
||||||
|
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||||
|
proxy_ssl_server_name on;
|
||||||
|
|
||||||
|
# Keep the security baseline: the add_header directives below
|
||||||
|
# would otherwise drop it for /proxy/ responses (same pattern
|
||||||
|
# as archive.conf).
|
||||||
|
proxy_hide_header Strict-Transport-Security;
|
||||||
|
proxy_hide_header Content-Security-Policy;
|
||||||
|
proxy_hide_header X-Frame-Options;
|
||||||
|
include snippets/security-headers.conf;
|
||||||
|
|
||||||
|
proxy_cache popup_proxy;
|
||||||
|
proxy_cache_valid 200 30d;
|
||||||
|
proxy_cache_valid 404 1d;
|
||||||
|
proxy_cache_valid any 5m;
|
||||||
|
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
|
||||||
|
proxy_cache_lock on;
|
||||||
|
add_header X-Cache-Status $upstream_cache_status always;
|
||||||
|
|
||||||
|
add_header Access-Control-Allow-Origin "$scheme://$host" always;
|
||||||
|
}
|
||||||
|
|
||||||
# ── Internet Archive ─────────────────────────────────────────────────
|
# ── Internet Archive ─────────────────────────────────────────────────
|
||||||
# Item metadata JSON. Item descriptions are author-edited and could
|
# Item metadata JSON. Item descriptions are author-edited and could
|
||||||
# change, but rarely; 7d strikes a reasonable balance.
|
# change, but rarely; 7d strikes a reasonable balance.
|
||||||
location /proxy/archive/ {
|
location ^~ /proxy/archive/ {
|
||||||
set $upstream_archive archive.org;
|
set $upstream_archive archive.org;
|
||||||
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||||
# on a variable proxy_pass would break this.
|
# on a variable proxy_pass would break this.
|
||||||
|
|
@ -106,7 +148,7 @@ location /proxy/archive/ {
|
||||||
# Article summaries. NCBI requests a tool=/email= identifier on every
|
# Article summaries. NCBI requests a tool=/email= identifier on every
|
||||||
# request (https://www.ncbi.nlm.nih.gov/books/NBK25497/); we inject
|
# request (https://www.ncbi.nlm.nih.gov/books/NBK25497/); we inject
|
||||||
# them server-side so popups.js stays focused on rendering.
|
# them server-side so popups.js stays focused on rendering.
|
||||||
location /proxy/pubmed/ {
|
location ^~ /proxy/pubmed/ {
|
||||||
set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
|
set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
|
||||||
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||||
# on a variable proxy_pass would break this.
|
# on a variable proxy_pass would break this.
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,32 @@
|
||||||
transition: opacity 0.14s ease, visibility 0.14s ease;
|
transition: opacity 0.14s ease, visibility 0.14s ease;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Rich layout for reference popups (provider-rendered: arXiv,
|
||||||
|
Wikipedia, CrossRef, …) — wider, larger type, image banner. Internal
|
||||||
|
page previews and item-card popups (new/library pages) deliberately
|
||||||
|
keep the compact base layout. Toggled by popups.js per show. */
|
||||||
|
.link-popup--rich {
|
||||||
|
max-width: 560px;
|
||||||
|
padding: 0.85rem 1.05rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.link-popup--rich .popup-title {
|
||||||
|
font-size: 0.98rem;
|
||||||
|
line-height: 1.3;
|
||||||
|
margin-bottom: 0.35rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.link-popup--rich .popup-authors,
|
||||||
|
.link-popup--rich .popup-meta {
|
||||||
|
font-size: 0.78rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.link-popup--rich .popup-abstract,
|
||||||
|
.link-popup--rich .popup-extract {
|
||||||
|
font-size: 0.84rem;
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
|
||||||
.link-popup.is-visible {
|
.link-popup.is-visible {
|
||||||
opacity: 1;
|
opacity: 1;
|
||||||
visibility: visible;
|
visibility: visible;
|
||||||
|
|
@ -78,16 +104,29 @@
|
||||||
line-height: 1.35;
|
line-height: 1.35;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Optional lead image (Wikipedia pageimages thumbnail, etc.) — floats
|
/* Lead image (Wikipedia pageimages thumbnail, arXiv lead figure) — a
|
||||||
beside the title/extract so text wraps around it; contained by the
|
full-width banner between the source label and the title, sized for
|
||||||
popup's own overflow box. */
|
the rich provider layout. Photos crop to fill (faces usually sit in
|
||||||
.popup-image {
|
the upper portion, hence the 30% focal point); figures — plots,
|
||||||
float: right;
|
architecture diagrams — must never be cropped, so .is-figure
|
||||||
max-width: 96px;
|
letterboxes with contain on a muted backdrop instead. width/height
|
||||||
max-height: 120px;
|
attrs from the provider reserve the aspect ratio, so the popup is
|
||||||
margin: 0 0 0.4rem 0.6rem;
|
measured and positioned correctly before the image arrives. */
|
||||||
border-radius: 4px;
|
.popup-image-banner {
|
||||||
|
display: block;
|
||||||
|
width: 100%;
|
||||||
|
height: auto;
|
||||||
|
max-height: 220px;
|
||||||
|
object-fit: cover;
|
||||||
|
object-position: center 30%;
|
||||||
|
border-radius: 5px;
|
||||||
border: 1px solid var(--border-muted);
|
border: 1px solid var(--border-muted);
|
||||||
|
margin: 0.4rem 0 0.55rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.popup-image-banner.is-figure {
|
||||||
|
object-fit: contain;
|
||||||
|
background: color-mix(in srgb, var(--text-faint) 7%, transparent);
|
||||||
}
|
}
|
||||||
|
|
||||||
.popup-abstract,
|
.popup-abstract,
|
||||||
|
|
|
||||||
|
|
@ -206,6 +206,12 @@
|
||||||
} else {
|
} else {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
/* Reference popups (provider-rendered: arXiv, Wikipedia,
|
||||||
|
…) get the larger glanceable layout; internal page
|
||||||
|
previews keep the compact one. Toggled per show since
|
||||||
|
the popup element is shared. */
|
||||||
|
popup.classList.toggle('link-popup--rich',
|
||||||
|
!!popup.querySelector('.popup-provider'));
|
||||||
positionPopup(target);
|
positionPopup(target);
|
||||||
popup.classList.add('is-visible');
|
popup.classList.add('is-visible');
|
||||||
popup.setAttribute('aria-hidden', 'false');
|
popup.setAttribute('aria-hidden', 'false');
|
||||||
|
|
@ -451,15 +457,31 @@
|
||||||
var bodyKey = fields.extract !== undefined ? 'extract' : 'abstract';
|
var bodyKey = fields.extract !== undefined ? 'extract' : 'abstract';
|
||||||
var body = truncate(fields[bodyKey], p.bodyLimit || 500);
|
var body = truncate(fields[bodyKey], p.bodyLimit || 500);
|
||||||
|
|
||||||
var html = '<div class="popup-' + p.name + '">'
|
/* The shared popup-provider class is what scheduleShow keys the
|
||||||
|
larger .link-popup--rich layout on — reference popups (arXiv,
|
||||||
|
Wikipedia, …) get the roomier glanceable treatment; internal
|
||||||
|
page previews and item cards keep the compact one. */
|
||||||
|
var html = '<div class="popup-' + p.name + ' popup-provider">'
|
||||||
+ srcHtml(iconKey, p.label);
|
+ srcHtml(iconKey, p.label);
|
||||||
/* Optional lead image (e.g. Wikipedia pageimages thumbnail).
|
/* Optional lead image (Wikipedia pageimages thumbnail, arXiv
|
||||||
https-only: the URL comes from the provider's API response,
|
lead figure) as a full-width banner under the source label.
|
||||||
and anything else (protocol-relative, data:, …) is dropped
|
Accepted srcs: https://… (API-supplied) or root-relative
|
||||||
rather than guessed at. esc() handles attribute safety. */
|
/proxy/… (same-origin proxied figures); anything else
|
||||||
if (fields.image && /^https:\/\//.test(fields.image)) {
|
(protocol-relative, data:, …) is dropped rather than guessed
|
||||||
html += '<img class="popup-image" src="' + esc(fields.image)
|
at. width/height attrs reserve the aspect ratio so the popup
|
||||||
+ '" alt="" loading="lazy">';
|
is positioned correctly before the image arrives. Photos
|
||||||
|
cover-crop; figures (plots, diagrams) must never be cropped,
|
||||||
|
so imageKind 'figure' letterboxes with object-fit: contain. */
|
||||||
|
var img = fields.image;
|
||||||
|
if (img && img.src &&
|
||||||
|
(/^https:\/\//.test(img.src) || /^\/(?!\/)/.test(img.src))) {
|
||||||
|
html += '<img class="popup-image-banner'
|
||||||
|
+ (fields.imageKind === 'figure' ? ' is-figure' : '')
|
||||||
|
+ '" src="' + esc(img.src) + '"'
|
||||||
|
+ (img.width && img.height
|
||||||
|
? ' width="' + (+img.width) + '" height="' + (+img.height) + '"'
|
||||||
|
: '')
|
||||||
|
+ ' alt="" loading="lazy">';
|
||||||
}
|
}
|
||||||
if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>';
|
if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>';
|
||||||
html += '<div class="popup-title">' + esc(fields.title) + '</div>';
|
html += '<div class="popup-title">' + esc(fields.title) + '</div>';
|
||||||
|
|
@ -490,8 +512,31 @@
|
||||||
|
|
||||||
return fetcher(url, p.fetchInit).then(function (data) {
|
return fetcher(url, p.fetchInit).then(function (data) {
|
||||||
if (!data) return null;
|
if (!data) return null;
|
||||||
var html = renderPopup(p, p.parse(data, ctx));
|
var fields = p.parse(data, ctx);
|
||||||
return html ? store(href, html) : null;
|
if (!fields) return null;
|
||||||
|
var finish = function (f) {
|
||||||
|
var html = renderPopup(p, f);
|
||||||
|
return html ? store(href, html) : null;
|
||||||
|
};
|
||||||
|
if (!p.enrich) return finish(fields);
|
||||||
|
/* enrich() is best-effort decoration (e.g. the arXiv lead
|
||||||
|
figure needs a second, potentially large fetch). Time-box
|
||||||
|
it so the metadata popup is never held hostage; if the
|
||||||
|
enrichment lands late, refresh the cache so the NEXT
|
||||||
|
hover gets the decorated version. enrich must return a
|
||||||
|
new fields object, leaving the original untouched for
|
||||||
|
the timeout path. */
|
||||||
|
var enriched = Promise.resolve()
|
||||||
|
.then(function () { return p.enrich(fields, ctx); })
|
||||||
|
.catch(function () { return null; });
|
||||||
|
var timeout = new Promise(function (resolve) {
|
||||||
|
setTimeout(resolve, 1800, undefined);
|
||||||
|
});
|
||||||
|
return Promise.race([enriched, timeout]).then(function (f) {
|
||||||
|
if (f !== undefined) return finish(f || fields);
|
||||||
|
enriched.then(function (late) { if (late) finish(late); });
|
||||||
|
return finish(fields);
|
||||||
|
});
|
||||||
}).catch(function () { return null; });
|
}).catch(function () { return null; });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -526,10 +571,11 @@
|
||||||
/* pageimages|extracts in one call: the article's lead
|
/* pageimages|extracts in one call: the article's lead
|
||||||
image thumbnail rides along with the intro text.
|
image thumbnail rides along with the intro text.
|
||||||
Thumbnails come from upload.wikimedia.org — that host
|
Thumbnails come from upload.wikimedia.org — that host
|
||||||
must stay in the CSP's img-src. */
|
must stay in the CSP's img-src. 480px because the
|
||||||
|
banner spans the rich popup's full width. */
|
||||||
return 'https://' + sub + '.wikipedia.org/w/api.php'
|
return 'https://' + sub + '.wikipedia.org/w/api.php'
|
||||||
+ '?action=query&prop=extracts%7Cpageimages&exintro=1'
|
+ '?action=query&prop=extracts%7Cpageimages&exintro=1'
|
||||||
+ '&piprop=thumbnail&pithumbsize=320'
|
+ '&piprop=thumbnail&pithumbsize=480'
|
||||||
+ '&format=json&redirects=1'
|
+ '&format=json&redirects=1'
|
||||||
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
|
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
|
||||||
+ '&origin=*';
|
+ '&origin=*';
|
||||||
|
|
@ -547,10 +593,16 @@
|
||||||
});
|
});
|
||||||
var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
|
var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
|
||||||
if (!text) return null;
|
if (!text) return null;
|
||||||
|
var thumb = page.thumbnail;
|
||||||
return {
|
return {
|
||||||
title: page.title,
|
title: page.title,
|
||||||
extract: text,
|
extract: text,
|
||||||
image: page.thumbnail && page.thumbnail.source
|
image: thumb && thumb.source
|
||||||
|
? { src: thumb.source,
|
||||||
|
width: thumb.width,
|
||||||
|
height: thumb.height }
|
||||||
|
: null,
|
||||||
|
imageKind: 'photo'
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -578,6 +630,47 @@
|
||||||
.map(function (el) { return el.textContent.trim(); }),
|
.map(function (el) { return el.textContent.trim(); }),
|
||||||
abstract: summaryEl.textContent.trim().replace(/\s+/g, ' ')
|
abstract: summaryEl.textContent.trim().replace(/\s+/g, ' ')
|
||||||
};
|
};
|
||||||
|
},
|
||||||
|
/* Lead figure, best-effort: arXiv's LaTeXML HTML rendition
|
||||||
|
(when one exists — roughly 2024+ papers with convertible
|
||||||
|
sources) carries the paper's figures. The first
|
||||||
|
figure.ltx_figure img is almost always the teaser /
|
||||||
|
architecture figure. Page and image both ride through
|
||||||
|
/proxy/arxiv-html/ (arxiv.org upstream — export.arxiv.org
|
||||||
|
rate-limits the /html/ asset tree), so img-src 'self'
|
||||||
|
covers them and nginx caches the heavy page fetch. */
|
||||||
|
enrich: function (fields, ctx) {
|
||||||
|
var id = ctx.match[1].replace(/v\d+$/, '');
|
||||||
|
return fetch('/proxy/arxiv-html/' + encodeURIComponent(id))
|
||||||
|
.then(function (r) {
|
||||||
|
if (!r.ok) return fields;
|
||||||
|
return r.text().then(function (text) {
|
||||||
|
var doc = new DOMParser().parseFromString(text, 'text/html');
|
||||||
|
var img = doc.querySelector('figure.ltx_figure img.ltx_graphics');
|
||||||
|
var src = img && img.getAttribute('src');
|
||||||
|
if (!src || /^(?:data|javascript):/i.test(src)) return fields;
|
||||||
|
/* Resolve against the page URL (r.url keeps any
|
||||||
|
redirect) — relative srcs like
|
||||||
|
"2410.21276v1/assets/x.png" become siblings
|
||||||
|
under /proxy/arxiv-html/. An upstream-absolute
|
||||||
|
"/html/…" src maps back into proxy space; any
|
||||||
|
other host is dropped. */
|
||||||
|
var resolved = new URL(src, r.url);
|
||||||
|
if (resolved.origin !== location.origin) return fields;
|
||||||
|
var path = resolved.pathname;
|
||||||
|
if (path.indexOf('/html/') === 0) {
|
||||||
|
path = '/proxy/arxiv-html/' + path.slice('/html/'.length);
|
||||||
|
}
|
||||||
|
if (path.indexOf('/proxy/arxiv-html/') !== 0) return fields;
|
||||||
|
return Object.assign({}, fields, {
|
||||||
|
image: { src: path,
|
||||||
|
width: img.getAttribute('width'),
|
||||||
|
height: img.getAttribute('height') },
|
||||||
|
imageKind: 'figure'
|
||||||
|
});
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.catch(function () { return fields; });
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue