Fix popup previews: proxy prefix-strip bug, arXiv IDs, Wikipedia images
The root cause of 'PDF/arXiv previews simply do not work' was twofold:
1. nginx/popup-proxy.conf was never installed on the VPS — every
/proxy/* request (arXiv, PubMed, Internet Archive) returned nginx's
default 404. Now installed (snippets + http{}-context cache/limit
zones in conf.d, included in the vhost, nginx -t verified, reloaded).
2. The snippet itself had a latent bug that only surfaced once
installed: with a VARIABLE upstream, a URI part on proxy_pass replaces
the request URI verbatim (no location-prefix substitution), so with a
bare "/" every request hit the upstream's homepage
(archive.org HTML where JSON was expected, arXiv 429s, NCBI doc-page
redirects). Fixed with explicit prefix-strip rewrites; bad cached
responses purged. All three proxies verified returning real data,
including a live arXiv title resolve.
Client-side improvements:
- arXiv match covers old-style IDs (cs/9901002, math.GT/0309136,
cond-mat/...v1) alongside new-style, and .pdf-suffixed /pdf/ URLs
(regex verified against six forms)
- Wikipedia popups show the article's lead image: pageimages rides
along with the existing extracts call (pithumbsize=320), rendered via a
new https-only image slot in renderPopup with float styling;
upload.wikimedia.org added to the CSP's img-src
- pdf-thumbs now walks all of static/ (pdfjs pruned), so /cv.pdf and
/resume.pdf — the most-linked internal PDFs, previously thumbnail-less
and therefore popup-less — get hover previews
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
5d344f940e
commit
23250d8782
5
Makefile
5
Makefile
|
|
@ -122,8 +122,11 @@ pdf-thumbs:
|
||||||
# A failing pdftoppm must at least warn: the `find | while` pipeline's
|
# A failing pdftoppm must at least warn: the `find | while` pipeline's
|
||||||
# exit status is the last iteration's, so without the `||` a corrupt
|
# exit status is the last iteration's, so without the `||` a corrupt
|
||||||
# PDF would silently ship without a thumbnail.
|
# PDF would silently ship without a thumbnail.
|
||||||
|
# Walk ALL of static/ (not just papers/): /cv.pdf and /resume.pdf are
|
||||||
|
# the most-linked PDFs on the site and need hover thumbnails too.
|
||||||
|
# pdfjs/ is pruned — the vendored viewer ships sample PDFs.
|
||||||
@if command -v pdftoppm >/dev/null 2>&1; then \
|
@if command -v pdftoppm >/dev/null 2>&1; then \
|
||||||
find static/papers -name '*.pdf' 2>/dev/null | while read pdf; do \
|
find static -path static/pdfjs -prune -o -name '*.pdf' -print 2>/dev/null | while read pdf; do \
|
||||||
thumb="$${pdf%.pdf}.thumb"; \
|
thumb="$${pdf%.pdf}.thumb"; \
|
||||||
if [ ! -f "$${thumb}.png" ] || [ "$$pdf" -nt "$${thumb}.png" ]; then \
|
if [ ! -f "$${thumb}.png" ] || [ "$$pdf" -nt "$${thumb}.png" ]; then \
|
||||||
echo " pdf-thumb $$pdf"; \
|
echo " pdf-thumb $$pdf"; \
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,12 @@ resolver_timeout 5s;
|
||||||
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
|
# (revisions get distinct IDs like 2604.06217v2), so 30d is safe.
|
||||||
location /proxy/arxiv/ {
|
location /proxy/arxiv/ {
|
||||||
set $upstream_arxiv export.arxiv.org;
|
set $upstream_arxiv export.arxiv.org;
|
||||||
proxy_pass https://$upstream_arxiv/;
|
# With a VARIABLE upstream, a URI part on proxy_pass is passed to
|
||||||
|
# the upstream literally — "proxy_pass https://$up/;" sends every
|
||||||
|
# request to the upstream's homepage instead of prefix-stripping.
|
||||||
|
# Strip the prefix explicitly; `break` keeps the request in this location, and rewrite re-appends the query string by default.
|
||||||
|
rewrite ^/proxy/arxiv/(.*)$ /$1 break;
|
||||||
|
proxy_pass https://$upstream_arxiv;
|
||||||
proxy_set_header Host $upstream_arxiv;
|
proxy_set_header Host $upstream_arxiv;
|
||||||
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||||
proxy_ssl_server_name on;
|
proxy_ssl_server_name on;
|
||||||
|
|
@ -55,7 +60,10 @@ location /proxy/arxiv/ {
|
||||||
# change, but rarely; 7d strikes a reasonable balance.
|
# change, but rarely; 7d strikes a reasonable balance.
|
||||||
location /proxy/archive/ {
|
location /proxy/archive/ {
|
||||||
set $upstream_archive archive.org;
|
set $upstream_archive archive.org;
|
||||||
proxy_pass https://$upstream_archive/;
|
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||||
|
# on a variable proxy_pass would break this.
|
||||||
|
rewrite ^/proxy/archive/(.*)$ /$1 break;
|
||||||
|
proxy_pass https://$upstream_archive;
|
||||||
proxy_set_header Host $upstream_archive;
|
proxy_set_header Host $upstream_archive;
|
||||||
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||||
proxy_ssl_server_name on;
|
proxy_ssl_server_name on;
|
||||||
|
|
@ -76,7 +84,10 @@ location /proxy/archive/ {
|
||||||
# them server-side so popups.js stays focused on rendering.
|
# them server-side so popups.js stays focused on rendering.
|
||||||
location /proxy/pubmed/ {
|
location /proxy/pubmed/ {
|
||||||
set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
|
set $upstream_pubmed eutils.ncbi.nlm.nih.gov;
|
||||||
proxy_pass https://$upstream_pubmed/;
|
# Prefix-strip explicitly — see the arXiv block for why a URI part
|
||||||
|
# on a variable proxy_pass would break this.
|
||||||
|
rewrite ^/proxy/pubmed/(.*)$ /$1 break;
|
||||||
|
proxy_pass https://$upstream_pubmed;
|
||||||
proxy_set_header Host $upstream_pubmed;
|
proxy_set_header Host $upstream_pubmed;
|
||||||
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
proxy_set_header User-Agent "levineuwirth.org popup-proxy (ln@levineuwirth.org)";
|
||||||
proxy_ssl_server_name on;
|
proxy_ssl_server_name on;
|
||||||
|
|
|
||||||
|
|
@ -75,4 +75,4 @@ add_header Permissions-Policy
|
||||||
#
|
#
|
||||||
# To collect violation reports, set up a `report-uri` endpoint and add
|
# To collect violation reports, set up a `report-uri` endpoint and add
|
||||||
# `report-uri /csp-report;` (and/or `report-to <group>;`) below.
|
# `report-uri /csp-report;` (and/or `report-to <group>;`) below.
|
||||||
add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com; font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always;
|
add_header Content-Security-Policy-Report-Only "default-src 'self'; script-src 'self' 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data: https://*.basemaps.cartocdn.com https://upload.wikimedia.org;font-src 'self' data: https://cdn.jsdelivr.net; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://api.crossref.org https://api.github.com https://openlibrary.org https://api.biorxiv.org https://www.youtube.com https://git.levineuwirth.org; frame-ancestors 'none'; base-uri 'self'; form-action 'self'; object-src 'none'; upgrade-insecure-requests" always;
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,18 @@
|
||||||
line-height: 1.35;
|
line-height: 1.35;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Optional lead image (Wikipedia pageimages thumbnail, etc.) — floats
|
||||||
|
beside the title/extract so text wraps around it; contained by the
|
||||||
|
popup's own overflow box. */
|
||||||
|
.popup-image {
|
||||||
|
float: right;
|
||||||
|
max-width: 96px;
|
||||||
|
max-height: 120px;
|
||||||
|
margin: 0 0 0.4rem 0.6rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
border: 1px solid var(--border-muted);
|
||||||
|
}
|
||||||
|
|
||||||
.popup-abstract,
|
.popup-abstract,
|
||||||
.popup-extract {
|
.popup-extract {
|
||||||
font-size: 0.78rem;
|
font-size: 0.78rem;
|
||||||
|
|
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 192 KiB |
|
|
@ -453,6 +453,14 @@
|
||||||
|
|
||||||
var html = '<div class="popup-' + p.name + '">'
|
var html = '<div class="popup-' + p.name + '">'
|
||||||
+ srcHtml(iconKey, p.label);
|
+ srcHtml(iconKey, p.label);
|
||||||
|
/* Optional lead image (e.g. Wikipedia pageimages thumbnail).
|
||||||
|
https-only: the URL comes from the provider's API response,
|
||||||
|
and anything else (protocol-relative, data:, …) is dropped
|
||||||
|
rather than guessed at. esc() handles attribute safety. */
|
||||||
|
if (fields.image && /^https:\/\//.test(fields.image)) {
|
||||||
|
html += '<img class="popup-image" src="' + esc(fields.image)
|
||||||
|
+ '" alt="" loading="lazy">';
|
||||||
|
}
|
||||||
if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>';
|
if (fields.tags) html += '<div class="popup-tags">' + esc(fields.tags) + '</div>';
|
||||||
html += '<div class="popup-title">' + esc(fields.title) + '</div>';
|
html += '<div class="popup-title">' + esc(fields.title) + '</div>';
|
||||||
if (authors) html += '<div class="popup-authors">' + esc(authors) + '</div>';
|
if (authors) html += '<div class="popup-authors">' + esc(authors) + '</div>';
|
||||||
|
|
@ -515,8 +523,14 @@
|
||||||
var hostMatch = ctx.href.match(/\/\/([a-z0-9-]+)\.wikipedia\.org\//i);
|
var hostMatch = ctx.href.match(/\/\/([a-z0-9-]+)\.wikipedia\.org\//i);
|
||||||
var sub = hostMatch ? hostMatch[1].toLowerCase() : 'en';
|
var sub = hostMatch ? hostMatch[1].toLowerCase() : 'en';
|
||||||
if (sub === 'www') sub = 'en';
|
if (sub === 'www') sub = 'en';
|
||||||
|
/* pageimages|extracts in one call: the article's lead
|
||||||
|
image thumbnail rides along with the intro text.
|
||||||
|
Thumbnails come from upload.wikimedia.org — that host
|
||||||
|
must stay in the CSP's img-src. */
|
||||||
return 'https://' + sub + '.wikipedia.org/w/api.php'
|
return 'https://' + sub + '.wikipedia.org/w/api.php'
|
||||||
+ '?action=query&prop=extracts&exintro=1&format=json&redirects=1'
|
+ '?action=query&prop=extracts%7Cpageimages&exintro=1'
|
||||||
|
+ '&piprop=thumbnail&pithumbsize=320'
|
||||||
|
+ '&format=json&redirects=1'
|
||||||
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
|
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
|
||||||
+ '&origin=*';
|
+ '&origin=*';
|
||||||
},
|
},
|
||||||
|
|
@ -533,14 +547,21 @@
|
||||||
});
|
});
|
||||||
var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
|
var text = (doc.body.textContent || '').replace(/\s+/g, ' ').trim();
|
||||||
if (!text) return null;
|
if (!text) return null;
|
||||||
return { title: page.title, extract: text };
|
return {
|
||||||
|
title: page.title,
|
||||||
|
extract: text,
|
||||||
|
image: page.thumbnail && page.thumbnail.source
|
||||||
|
};
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
/* arXiv — Atom API (CORS-broken upstream, proxied). */
|
/* arXiv — Atom API (CORS-broken upstream, proxied).
|
||||||
|
ID forms: new-style 2403.12345(v2), and old-style
|
||||||
|
archive/0211159 or archive.SC/0211159 (pre-2007); /pdf/ URLs
|
||||||
|
may carry a trailing .pdf, which stays outside the capture. */
|
||||||
{
|
{
|
||||||
name: 'arxiv', label: 'arXiv',
|
name: 'arxiv', label: 'arXiv',
|
||||||
match: /arxiv\.org\/(?:abs|pdf)\/(\d{4}\.\d{4,5}(?:v\d+)?)/,
|
match: /arxiv\.org\/(?:abs|pdf)\/((?:\d{4}\.\d{4,5}|[a-z-]+(?:\.[A-Z]{2})?\/\d{7})(?:v\d+)?)/,
|
||||||
fetchType: 'xml',
|
fetchType: 'xml',
|
||||||
url: function (ctx) {
|
url: function (ctx) {
|
||||||
return '/proxy/arxiv/api/query?id_list='
|
return '/proxy/arxiv/api/query?id_list='
|
||||||
|
|
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 270 KiB |
Loading…
Reference in New Issue