States/Context/Embeddings fixes
This commit is contained in:
parent
6d2f9d12ae
commit
6585573dae
22
.env.example
22
.env.example
|
|
@ -1,9 +1,15 @@
|
||||||
# Copy this file to .env and fill in the values.
|
# Copy this file to .env and fill in the values, then run:
|
||||||
# .env is gitignored — never commit it.
|
# chmod 600 .env
|
||||||
|
# so other local users cannot read your VPS path / token. .env is
|
||||||
|
# gitignored — never commit it. The auto-snapshot in `make build`
|
||||||
|
# uses an explicit pathspec under content/ to keep stray .env files
|
||||||
|
# out of the snapshot, but **/.env is also in .gitignore as a backstop.
|
||||||
#
|
#
|
||||||
# `make deploy` rsyncs the built _site/ to the VPS, then pushes the
|
# `make deploy` pushes to GitHub first, then rsyncs the built _site/
|
||||||
# repository to GitHub. The Makefile aborts with a clear error if any
|
# to the VPS. The Makefile aborts with a clear error if any of
|
||||||
# of VPS_USER / VPS_HOST / VPS_PATH is unset.
|
# VPS_USER / VPS_HOST / VPS_PATH is unset, if VPS_PATH points at an
|
||||||
|
# obviously dangerous parent directory, or if _site/index.html does
|
||||||
|
# not exist (a sign of a broken build).
|
||||||
|
|
||||||
# --- VPS deployment target -------------------------------------------------
|
# --- VPS deployment target -------------------------------------------------
|
||||||
# SSH user on the deployment VPS.
|
# SSH user on the deployment VPS.
|
||||||
|
|
@ -15,8 +21,10 @@ VPS_PATH=
|
||||||
|
|
||||||
# --- GitHub mirror push ----------------------------------------------------
|
# --- GitHub mirror push ----------------------------------------------------
|
||||||
# A GitHub fine-grained personal access token with Contents: read+write
|
# A GitHub fine-grained personal access token with Contents: read+write
|
||||||
# on the levineuwirth.org repository.
|
# on the levineuwirth.org repository. Currently optional — `make deploy`
|
||||||
# Generate at: https://github.com/settings/tokens
|
# uses your local git credential helper for `git push`, so this is only
|
||||||
|
# needed if you wire token-based push into a credential helper yourself.
|
||||||
|
# Generate at: https://github.com/settings/personal-access-tokens/new
|
||||||
GITHUB_TOKEN=
|
GITHUB_TOKEN=
|
||||||
|
|
||||||
# The GitHub repository in owner/repo format.
|
# The GitHub repository in owner/repo format.
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,10 @@ _site/
|
||||||
_cache/
|
_cache/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.env
|
.env
|
||||||
|
# Defense-in-depth: catch any stray .env / .env.* anywhere in the tree
|
||||||
|
# (the auto-snapshot in the Makefile stages content/ on every build).
|
||||||
|
**/.env
|
||||||
|
**/.env.*
|
||||||
|
|
||||||
# Editor backup/swap files
|
# Editor backup/swap files
|
||||||
*~
|
*~
|
||||||
|
|
|
||||||
42
Makefile
42
Makefile
|
|
@ -1,9 +1,12 @@
|
||||||
.PHONY: build deploy sign download-model download-pdfjs compress-assets convert-images pdf-thumbs pdfs watch clean dev
|
.PHONY: build deploy sign download-model download-pdfjs compress-assets convert-images pdf-thumbs pdfs watch clean dev
|
||||||
|
|
||||||
# Source .env for GITHUB_TOKEN and GITHUB_REPO if it exists.
|
# Source .env for deploy / GitHub config if it exists.
|
||||||
# .env format: KEY=value (one per line, no `export` prefix, no quotes needed).
|
# .env format: KEY=value (one per line, no `export` prefix, no quotes needed).
|
||||||
|
# Only the variables explicitly listed below are exported to recipe
|
||||||
|
# subprocesses — bare `export` would leak every .env key (including any
|
||||||
|
# future GITHUB_TOKEN) into every child process.
|
||||||
-include .env
|
-include .env
|
||||||
export
|
export VPS_USER VPS_HOST VPS_PATH GITHUB_REPO
|
||||||
|
|
||||||
build:
|
build:
|
||||||
# Auto-snapshot any uncommitted content/ changes BEFORE the build
|
# Auto-snapshot any uncommitted content/ changes BEFORE the build
|
||||||
|
|
@ -12,8 +15,20 @@ build:
|
||||||
# the history — that's intentional. The next successful build
|
# the history — that's intentional. The next successful build
|
||||||
# either reuses it (no new content/ changes) or appends another
|
# either reuses it (no new content/ changes) or appends another
|
||||||
# snapshot on top, so failures don't disappear from the log.
|
# snapshot on top, so failures don't disappear from the log.
|
||||||
@git add content/
|
#
|
||||||
@git diff --cached --quiet || git commit -m "auto: $$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
# Pathspec is explicit (not `git add content/`) so a stray .env,
|
||||||
|
# credential file, or other non-content artifact dropped under
|
||||||
|
# content/ is NOT auto-staged. The :(glob) magic prefix makes `**`
|
||||||
|
# match across path components (git default fnmatch does not).
|
||||||
|
# Add new extensions here if a new asset type is introduced.
|
||||||
|
@git add ':(glob)content/**/*.md' ':(glob)content/**/*.html' ':(glob)content/**/*.bib' \
|
||||||
|
':(glob)content/**/*.png' ':(glob)content/**/*.jpg' ':(glob)content/**/*.jpeg' \
|
||||||
|
':(glob)content/**/*.svg' ':(glob)content/**/*.gif' ':(glob)content/**/*.pdf' \
|
||||||
|
':(glob)content/**/*.mp3' ':(glob)content/**/*.ogg' ':(glob)content/**/*.flac' \
|
||||||
|
':(glob)content/**/*.yaml' ':(glob)content/**/*.yml' ':(glob)content/**/*.json' \
|
||||||
|
':(glob)content/**/*.css' ':(glob)content/**/*.tex'
|
||||||
|
@git diff --cached --quiet || git commit -m "auto: $$(date -u +%Y-%m-%dT%H:%M:%SZ) [skip ci]"
|
||||||
|
@mkdir -p data
|
||||||
@date +%s > data/build-start.txt
|
@date +%s > data/build-start.txt
|
||||||
@./tools/convert-images.sh
|
@./tools/convert-images.sh
|
||||||
@$(MAKE) -s pdf-thumbs
|
@$(MAKE) -s pdf-thumbs
|
||||||
|
|
@ -29,7 +44,8 @@ build:
|
||||||
> IGNORE.txt
|
> IGNORE.txt
|
||||||
@BUILD_END=$$(date +%s); \
|
@BUILD_END=$$(date +%s); \
|
||||||
BUILD_START=$$(cat data/build-start.txt); \
|
BUILD_START=$$(cat data/build-start.txt); \
|
||||||
echo $$((BUILD_END - BUILD_START)) > data/last-build-seconds.txt
|
echo $$((BUILD_END - BUILD_START)) > data/last-build-seconds.txt.tmp && \
|
||||||
|
mv data/last-build-seconds.txt.tmp data/last-build-seconds.txt
|
||||||
|
|
||||||
sign:
|
sign:
|
||||||
@./tools/sign-site.sh
|
@./tools/sign-site.sh
|
||||||
|
|
@ -99,9 +115,19 @@ deploy: clean build sign
|
||||||
@test -n "$(VPS_USER)" || (echo "deploy: VPS_USER not set in .env" >&2; exit 1)
|
@test -n "$(VPS_USER)" || (echo "deploy: VPS_USER not set in .env" >&2; exit 1)
|
||||||
@test -n "$(VPS_HOST)" || (echo "deploy: VPS_HOST not set in .env" >&2; exit 1)
|
@test -n "$(VPS_HOST)" || (echo "deploy: VPS_HOST not set in .env" >&2; exit 1)
|
||||||
@test -n "$(VPS_PATH)" || (echo "deploy: VPS_PATH not set in .env" >&2; exit 1)
|
@test -n "$(VPS_PATH)" || (echo "deploy: VPS_PATH not set in .env" >&2; exit 1)
|
||||||
@command -v notify-send >/dev/null 2>&1 && notify-send "make deploy" "Ready to rsync — waiting for SSH auth" || true
|
# Refuse to deploy a manifestly broken build. _site/index.html must
|
||||||
rsync -avz --delete _site/ $(VPS_USER)@$(VPS_HOST):$(VPS_PATH)/
|
# exist and be non-empty before we run rsync --delete on the VPS.
|
||||||
|
@test -s _site/index.html || { echo "deploy: _site/index.html is missing or empty — refusing to rsync" >&2; exit 1; }
|
||||||
|
# Defense-in-depth: refuse rsync --delete to obviously dangerous
|
||||||
|
# parents in case VPS_PATH was typo'd (e.g. trailing-slash mistake).
|
||||||
|
@case "$(VPS_PATH)" in /|/srv|/srv/http|/var|/var/www|/home|/root|"") echo "deploy: VPS_PATH=$(VPS_PATH) looks unsafe — refusing" >&2; exit 1 ;; esac
|
||||||
|
@command -v notify-send >/dev/null 2>&1 && notify-send "make deploy" "Ready to push & rsync — waiting for auth" || true
|
||||||
|
# Push first: a successful push is cheap to roll back, while a
|
||||||
|
# half-completed rsync is harder to recover from. If the push
|
||||||
|
# fails (auth, branch protection, network), abort before touching
|
||||||
|
# the VPS so the public source repo and the live site stay in sync.
|
||||||
git push -u origin main
|
git push -u origin main
|
||||||
|
rsync -avz --delete _site/ $(VPS_USER)@$(VPS_HOST):$(VPS_PATH)/
|
||||||
|
|
||||||
watch: export SITE_ENV = dev
|
watch: export SITE_ENV = dev
|
||||||
watch:
|
watch:
|
||||||
|
|
@ -117,4 +143,4 @@ dev: export SITE_ENV = dev
|
||||||
dev:
|
dev:
|
||||||
cabal run site -- clean
|
cabal run site -- clean
|
||||||
cabal run site -- build
|
cabal run site -- build
|
||||||
python3 -m http.server 8000 --directory _site
|
python3 -m http.server 8000 --bind 127.0.0.1 --directory _site
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ module Contexts
|
||||||
, compositionCtx
|
, compositionCtx
|
||||||
, contentKindField
|
, contentKindField
|
||||||
, abstractField
|
, abstractField
|
||||||
|
, descriptionField
|
||||||
, tagLinksField
|
, tagLinksField
|
||||||
, tagLinksFieldExcludingScope
|
, tagLinksFieldExcludingScope
|
||||||
, tagLinksFieldExcludingTopSegment
|
, tagLinksFieldExcludingTopSegment
|
||||||
|
|
@ -34,7 +35,7 @@ import Data.Time.Format (formatTime, defaultTimeLocale, parseTimeM)
|
||||||
import System.FilePath (takeDirectory, takeFileName)
|
import System.FilePath (takeDirectory, takeFileName)
|
||||||
import Text.Read (readMaybe)
|
import Text.Read (readMaybe)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Text.Pandoc (runPure, readMarkdown, writeHtml5String, Pandoc(..), Block(..), Inline(..))
|
import Text.Pandoc (runPure, readMarkdown, writeHtml5String, writePlain, Pandoc(..), Block(..), Inline(..))
|
||||||
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
|
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
|
||||||
import Hakyll hiding (trim)
|
import Hakyll hiding (trim)
|
||||||
import Backlinks (backlinksField)
|
import Backlinks (backlinksField)
|
||||||
|
|
@ -348,6 +349,44 @@ abstractField = field "abstract" $ \item -> do
|
||||||
isPara (Para _) = True
|
isPara (Para _) = True
|
||||||
isPara _ = False
|
isPara _ = False
|
||||||
|
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
-- Description field
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | Renders the @abstract@ frontmatter key as plain text suitable for use in
|
||||||
|
-- @<meta name="description">@, @og:description@, and @twitter:description@.
|
||||||
|
-- Strips Pandoc markup, collapses internal whitespace, truncates to ~200
|
||||||
|
-- chars, and HTML-escapes attribute-special characters. Fails (via @fail@)
|
||||||
|
-- when no @abstract@ is present (so @$if(description)$@ short-circuits).
|
||||||
|
descriptionField :: Context String
|
||||||
|
descriptionField = field "description" $ \item -> do
|
||||||
|
meta <- getMetadata (itemIdentifier item)
|
||||||
|
case lookupString "abstract" meta of
|
||||||
|
Nothing -> fail "no abstract"
|
||||||
|
Just src -> do
|
||||||
|
let pandocResult = runPure $ do
|
||||||
|
doc <- readMarkdown defaultHakyllReaderOptions (T.pack src)
|
||||||
|
writePlain defaultHakyllWriterOptions doc
|
||||||
|
case pandocResult of
|
||||||
|
Left err -> fail $ "Pandoc error rendering description: " ++ show err
|
||||||
|
Right txt ->
|
||||||
|
let collapsed = T.unwords (T.words txt)
|
||||||
|
capped = if T.length collapsed > 200
|
||||||
|
then T.take 197 collapsed <> T.pack "\x2026"
|
||||||
|
else collapsed
|
||||||
|
in return (attrEscape (T.unpack capped))
|
||||||
|
|
||||||
|
-- | HTML-escape characters that would break out of an attribute value.
|
||||||
|
attrEscape :: String -> String
|
||||||
|
attrEscape = concatMap esc
|
||||||
|
where
|
||||||
|
esc '&' = "&amp;"
|
||||||
|
esc '<' = "&lt;"
|
||||||
|
esc '>' = "&gt;"
|
||||||
|
esc '"' = "&quot;"
|
||||||
|
esc '\'' = "&#39;"
|
||||||
|
esc c = [c]
|
||||||
|
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
-- Summary field
|
-- Summary field
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
@ -377,6 +416,7 @@ siteCtx =
|
||||||
<> buildTimeField
|
<> buildTimeField
|
||||||
<> pageScriptsField
|
<> pageScriptsField
|
||||||
<> abstractField
|
<> abstractField
|
||||||
|
<> descriptionField
|
||||||
<> summaryField
|
<> summaryField
|
||||||
<> dingbatField
|
<> dingbatField
|
||||||
<> defaultContext
|
<> defaultContext
|
||||||
|
|
|
||||||
|
|
@ -619,7 +619,10 @@ renderDistribution wcs =
|
||||||
]
|
]
|
||||||
counts = foldr (\w acc -> Map.insertWith (+) (bucketOf w) (1 :: Int) acc)
|
counts = foldr (\w acc -> Map.insertWith (+) (bucketOf w) (1 :: Int) acc)
|
||||||
(Map.fromList [(i, 0 :: Int) | i <- [0 .. 4]]) wcs
|
(Map.fromList [(i, 0 :: Int) | i <- [0 .. 4]]) wcs
|
||||||
buckets = [(labels !! i, fromMaybe 0 (Map.lookup i counts)) | i <- [0 .. 4]]
|
-- Pair labels with bucket indices via @zip@ rather than @(!!)@ to keep
|
||||||
|
-- the function total even if the bucket count and @labels@ list ever
|
||||||
|
-- drift out of sync (matching the discipline used in 'median').
|
||||||
|
buckets = [(lbl, fromMaybe 0 (Map.lookup i counts)) | (i, lbl) <- zip [0 :: Int ..] labels]
|
||||||
maxCount = max 1 (maximum (map snd buckets))
|
maxCount = max 1 (maximum (map snd buckets))
|
||||||
bar (lbl, n) =
|
bar (lbl, n) =
|
||||||
let pct = n * 100 `div` maxCount
|
let pct = n * 100 `div` maxCount
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,6 @@ evidence: 2
|
||||||
scope: broad
|
scope: broad
|
||||||
novelty: idiosyncratic
|
novelty: idiosyncratic
|
||||||
practicality: high
|
practicality: high
|
||||||
confidence history:
|
confidence-history:
|
||||||
- 65
|
- 65
|
||||||
---
|
---
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ tags:
|
||||||
- nonfiction/philosophy
|
- nonfiction/philosophy
|
||||||
authors:
|
authors:
|
||||||
- "Levi Neuwirth | /me.html"
|
- "Levi Neuwirth | /me.html"
|
||||||
revised:
|
history:
|
||||||
- date: "2026-04-17"
|
- date: "2026-04-17"
|
||||||
note: "expanded section on Shestov's divergence from Nietzsche"
|
note: "expanded section on Shestov's divergence from Nietzsche"
|
||||||
- date: "2025-12-03"
|
- date: "2025-12-03"
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,8 @@
|
||||||
|
---
|
||||||
|
title: Library
|
||||||
|
library: true
|
||||||
|
---
|
||||||
|
|
||||||
::: {lang="es"}
|
::: {lang="es"}
|
||||||
> *El universo (que otros llaman la Biblioteca) se compone de un número indefinido, y tal vez infinito, de galerías hexagonales, con vastos pozos de ventilación en el medio, cercados por barandas bajísimas.*
|
> *El universo (que otros llaman la Biblioteca) se compone de un número indefinido, y tal vez infinito, de galerías hexagonales, con vastos pozos de ventilación en el medio, cercados por barandas bajísimas.*
|
||||||
>
|
>
|
||||||
|
|
|
||||||
|
|
@ -294,20 +294,32 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 1. Citations — synchronous DOM lookup; supports multi-citation groups
|
/* 1. Citations — synchronous DOM lookup; supports multi-citation groups
|
||||||
via data-cite-keys (space-separated list of ref-* IDs). */
|
via data-cite-keys (space-separated list of ref-* IDs).
|
||||||
|
Returns a DocumentFragment of cloned bibliography entries instead
|
||||||
|
of stringifying innerHTML, so a malicious or malformed cite target
|
||||||
|
cannot smuggle markup through the popup's innerHTML setter. */
|
||||||
function citationContent(target) {
|
function citationContent(target) {
|
||||||
return new Promise(function (resolve) {
|
return new Promise(function (resolve) {
|
||||||
var keysAttr = target.getAttribute('data-cite-keys');
|
var keysAttr = target.getAttribute('data-cite-keys');
|
||||||
var ids = keysAttr
|
var ids = keysAttr
|
||||||
? keysAttr.trim().split(/\s+/)
|
? keysAttr.trim().split(/\s+/)
|
||||||
: [(target.getAttribute('href') || '').slice(1)];
|
: [(target.getAttribute('href') || '').slice(1)];
|
||||||
var parts = ids.map(function (id) {
|
var entries = ids
|
||||||
var entry = document.getElementById(id);
|
.map(function (id) { return document.getElementById(id); })
|
||||||
return entry ? '<div class="popup-citation-entry">' + entry.innerHTML + '</div>' : null;
|
.filter(Boolean);
|
||||||
}).filter(Boolean);
|
if (!entries.length) { resolve(null); return; }
|
||||||
resolve(parts.length
|
|
||||||
? '<div class="popup-citation">' + parts.join('') + '</div>'
|
var wrapper = document.createElement('div');
|
||||||
: null);
|
wrapper.className = 'popup-citation';
|
||||||
|
entries.forEach(function (entry) {
|
||||||
|
var item = document.createElement('div');
|
||||||
|
item.className = 'popup-citation-entry';
|
||||||
|
Array.prototype.forEach.call(entry.childNodes, function (n) {
|
||||||
|
item.appendChild(n.cloneNode(true));
|
||||||
|
});
|
||||||
|
wrapper.appendChild(item);
|
||||||
|
});
|
||||||
|
resolve(wrapper);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
<a href="/">levineuwirth.org</a>
|
<a href="/">levineuwirth.org</a>
|
||||||
</div>
|
</div>
|
||||||
<div class="footer-center">
|
<div class="footer-center">
|
||||||
<span class="footer-license">CC BY-SA-NC 4.0 · <a href="https://git.levineuwirth.org/neuwirth/levineuwirth.org">MIT</a> · <a href="/memento-mori.html" class="footer-mm">MM</a></span>
|
<span class="footer-license"><a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" rel="license">CC BY-NC-SA 4.0</a> · <a href="https://git.levineuwirth.org/neuwirth/levineuwirth.org">MIT</a> · <a href="/memento-mori.html" class="footer-mm">MM</a></span>
|
||||||
</div>
|
</div>
|
||||||
<div class="footer-right">
|
<div class="footer-right">
|
||||||
<a href="/build/" class="footer-build-link" aria-label="Build telemetry">build</a> $build-time$
|
<a href="/build/" class="footer-build-link" aria-label="Build telemetry">build</a> $build-time$
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,21 @@
|
||||||
<meta charset="UTF-8">
|
<meta charset="utf-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
$if(home)$<title>Levi Neuwirth</title>$else$<title>$title$ — Levi Neuwirth</title>$endif$
|
$if(home)$<title>Levi Neuwirth</title>$else$$if(title)$<title>$title$ — Levi Neuwirth</title>$else$<title>Levi Neuwirth</title>$endif$$endif$
|
||||||
|
$if(description)$<meta name="description" content="$description$">$endif$
|
||||||
|
<link rel="canonical" href="$site-url$$url$">
|
||||||
|
<link rel="alternate" type="application/atom+xml" title="Levi Neuwirth" href="/feed.xml">
|
||||||
|
<link rel="alternate" type="application/atom+xml" title="Levi Neuwirth — music" href="/music/feed.xml">
|
||||||
|
|
||||||
|
<!-- OpenGraph / Twitter (link-preview unfurling) -->
|
||||||
|
<meta property="og:site_name" content="Levi Neuwirth">
|
||||||
|
$if(home)$<meta property="og:title" content="Levi Neuwirth">$else$$if(title)$<meta property="og:title" content="$title$">$endif$$endif$
|
||||||
|
$if(description)$<meta property="og:description" content="$description$">$endif$
|
||||||
|
<meta property="og:url" content="$site-url$$url$">
|
||||||
|
$if(date)$<meta property="og:type" content="article">$else$<meta property="og:type" content="website">$endif$
|
||||||
|
<meta property="og:image" content="$site-url$/web-app-manifest-512x512.png">
|
||||||
|
<meta name="twitter:card" content="summary">
|
||||||
|
$if(description)$<meta name="twitter:description" content="$description$">$endif$
|
||||||
|
|
||||||
<link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
|
<link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
|
||||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg">
|
<link rel="icon" type="image/svg+xml" href="/favicon.svg">
|
||||||
<link rel="shortcut icon" href="/favicon.ico">
|
<link rel="shortcut icon" href="/favicon.ico">
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ Staleness check: skips if all output files are newer than every HTML in _site/.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -45,6 +46,19 @@ MAX_PARA_CHARS = 1000 # semantic: truncate before embedding
|
||||||
|
|
||||||
EXCLUDE_URLS = {"/search/", "/build/", "/404.html", "/feed.xml", "/music/feed.xml"}
|
EXCLUDE_URLS = {"/search/", "/build/", "/404.html", "/feed.xml", "/music/feed.xml"}
|
||||||
|
|
||||||
|
|
||||||
|
def atomic_write_bytes(path: Path, data: bytes) -> None:
|
||||||
|
"""Write to path.tmp then os.replace, so an interrupt mid-write
|
||||||
|
cannot leave a truncated file that the next build/serve loads."""
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
tmp = path.with_suffix(path.suffix + ".tmp")
|
||||||
|
tmp.write_bytes(data)
|
||||||
|
os.replace(tmp, path)
|
||||||
|
|
||||||
|
|
||||||
|
def atomic_write_text(path: Path, text: str) -> None:
|
||||||
|
atomic_write_bytes(path, text.encode("utf-8"))
|
||||||
|
|
||||||
STRIP_SELECTORS = [
|
STRIP_SELECTORS = [
|
||||||
"nav", "footer", "#toc", ".link-popup", "script", "style",
|
"nav", "footer", "#toc", ".link-popup", "script", "style",
|
||||||
".page-meta-footer", ".metadata", "[data-pagefind-ignore]",
|
".page-meta-footer", ".metadata", "[data-pagefind-ignore]",
|
||||||
|
|
@ -204,8 +218,7 @@ def main() -> int:
|
||||||
if neighbours:
|
if neighbours:
|
||||||
similar[page["url"]] = neighbours
|
similar[page["url"]] = neighbours
|
||||||
|
|
||||||
SIMILAR_OUT.parent.mkdir(parents=True, exist_ok=True)
|
atomic_write_text(SIMILAR_OUT, json.dumps(similar, ensure_ascii=False, indent=2))
|
||||||
SIMILAR_OUT.write_text(json.dumps(similar, ensure_ascii=False, indent=2))
|
|
||||||
print(f"embed.py: wrote {len(similar)} similar-links entries")
|
print(f"embed.py: wrote {len(similar)} similar-links entries")
|
||||||
|
|
||||||
# --- Semantic index (paragraph level) ---
|
# --- Semantic index (paragraph level) ---
|
||||||
|
|
@ -221,12 +234,12 @@ def main() -> int:
|
||||||
batch_size=64,
|
batch_size=64,
|
||||||
).astype(np.float32)
|
).astype(np.float32)
|
||||||
|
|
||||||
SEMANTIC_BIN.write_bytes(para_vecs.tobytes())
|
atomic_write_bytes(SEMANTIC_BIN, para_vecs.tobytes())
|
||||||
|
|
||||||
meta = [{"url": p["url"], "title": p["title"],
|
meta = [{"url": p["url"], "title": p["title"],
|
||||||
"heading": p["heading"], "excerpt": p["excerpt"]}
|
"heading": p["heading"], "excerpt": p["excerpt"]}
|
||||||
for p in paragraphs]
|
for p in paragraphs]
|
||||||
SEMANTIC_META.write_text(json.dumps(meta, ensure_ascii=False))
|
atomic_write_text(SEMANTIC_META, json.dumps(meta, ensure_ascii=False))
|
||||||
|
|
||||||
print(f"embed.py: wrote {len(paragraphs)} paragraphs to semantic index "
|
print(f"embed.py: wrote {len(paragraphs)} paragraphs to semantic index "
|
||||||
f"({SEMANTIC_BIN.stat().st_size // 1024} KB)")
|
f"({SEMANTIC_BIN.stat().st_size // 1024} KB)")
|
||||||
|
|
|
||||||
|
|
@ -40,15 +40,51 @@ if ! GNUPGHOME="$GNUPGHOME" gpg \
|
||||||
fi
|
fi
|
||||||
echo "sign-site: pre-flight OK — signing $SITE_DIR..." >&2
|
echo "sign-site: pre-flight OK — signing $SITE_DIR..." >&2
|
||||||
|
|
||||||
find "$SITE_DIR" -name "*.html" -print0 | xargs -0 -I {} -P $(nproc) \
|
# Sign sequentially through a single gpg-agent: parallel signing causes
|
||||||
gpg --homedir "$GNUPGHOME" \
|
# pinentry/IPC races where individual signs fail silently while xargs
|
||||||
|
# still exits 0. Atomic write via .tmp + mv avoids leaving a truncated
|
||||||
|
# .sig if the script is interrupted mid-write.
|
||||||
|
sign_one() {
|
||||||
|
local html="$1"
|
||||||
|
local sig="${html}.sig"
|
||||||
|
local tmp="${sig}.tmp"
|
||||||
|
if ! gpg --homedir "$GNUPGHOME" \
|
||||||
--batch \
|
--batch \
|
||||||
--yes \
|
--yes \
|
||||||
--detach-sign \
|
--detach-sign \
|
||||||
--armor \
|
--armor \
|
||||||
--local-user "$SIGNING_KEY" \
|
--local-user "$SIGNING_KEY" \
|
||||||
--output "{}.sig" \
|
--output "$tmp" \
|
||||||
"{}"
|
"$html"; then
|
||||||
|
rm -f "$tmp"
|
||||||
|
echo "sign-site: FAILED to sign $html" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
mv -f "$tmp" "$sig"
|
||||||
|
}
|
||||||
|
|
||||||
|
count=0
|
||||||
|
while IFS= read -r -d '' html; do
|
||||||
|
sign_one "$html"
|
||||||
|
count=$((count + 1))
|
||||||
|
done < <(find "$SITE_DIR" -name "*.html" -print0)
|
||||||
|
|
||||||
|
# Post-sign manifest verification: every .html must have a non-empty
|
||||||
|
# matching .sig. This catches any per-file failure that slipped through
|
||||||
|
# (set -e bails on first failure inside the loop, but a manual --output
|
||||||
|
# write to a directory containing a stale .sig from a prior run could
|
||||||
|
# look "successful" otherwise).
|
||||||
|
missing=0
|
||||||
|
while IFS= read -r -d '' html; do
|
||||||
|
if [ ! -s "${html}.sig" ]; then
|
||||||
|
echo "sign-site: missing/empty signature for $html" >&2
|
||||||
|
missing=$((missing + 1))
|
||||||
|
fi
|
||||||
|
done < <(find "$SITE_DIR" -name "*.html" -print0)
|
||||||
|
|
||||||
|
if [ "$missing" -ne 0 ]; then
|
||||||
|
echo "sign-site: $missing HTML files lack signatures — aborting" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
count=$(find "$SITE_DIR" -name "*.html" -printf '.' | wc -c)
|
|
||||||
echo "Signed $count HTML files in $SITE_DIR."
|
echo "Signed $count HTML files in $SITE_DIR."
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue