From 56afdb867a578f59aae027e355e45b5801208296 Mon Sep 17 00:00:00 2001 From: Levi Neuwirth Date: Wed, 10 Jun 2026 11:13:34 -0400 Subject: [PATCH] Feature modules: URL normalization, Maybe-trust, proper medians MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Empty/all-comments manifest.yaml is the empty archive, not a fatal parse error (AUDIT §3.11) - Backlinks normaliseUrl strips index.html like SimilarLinks, so links to canonical directory URLs invert again; Stats normUrl updated in lockstep (§3.12) - PDF viewer file= query value percent-encoded (hand-rolled RFC 3986 encoder; network-uri is not a dependency) (§3.13) - Photography feed thumbnails embed for flat singles and series children, not just directory entries (§3.14) - Marks trust is Maybe Int: missing confidence/evidence collapses the figure to the bare frame as documented, instead of a literal "0 TRUST"; result-shape glyph centers when no score (§3.15) - Unknown catalog categories fold into one Other bucket; medians take the mean of middle elements; protocol-relative URLs excluded from backlinks; @string/@comment/@preamble skipped in BibTeX parsing; watch-staleness of the once-per-process archive reads documented; stale comments fixed (§3.16, §3.9) Co-Authored-By: Claude Fable 5 --- build/Archive.hs | 33 ++++++++++++++++++++++++++------ build/ArchiveIndex.hs | 11 +++++++++-- build/Backlinks.hs | 22 ++++++++++++++++------ build/BibExtras.hs | 31 +++++++++++++++++++----------- build/Catalog.hs | 7 ++++++- build/Marks.hs | 44 +++++++++++++++++++++++++------------------ build/Photography.hs | 19 +++++++++++++------ build/SimilarLinks.hs | 40 ++++++++++++++++++++++++++++++++------- build/Stats.hs | 30 +++++++++++++++++++++-------- 9 files changed, 172 insertions(+), 65 deletions(-) diff --git a/build/Archive.hs b/build/Archive.hs index 693c83e..9718011 100644 --- a/build/Archive.hs +++ b/build/Archive.hs @@ -163,11 +163,19 @@ readManifest = do else do parsed <- Y.decodeFileEither manifestPath case parsed of - Right es -> return es - Left e -> do - hPutStrLn stderr $ - "[archive] FATAL: manifest.yaml: " ++ show e - exitFailure + -- An empty or all-comments file decodes as YAML @Null@, + -- not as a list. That is the legitimate "drained to zero + -- entries" state, not a broken file — treat it as the + -- empty manifest the absent-file branch already supports. + Right A.Null -> return [] + Right v -> case A.fromJSON v of + A.Success es -> return es + A.Error msg -> fatal msg + Left e -> fatal (show e) + where + fatal msg = do + hPutStrLn stderr $ "[archive] FATAL: manifest.yaml: " ++ msg + exitFailure readRemovedUrls :: IO (Set.Set T.Text) readRemovedUrls = do @@ -308,6 +316,12 @@ loadArchiveEntries = do -- --------------------------------------------------------------------------- -- | All archive rules. Called once from 'Site.rules'. +-- +-- The manifest is read here in 'preprocess' (and 'ArchiveIndex' reads +-- its sidecars in once-per-process CAFs), so archive state is fixed at +-- rule-generation time: under @site watch@, edits to @manifest.yaml@, +-- @removed.yaml@, or the regenerated state JSONs are not picked up +-- until the process restarts. One-shot builds are unaffected. archiveRules :: Rules () archiveRules = do entries <- preprocess loadArchiveEntries @@ -571,10 +585,17 @@ tallyOf xs = intercalate " \183 " | (k, c) <- Map.toList (Map.fromListWith (+) [ (x, 1 :: Int) | x <- xs ]) ] -- | The median of a list of ages, as @"N days"@; an em dash when empty. +-- An even-length list takes the mean of the two middle elements, +-- rounded to the nearest whole day. medianAge :: [Int] -> String medianAge [] = "\8212" medianAge xs = - let m = sort xs !! (length xs `div` 2) + let sorted = sort xs + n = length sorted + upper = sorted !! (n `div` 2) + lower = sorted !! (n `div` 2 - 1) -- forced only when n is even + m | odd n = upper + | otherwise = (lower + upper + 1) `div` 2 in show m ++ if m == 1 then " day" else " days" -- | Parse a @YYYY-MM-DD@ date; 'Nothing' on malformed input. diff --git a/build/ArchiveIndex.hs b/build/ArchiveIndex.hs index d651f44..ad810e6 100644 --- a/build/ArchiveIndex.hs +++ b/build/ArchiveIndex.hs @@ -15,11 +15,18 @@ -- * @Archive@ — surfaces each entry's rot status on its page, the -- @/archive/@ index, and the @/build/@ telemetry. -- --- Both files are loaded once per build via @unsafePerformIO@ CAFs. An --- absent or malformed file degrades safely: an empty index makes the +-- Both files are loaded once per *process* via NOINLINE +-- @unsafePerformIO@ CAFs (as are the manifest/removed URL sets below). +-- An absent or malformed file degrades safely: an empty index makes the -- link consumers no-op; an absent state file makes every entry @Live@ -- (the safe default — no link flip). @archive.py check@ is decoupled -- from @make build@; a build consumes whatever state file exists. +-- +-- Consequence of the once-per-process read (shared with the manifest +-- read in 'Archive.archiveRules'): under @site watch@, edits to +-- @manifest.yaml@, @removed.yaml@, or the regenerated state JSONs are +-- not re-read — the server renders stale archive state until restart. +-- One-shot builds (@make build@ / @make deploy@) are unaffected. module ArchiveIndex ( ArchiveStatus (..) , statusName diff --git a/build/Backlinks.hs b/build/Backlinks.hs index a6b3abc..326d7e6 100644 --- a/build/Backlinks.hs +++ b/build/Backlinks.hs @@ -138,6 +138,8 @@ isPageLink u | otherwise = not (T.isPrefixOf "http://" u) && not (T.isPrefixOf "https://" u) && + -- protocol-relative //host/path is external, not a page path + not (T.isPrefixOf "//" u) && not (T.isPrefixOf "#" u) && not (T.isPrefixOf "mailto:" u) && not (T.isPrefixOf "tel:" u) && @@ -278,17 +280,25 @@ linksCompiler = do -- URL normalisation -- --------------------------------------------------------------------------- --- | Normalise an internal URL as a map key: strip query string, fragment, --- and trailing @.html@; ensure a leading slash; percent-decode the path --- so that @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same --- key. +-- | Normalise an internal URL as a map key: strip query string and +-- fragment; ensure a leading slash; strip a trailing @index.html@ +-- (keeping the directory slash) before the bare @.html@ extension, so a +-- page routed @essays\/foo\/index.html@ and a body link authored in the +-- canonical directory form @\/essays\/foo\/@ collide on the same key +-- (mirrors 'SimilarLinks.normaliseUrl'); percent-decode the path so that +-- @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same key. +-- +-- Both sides of the backlink join go through this function: page keys +-- via 'backlinksFieldWith' (@normaliseUrl ("/" ++ route)@) and link +-- targets via 'targetKey' — so the two always agree. normaliseUrl :: String -> String normaliseUrl url = let t = T.pack url t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t))) t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1 - t3 = fromMaybe t2 (T.stripSuffix ".html" t2) - in percentDecode (T.unpack t3) + t3 = fromMaybe t2 (T.stripSuffix "index.html" t2) + t4 = fromMaybe t3 (T.stripSuffix ".html" t3) + in percentDecode (T.unpack t4) -- | Decode percent-escapes (@%XX@) into raw bytes, then re-interpret the -- resulting bytestring as UTF-8. Invalid escapes are passed through diff --git a/build/BibExtras.hs b/build/BibExtras.hs index 43235fc..d5239bf 100644 --- a/build/BibExtras.hs +++ b/build/BibExtras.hs @@ -72,6 +72,8 @@ parseBibExtras path = Map.fromList . parseBib <$> readFile' path -- --------------------------------------------------------------------------- -- | Enumerate all entries in a .bib file as (citekey, extra) pairs. +-- @\@string@ \/ @\@comment@ \/ @\@preamble@ blocks (case-insensitive) +-- carry no citekey and are skipped wholesale. parseBib :: String -> [(String, BibExtra)] parseBib input = go (dropTo '@' input) where @@ -81,19 +83,26 @@ parseBib input = go (dropTo '@' input) go [] = [] go ('@':rest) = let -- Entry type, then '{', then citekey, then ',', then fields, then '}'. - r1 = dropWhile isAlphaNum rest -- skip type name + (typeName, r1) = span isAlphaNum rest r2 = dropWhile isSpace r1 in case r2 of - '{':r3 -> - let (citekey, r4) = span (\c -> c /= ',' && not (isSpace c)) r3 - r5 = dropWhile (\c -> c /= ',' && c /= '}') r4 - in case r5 of - ',':r6 -> - let (flds, r7) = parseFields r6 - in (trim citekey, toExtra flds) : go (dropTo '@' r7) - -- Fieldless entries: walk past and carry on. - '}':r6 -> (trim citekey, emptyBibExtra) : go (dropTo '@' r6) - _ -> [] + '{':r3 + -- Not citekey entries: a @string macro name (or the body + -- of a @comment/@preamble) must never be parsed as a + -- citekey. Skip the balanced brace group and carry on. + | map toLower typeName `elem` ["string", "comment", "preamble"] -> + let (_, r4) = readBraces 1 "" r3 + in go (dropTo '@' r4) + | otherwise -> + let (citekey, r4) = span (\c -> c /= ',' && not (isSpace c)) r3 + r5 = dropWhile (\c -> c /= ',' && c /= '}') r4 + in case r5 of + ',':r6 -> + let (flds, r7) = parseFields r6 + in (trim citekey, toExtra flds) : go (dropTo '@' r7) + -- Fieldless entries: walk past and carry on. + '}':r6 -> (trim citekey, emptyBibExtra) : go (dropTo '@' r6) + _ -> [] _ -> go (dropTo '@' r2) go (_:rest) = go (dropTo '@' rest) diff --git a/build/Catalog.hs b/build/Catalog.hs index b0225d1..77e2823 100644 --- a/build/Catalog.hs +++ b/build/Catalog.hs @@ -99,7 +99,12 @@ parseCatalogEntry item = do year = parseYear meta dur = lookupString "duration" meta instr = lookupString "instrumentation" meta - cat = fromMaybe "other" (lookupString "category" meta) + -- Fold unknown categories into the canonical "other" + -- bucket here: two distinct unknown values share a rank + -- but would groupBy into separate groups, rendering as + -- adjacent duplicate "Other" sections. + rawCat = fromMaybe "other" (lookupString "category" meta) + cat = if rawCat `elem` categoryOrder then rawCat else "other" return $ Just CatalogEntry { ceTitle = title , ceUrl = url diff --git a/build/Marks.hs b/build/Marks.hs index ed4dffd..f9bf6e4 100644 --- a/build/Marks.hs +++ b/build/Marks.hs @@ -230,7 +230,7 @@ data EpistemicData = EpistemicData , epPeerStatus :: Maybe String -- ^ Validated peer-status slug ('Nothing' when absent / unreviewed / invalid). , epResultShape :: Maybe String -- ^ Validated result-shape value. , epStability :: String -- ^ Always one of the five stability labels. - , epTrust :: Int -- ^ Trust score 0–100 (60/40 weighted; @proved@ substitutes 100 for confidence). + , epTrust :: Maybe Int -- ^ Trust score 0–100 (60/40 weighted; @proved@ substitutes 100 for confidence). 'Nothing' when confidence or evidence is missing — no label is rendered. } -- | Read the figure inputs from a Hakyll item's metadata + git history. @@ -267,15 +267,16 @@ readEpistemicData item = do trimS = trim' -- | Trust score: the same 60/40 weighted composite of confidence and --- evidence used by 'Contexts.overallScoreField'. Returns 0 when either --- input is missing — which is fine for the figure (the polygon and --- trust label simply collapse to the bare frame). -computeTrust :: Maybe Int -> Maybe Int -> Int +-- evidence used by 'Contexts.overallScoreField'. Returns 'Nothing' +-- when either input is missing — the figure then renders no trust +-- label at all (it collapses to the bare frame), rather than a +-- literal "0" indistinguishable from an authored zero score. +computeTrust :: Maybe Int -> Maybe Int -> Maybe Int computeTrust (Just c) (Just e) = let raw :: Double raw = fromIntegral c / 100.0 * 0.6 + fromIntegral (e - 1) / 4.0 * 0.4 - in max 0 (min 100 (round (raw * 100.0))) -computeTrust _ _ = 0 + in Just (max 0 (min 100 (round (raw * 100.0)))) +computeTrust _ _ = Nothing -- | Same predicate as 'Contexts.isProvedConfidence' — local copy to keep -- the module's dependency graph light (Marks → Stability only). The @@ -390,15 +391,16 @@ renderEpistemicFigure d = T.concat [ "" + , " aria-label=\"Epistemic figure: " + , maybe "" (\t -> "trust " <> T.pack (show t) <> ", ") (epTrust d) + , "stability ", T.pack (epStability d), "\">" , renderRoundel , renderGuides , renderAxes , renderPolygon d , renderVertexMarks d , renderTicks (epStability d) (epPeerStatus d) - , renderTrustLabel (epTrust d) + , maybe "" renderTrustLabel (epTrust d) , renderResultShape (epResultShape d) (epTrust d) , "" ] @@ -578,10 +580,11 @@ renderTrustLabel score = T.concat , " opacity=\"0.7\">TRUST" ] --- | Result-shape glyph immediately to the right of the trust score. -renderResultShape :: Maybe String -> Int -> T.Text +-- | Result-shape glyph immediately to the right of the trust score — +-- or centred in its place when no trust score is rendered. +renderResultShape :: Maybe String -> Maybe Int -> T.Text renderResultShape Nothing _ = "" -renderResultShape (Just shape) score = +renderResultShape (Just shape) mScore = let glyph = case shape of "positive" -> "+" "negative" -> "\x2212" -- minus sign (not hyphen-minus) @@ -589,15 +592,20 @@ renderResultShape (Just shape) score = "comparative" -> "\x223C" -- ∼ "descriptive" -> "\x25A1" -- □ _ -> "" - -- Offset proportional to the trust number's width (digits ≈ 8 px each). - digitCount = length (show score) - offset = fromIntegral digitCount * 4.5 + 3 :: Double + -- Offset proportional to the trust number's width (digits ≈ 8 px + -- each); with no trust label the glyph takes the centre itself. + (x, anchor) = case mScore of + Just score -> + let digitCount = length (show score) + offset = fromIntegral digitCount * 4.5 + 3 :: Double + in (fxCenter + offset, "start") + Nothing -> (fxCenter, "middle") in if T.null (T.pack glyph) then "" else T.concat - [ "" , T.pack glyph diff --git a/build/Photography.hs b/build/Photography.hs index 95bef5c..f885270 100644 --- a/build/Photography.hs +++ b/build/Photography.hs @@ -456,13 +456,20 @@ photographyFeedDescription = field "description" $ \item -> do body <- itemBody <$> (loadSnapshot ident "content" :: Compiler (Item String)) meta <- getMetadata ident let fp = toFilePath ident - isDir = takeFileName fp == "index.md" + -- Same asset-path derivation as 'buildPin': directory entries + -- (/index.md) and series children (/.md) + -- both key assets off the parent directory; a flat single + -- (content/photography/foo.md) has no entry directory, so its + -- co-located assets route to /photography/ directly. + isFlat = takeDirectory fp == "content/photography" + && takeFileName fp /= "index.md" slug = takeFileName (takeDirectory fp) - photo = lookupString "photo" meta - imgTag = case (isDir, photo) of - (True, Just p) | not (null p) -> - "

\"\"

\n" + imgTag = case lookupString "photo" meta of + Just p | not (null p) -> + let src = if isFlat then "/photography/" ++ p + else "/photography/" ++ slug ++ "/" ++ p + in "

\"\"

\n" _ -> "" return (imgTag ++ body) diff --git a/build/SimilarLinks.hs b/build/SimilarLinks.hs index 639a4a3..d12376f 100644 --- a/build/SimilarLinks.hs +++ b/build/SimilarLinks.hs @@ -49,7 +49,8 @@ instance Aeson.FromJSON SimilarEntry where -- --------------------------------------------------------------------------- -- | Maximum entries rendered in the "Related" block. The on-disk JSON may --- contain more (embed.py's TOP_N); the template caps the display. +-- contain more (embed.py's TOP_N); 'similarLinksField' caps the list +-- (@take maxSimilar@) before rendering. maxSimilar :: Int maxSimilar = 3 @@ -101,10 +102,10 @@ normaliseUrl url = -- | Percent-decode @%XX@ escapes (UTF-8) so percent-encoded paths -- collide with their decoded form on map lookup. Mirrors --- 'Backlinks.percentDecode'; the two implementations are intentionally --- duplicated because they apply different normalisations *before* --- decoding (Backlinks strips @.html@ unconditionally; SimilarLinks --- preserves the trailing-slash form for index pages). +-- 'Backlinks.percentDecode' (and 'Backlinks.normaliseUrl' now applies +-- the same strip-@index.html@-then-@.html@ normalisation as this +-- module); the duplication keeps the two modules dependency-free of +-- each other. percentDecode :: String -> String percentDecode = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack . go where @@ -121,6 +122,25 @@ percentDecode = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack . go | c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10) | otherwise = Nothing +-- | Percent-encode a string for use as a URI query value: RFC 3986 +-- unreserved characters pass through; everything else — including @&@, +-- @?@, @#@, spaces, and non-ASCII text via its UTF-8 bytes — becomes +-- @%XX@. Hand-rolled (the moral equivalent of network-uri's +-- @escapeURIString isUnreserved@) because network-uri is not otherwise +-- a dependency. The output is also HTML-attribute-safe: it contains +-- only unreserved characters and @%XX@ escapes. +percentEncode :: String -> String +percentEncode = concatMap enc . BS.unpack . TE.encodeUtf8 . T.pack + where + enc b + | unreserved b = [toEnum (fromIntegral b)] + | otherwise = ['%', hexDigit (b `div` 16), hexDigit (b `mod` 16)] + unreserved b = + let c = toEnum (fromIntegral b) :: Char + in (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + || (c >= '0' && c <= '9') || c `elem` ("-._~" :: String) + hexDigit n = "0123456789ABCDEF" !! fromIntegral n + -- --------------------------------------------------------------------------- -- HTML rendering -- --------------------------------------------------------------------------- @@ -153,8 +173,14 @@ renderSimilarLinks entries = ++ "\n" renderPdf se = - let raw = seUrl se - viewerUrl = "/pdfjs/web/viewer.html?file=" ++ escapeHtml raw + -- The PDF path becomes the @file=@ query value, so it must be + -- percent-encoded (HTML escaping alone leaves @&@/@?@/@#@/spaces + -- free to break the query). A @#page=N@ fragment stays a fragment + -- of the viewer URL itself — PDF.js reads it from location.hash. + let raw = seUrl se + (path, frag) = break (== '#') raw + viewerUrl = "/pdfjs/web/viewer.html?file=" + ++ percentEncode path ++ escapeHtml frag in "