Feature modules: URL normalization, Maybe-trust, proper medians
- Empty/all-comments manifest.yaml is the empty archive, not a fatal parse error (AUDIT §3.11) - Backlinks normaliseUrl strips index.html like SimilarLinks, so links to canonical directory URLs invert again; Stats normUrl updated in lockstep (§3.12) - PDF viewer file= query value percent-encoded (hand-rolled RFC 3986 encoder; network-uri is not a dependency) (§3.13) - Photography feed thumbnails embed for flat singles and series children, not just directory entries (§3.14) - Marks trust is Maybe Int: missing confidence/evidence collapses the figure to the bare frame as documented, instead of a literal "0 TRUST"; result-shape glyph centers when no score (§3.15) - Unknown catalog categories fold into one Other bucket; medians take the mean of middle elements; protocol-relative URLs excluded from backlinks; @string/@comment/@preamble skipped in BibTeX parsing; watch-staleness of the once-per-process archive reads documented; stale comments fixed (§3.16, §3.9) Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
f254ce866e
commit
56afdb867a
|
|
@ -163,10 +163,18 @@ readManifest = do
|
||||||
else do
|
else do
|
||||||
parsed <- Y.decodeFileEither manifestPath
|
parsed <- Y.decodeFileEither manifestPath
|
||||||
case parsed of
|
case parsed of
|
||||||
Right es -> return es
|
-- An empty or all-comments file decodes as YAML @Null@,
|
||||||
Left e -> do
|
-- not as a list. That is the legitimate "drained to zero
|
||||||
hPutStrLn stderr $
|
-- entries" state, not a broken file — treat it as the
|
||||||
"[archive] FATAL: manifest.yaml: " ++ show e
|
-- empty manifest the absent-file branch already supports.
|
||||||
|
Right A.Null -> return []
|
||||||
|
Right v -> case A.fromJSON v of
|
||||||
|
A.Success es -> return es
|
||||||
|
A.Error msg -> fatal msg
|
||||||
|
Left e -> fatal (show e)
|
||||||
|
where
|
||||||
|
fatal msg = do
|
||||||
|
hPutStrLn stderr $ "[archive] FATAL: manifest.yaml: " ++ msg
|
||||||
exitFailure
|
exitFailure
|
||||||
|
|
||||||
readRemovedUrls :: IO (Set.Set T.Text)
|
readRemovedUrls :: IO (Set.Set T.Text)
|
||||||
|
|
@ -308,6 +316,12 @@ loadArchiveEntries = do
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
||||||
-- | All archive rules. Called once from 'Site.rules'.
|
-- | All archive rules. Called once from 'Site.rules'.
|
||||||
|
--
|
||||||
|
-- The manifest is read here in 'preprocess' (and 'ArchiveIndex' reads
|
||||||
|
-- its sidecars in once-per-process CAFs), so archive state is fixed at
|
||||||
|
-- rule-generation time: under @site watch@, edits to @manifest.yaml@,
|
||||||
|
-- @removed.yaml@, or the regenerated state JSONs are not picked up
|
||||||
|
-- until the process restarts. One-shot builds are unaffected.
|
||||||
archiveRules :: Rules ()
|
archiveRules :: Rules ()
|
||||||
archiveRules = do
|
archiveRules = do
|
||||||
entries <- preprocess loadArchiveEntries
|
entries <- preprocess loadArchiveEntries
|
||||||
|
|
@ -571,10 +585,17 @@ tallyOf xs = intercalate " \183 "
|
||||||
| (k, c) <- Map.toList (Map.fromListWith (+) [ (x, 1 :: Int) | x <- xs ]) ]
|
| (k, c) <- Map.toList (Map.fromListWith (+) [ (x, 1 :: Int) | x <- xs ]) ]
|
||||||
|
|
||||||
-- | The median of a list of ages, as @"N days"@; an em dash when empty.
|
-- | The median of a list of ages, as @"N days"@; an em dash when empty.
|
||||||
|
-- An even-length list takes the mean of the two middle elements,
|
||||||
|
-- rounded to the nearest whole day.
|
||||||
medianAge :: [Int] -> String
|
medianAge :: [Int] -> String
|
||||||
medianAge [] = "\8212"
|
medianAge [] = "\8212"
|
||||||
medianAge xs =
|
medianAge xs =
|
||||||
let m = sort xs !! (length xs `div` 2)
|
let sorted = sort xs
|
||||||
|
n = length sorted
|
||||||
|
upper = sorted !! (n `div` 2)
|
||||||
|
lower = sorted !! (n `div` 2 - 1) -- forced only when n is even
|
||||||
|
m | odd n = upper
|
||||||
|
| otherwise = (lower + upper + 1) `div` 2
|
||||||
in show m ++ if m == 1 then " day" else " days"
|
in show m ++ if m == 1 then " day" else " days"
|
||||||
|
|
||||||
-- | Parse a @YYYY-MM-DD@ date; 'Nothing' on malformed input.
|
-- | Parse a @YYYY-MM-DD@ date; 'Nothing' on malformed input.
|
||||||
|
|
|
||||||
|
|
@ -15,11 +15,18 @@
|
||||||
-- * @Archive@ — surfaces each entry's rot status on its page, the
|
-- * @Archive@ — surfaces each entry's rot status on its page, the
|
||||||
-- @/archive/@ index, and the @/build/@ telemetry.
|
-- @/archive/@ index, and the @/build/@ telemetry.
|
||||||
--
|
--
|
||||||
-- Both files are loaded once per build via @unsafePerformIO@ CAFs. An
|
-- Both files are loaded once per *process* via NOINLINE
|
||||||
-- absent or malformed file degrades safely: an empty index makes the
|
-- @unsafePerformIO@ CAFs (as are the manifest/removed URL sets below).
|
||||||
|
-- An absent or malformed file degrades safely: an empty index makes the
|
||||||
-- link consumers no-op; an absent state file makes every entry @Live@
|
-- link consumers no-op; an absent state file makes every entry @Live@
|
||||||
-- (the safe default — no link flip). @archive.py check@ is decoupled
|
-- (the safe default — no link flip). @archive.py check@ is decoupled
|
||||||
-- from @make build@; a build consumes whatever state file exists.
|
-- from @make build@; a build consumes whatever state file exists.
|
||||||
|
--
|
||||||
|
-- Consequence of the once-per-process read (shared with the manifest
|
||||||
|
-- read in 'Archive.archiveRules'): under @site watch@, edits to
|
||||||
|
-- @manifest.yaml@, @removed.yaml@, or the regenerated state JSONs are
|
||||||
|
-- not re-read — the server renders stale archive state until restart.
|
||||||
|
-- One-shot builds (@make build@ / @make deploy@) are unaffected.
|
||||||
module ArchiveIndex
|
module ArchiveIndex
|
||||||
( ArchiveStatus (..)
|
( ArchiveStatus (..)
|
||||||
, statusName
|
, statusName
|
||||||
|
|
|
||||||
|
|
@ -138,6 +138,8 @@ isPageLink u
|
||||||
| otherwise =
|
| otherwise =
|
||||||
not (T.isPrefixOf "http://" u) &&
|
not (T.isPrefixOf "http://" u) &&
|
||||||
not (T.isPrefixOf "https://" u) &&
|
not (T.isPrefixOf "https://" u) &&
|
||||||
|
-- protocol-relative //host/path is external, not a page path
|
||||||
|
not (T.isPrefixOf "//" u) &&
|
||||||
not (T.isPrefixOf "#" u) &&
|
not (T.isPrefixOf "#" u) &&
|
||||||
not (T.isPrefixOf "mailto:" u) &&
|
not (T.isPrefixOf "mailto:" u) &&
|
||||||
not (T.isPrefixOf "tel:" u) &&
|
not (T.isPrefixOf "tel:" u) &&
|
||||||
|
|
@ -278,17 +280,25 @@ linksCompiler = do
|
||||||
-- URL normalisation
|
-- URL normalisation
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
||||||
-- | Normalise an internal URL as a map key: strip query string, fragment,
|
-- | Normalise an internal URL as a map key: strip query string and
|
||||||
-- and trailing @.html@; ensure a leading slash; percent-decode the path
|
-- fragment; ensure a leading slash; strip a trailing @index.html@
|
||||||
-- so that @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same
|
-- (keeping the directory slash) before the bare @.html@ extension, so a
|
||||||
-- key.
|
-- page routed @essays\/foo\/index.html@ and a body link authored in the
|
||||||
|
-- canonical directory form @\/essays\/foo\/@ collide on the same key
|
||||||
|
-- (mirrors 'SimilarLinks.normaliseUrl'); percent-decode the path so that
|
||||||
|
-- @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same key.
|
||||||
|
--
|
||||||
|
-- Both sides of the backlink join go through this function: page keys
|
||||||
|
-- via 'backlinksFieldWith' (@normaliseUrl ("/" ++ route)@) and link
|
||||||
|
-- targets via 'targetKey' — so the two always agree.
|
||||||
normaliseUrl :: String -> String
|
normaliseUrl :: String -> String
|
||||||
normaliseUrl url =
|
normaliseUrl url =
|
||||||
let t = T.pack url
|
let t = T.pack url
|
||||||
t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t)))
|
t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t)))
|
||||||
t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1
|
t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1
|
||||||
t3 = fromMaybe t2 (T.stripSuffix ".html" t2)
|
t3 = fromMaybe t2 (T.stripSuffix "index.html" t2)
|
||||||
in percentDecode (T.unpack t3)
|
t4 = fromMaybe t3 (T.stripSuffix ".html" t3)
|
||||||
|
in percentDecode (T.unpack t4)
|
||||||
|
|
||||||
-- | Decode percent-escapes (@%XX@) into raw bytes, then re-interpret the
|
-- | Decode percent-escapes (@%XX@) into raw bytes, then re-interpret the
|
||||||
-- resulting bytestring as UTF-8. Invalid escapes are passed through
|
-- resulting bytestring as UTF-8. Invalid escapes are passed through
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,8 @@ parseBibExtras path = Map.fromList . parseBib <$> readFile' path
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
||||||
-- | Enumerate all entries in a .bib file as (citekey, extra) pairs.
|
-- | Enumerate all entries in a .bib file as (citekey, extra) pairs.
|
||||||
|
-- @\@string@ \/ @\@comment@ \/ @\@preamble@ blocks (case-insensitive)
|
||||||
|
-- carry no citekey and are skipped wholesale.
|
||||||
parseBib :: String -> [(String, BibExtra)]
|
parseBib :: String -> [(String, BibExtra)]
|
||||||
parseBib input = go (dropTo '@' input)
|
parseBib input = go (dropTo '@' input)
|
||||||
where
|
where
|
||||||
|
|
@ -81,10 +83,17 @@ parseBib input = go (dropTo '@' input)
|
||||||
go [] = []
|
go [] = []
|
||||||
go ('@':rest) =
|
go ('@':rest) =
|
||||||
let -- Entry type, then '{', then citekey, then ',', then fields, then '}'.
|
let -- Entry type, then '{', then citekey, then ',', then fields, then '}'.
|
||||||
r1 = dropWhile isAlphaNum rest -- skip type name
|
(typeName, r1) = span isAlphaNum rest
|
||||||
r2 = dropWhile isSpace r1
|
r2 = dropWhile isSpace r1
|
||||||
in case r2 of
|
in case r2 of
|
||||||
'{':r3 ->
|
'{':r3
|
||||||
|
-- Not citekey entries: a @string macro name (or the body
|
||||||
|
-- of a @comment/@preamble) must never be parsed as a
|
||||||
|
-- citekey. Skip the balanced brace group and carry on.
|
||||||
|
| map toLower typeName `elem` ["string", "comment", "preamble"] ->
|
||||||
|
let (_, r4) = readBraces 1 "" r3
|
||||||
|
in go (dropTo '@' r4)
|
||||||
|
| otherwise ->
|
||||||
let (citekey, r4) = span (\c -> c /= ',' && not (isSpace c)) r3
|
let (citekey, r4) = span (\c -> c /= ',' && not (isSpace c)) r3
|
||||||
r5 = dropWhile (\c -> c /= ',' && c /= '}') r4
|
r5 = dropWhile (\c -> c /= ',' && c /= '}') r4
|
||||||
in case r5 of
|
in case r5 of
|
||||||
|
|
|
||||||
|
|
@ -99,7 +99,12 @@ parseCatalogEntry item = do
|
||||||
year = parseYear meta
|
year = parseYear meta
|
||||||
dur = lookupString "duration" meta
|
dur = lookupString "duration" meta
|
||||||
instr = lookupString "instrumentation" meta
|
instr = lookupString "instrumentation" meta
|
||||||
cat = fromMaybe "other" (lookupString "category" meta)
|
-- Fold unknown categories into the canonical "other"
|
||||||
|
-- bucket here: two distinct unknown values share a rank
|
||||||
|
-- but would groupBy into separate groups, rendering as
|
||||||
|
-- adjacent duplicate "Other" sections.
|
||||||
|
rawCat = fromMaybe "other" (lookupString "category" meta)
|
||||||
|
cat = if rawCat `elem` categoryOrder then rawCat else "other"
|
||||||
return $ Just CatalogEntry
|
return $ Just CatalogEntry
|
||||||
{ ceTitle = title
|
{ ceTitle = title
|
||||||
, ceUrl = url
|
, ceUrl = url
|
||||||
|
|
|
||||||
|
|
@ -230,7 +230,7 @@ data EpistemicData = EpistemicData
|
||||||
, epPeerStatus :: Maybe String -- ^ Validated peer-status slug ('Nothing' when absent / unreviewed / invalid).
|
, epPeerStatus :: Maybe String -- ^ Validated peer-status slug ('Nothing' when absent / unreviewed / invalid).
|
||||||
, epResultShape :: Maybe String -- ^ Validated result-shape value.
|
, epResultShape :: Maybe String -- ^ Validated result-shape value.
|
||||||
, epStability :: String -- ^ Always one of the five stability labels.
|
, epStability :: String -- ^ Always one of the five stability labels.
|
||||||
, epTrust :: Int -- ^ Trust score 0–100 (60/40 weighted; @proved@ substitutes 100 for confidence).
|
, epTrust :: Maybe Int -- ^ Trust score 0–100 (60/40 weighted; @proved@ substitutes 100 for confidence). 'Nothing' when confidence or evidence is missing — no label is rendered.
|
||||||
}
|
}
|
||||||
|
|
||||||
-- | Read the figure inputs from a Hakyll item's metadata + git history.
|
-- | Read the figure inputs from a Hakyll item's metadata + git history.
|
||||||
|
|
@ -267,15 +267,16 @@ readEpistemicData item = do
|
||||||
trimS = trim'
|
trimS = trim'
|
||||||
|
|
||||||
-- | Trust score: the same 60/40 weighted composite of confidence and
|
-- | Trust score: the same 60/40 weighted composite of confidence and
|
||||||
-- evidence used by 'Contexts.overallScoreField'. Returns 0 when either
|
-- evidence used by 'Contexts.overallScoreField'. Returns 'Nothing'
|
||||||
-- input is missing — which is fine for the figure (the polygon and
|
-- when either input is missing — the figure then renders no trust
|
||||||
-- trust label simply collapse to the bare frame).
|
-- label at all (it collapses to the bare frame), rather than a
|
||||||
computeTrust :: Maybe Int -> Maybe Int -> Int
|
-- literal "0" indistinguishable from an authored zero score.
|
||||||
|
computeTrust :: Maybe Int -> Maybe Int -> Maybe Int
|
||||||
computeTrust (Just c) (Just e) =
|
computeTrust (Just c) (Just e) =
|
||||||
let raw :: Double
|
let raw :: Double
|
||||||
raw = fromIntegral c / 100.0 * 0.6 + fromIntegral (e - 1) / 4.0 * 0.4
|
raw = fromIntegral c / 100.0 * 0.6 + fromIntegral (e - 1) / 4.0 * 0.4
|
||||||
in max 0 (min 100 (round (raw * 100.0)))
|
in Just (max 0 (min 100 (round (raw * 100.0))))
|
||||||
computeTrust _ _ = 0
|
computeTrust _ _ = Nothing
|
||||||
|
|
||||||
-- | Same predicate as 'Contexts.isProvedConfidence' — local copy to keep
|
-- | Same predicate as 'Contexts.isProvedConfidence' — local copy to keep
|
||||||
-- the module's dependency graph light (Marks → Stability only). The
|
-- the module's dependency graph light (Marks → Stability only). The
|
||||||
|
|
@ -390,15 +391,16 @@ renderEpistemicFigure d = T.concat
|
||||||
[ "<svg xmlns=\"http://www.w3.org/2000/svg\""
|
[ "<svg xmlns=\"http://www.w3.org/2000/svg\""
|
||||||
, " viewBox=\"0 0 200 200\""
|
, " viewBox=\"0 0 200 200\""
|
||||||
, " role=\"img\""
|
, " role=\"img\""
|
||||||
, " aria-label=\"Epistemic figure: trust ", T.pack (show (epTrust d))
|
, " aria-label=\"Epistemic figure: "
|
||||||
, ", stability ", T.pack (epStability d), "\">"
|
, maybe "" (\t -> "trust " <> T.pack (show t) <> ", ") (epTrust d)
|
||||||
|
, "stability ", T.pack (epStability d), "\">"
|
||||||
, renderRoundel
|
, renderRoundel
|
||||||
, renderGuides
|
, renderGuides
|
||||||
, renderAxes
|
, renderAxes
|
||||||
, renderPolygon d
|
, renderPolygon d
|
||||||
, renderVertexMarks d
|
, renderVertexMarks d
|
||||||
, renderTicks (epStability d) (epPeerStatus d)
|
, renderTicks (epStability d) (epPeerStatus d)
|
||||||
, renderTrustLabel (epTrust d)
|
, maybe "" renderTrustLabel (epTrust d)
|
||||||
, renderResultShape (epResultShape d) (epTrust d)
|
, renderResultShape (epResultShape d) (epTrust d)
|
||||||
, "</svg>"
|
, "</svg>"
|
||||||
]
|
]
|
||||||
|
|
@ -578,10 +580,11 @@ renderTrustLabel score = T.concat
|
||||||
, " opacity=\"0.7\">TRUST</text>"
|
, " opacity=\"0.7\">TRUST</text>"
|
||||||
]
|
]
|
||||||
|
|
||||||
-- | Result-shape glyph immediately to the right of the trust score.
|
-- | Result-shape glyph immediately to the right of the trust score —
|
||||||
renderResultShape :: Maybe String -> Int -> T.Text
|
-- or centred in its place when no trust score is rendered.
|
||||||
|
renderResultShape :: Maybe String -> Maybe Int -> T.Text
|
||||||
renderResultShape Nothing _ = ""
|
renderResultShape Nothing _ = ""
|
||||||
renderResultShape (Just shape) score =
|
renderResultShape (Just shape) mScore =
|
||||||
let glyph = case shape of
|
let glyph = case shape of
|
||||||
"positive" -> "+"
|
"positive" -> "+"
|
||||||
"negative" -> "\x2212" -- minus sign (not hyphen-minus)
|
"negative" -> "\x2212" -- minus sign (not hyphen-minus)
|
||||||
|
|
@ -589,15 +592,20 @@ renderResultShape (Just shape) score =
|
||||||
"comparative" -> "\x223C" -- ∼
|
"comparative" -> "\x223C" -- ∼
|
||||||
"descriptive" -> "\x25A1" -- □
|
"descriptive" -> "\x25A1" -- □
|
||||||
_ -> ""
|
_ -> ""
|
||||||
-- Offset proportional to the trust number's width (digits ≈ 8 px each).
|
-- Offset proportional to the trust number's width (digits ≈ 8 px
|
||||||
digitCount = length (show score)
|
-- each); with no trust label the glyph takes the centre itself.
|
||||||
|
(x, anchor) = case mScore of
|
||||||
|
Just score ->
|
||||||
|
let digitCount = length (show score)
|
||||||
offset = fromIntegral digitCount * 4.5 + 3 :: Double
|
offset = fromIntegral digitCount * 4.5 + 3 :: Double
|
||||||
|
in (fxCenter + offset, "start")
|
||||||
|
Nothing -> (fxCenter, "middle")
|
||||||
in if T.null (T.pack glyph)
|
in if T.null (T.pack glyph)
|
||||||
then ""
|
then ""
|
||||||
else T.concat
|
else T.concat
|
||||||
[ "<text x=\"", ff (fxCenter + offset)
|
[ "<text x=\"", ff x
|
||||||
, "\" y=\"", ff (fyCenter + 4)
|
, "\" y=\"", ff (fyCenter + 4)
|
||||||
, "\" text-anchor=\"start\""
|
, "\" text-anchor=\"", anchor, "\""
|
||||||
, " fill=\"currentColor\" stroke=\"none\""
|
, " fill=\"currentColor\" stroke=\"none\""
|
||||||
, " font-family=\"Spectral, serif\" font-size=\"16\">"
|
, " font-family=\"Spectral, serif\" font-size=\"16\">"
|
||||||
, T.pack glyph
|
, T.pack glyph
|
||||||
|
|
|
||||||
|
|
@ -456,13 +456,20 @@ photographyFeedDescription = field "description" $ \item -> do
|
||||||
body <- itemBody <$> (loadSnapshot ident "content" :: Compiler (Item String))
|
body <- itemBody <$> (loadSnapshot ident "content" :: Compiler (Item String))
|
||||||
meta <- getMetadata ident
|
meta <- getMetadata ident
|
||||||
let fp = toFilePath ident
|
let fp = toFilePath ident
|
||||||
isDir = takeFileName fp == "index.md"
|
-- Same asset-path derivation as 'buildPin': directory entries
|
||||||
|
-- (<slug>/index.md) and series children (<series>/<photo>.md)
|
||||||
|
-- both key assets off the parent directory; a flat single
|
||||||
|
-- (content/photography/foo.md) has no entry directory, so its
|
||||||
|
-- co-located assets route to /photography/ directly.
|
||||||
|
isFlat = takeDirectory fp == "content/photography"
|
||||||
|
&& takeFileName fp /= "index.md"
|
||||||
slug = takeFileName (takeDirectory fp)
|
slug = takeFileName (takeDirectory fp)
|
||||||
photo = lookupString "photo" meta
|
imgTag = case lookupString "photo" meta of
|
||||||
imgTag = case (isDir, photo) of
|
Just p | not (null p) ->
|
||||||
(True, Just p) | not (null p) ->
|
let src = if isFlat then "/photography/" ++ p
|
||||||
"<p><img src=\"https://levineuwirth.org/photography/"
|
else "/photography/" ++ slug ++ "/" ++ p
|
||||||
++ slug ++ "/" ++ p ++ "\" alt=\"\"></p>\n"
|
in "<p><img src=\"https://levineuwirth.org"
|
||||||
|
++ src ++ "\" alt=\"\"></p>\n"
|
||||||
_ -> ""
|
_ -> ""
|
||||||
return (imgTag ++ body)
|
return (imgTag ++ body)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,8 @@ instance Aeson.FromJSON SimilarEntry where
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
||||||
-- | Maximum entries rendered in the "Related" block. The on-disk JSON may
|
-- | Maximum entries rendered in the "Related" block. The on-disk JSON may
|
||||||
-- contain more (embed.py's TOP_N); the template caps the display.
|
-- contain more (embed.py's TOP_N); 'similarLinksField' caps the list
|
||||||
|
-- (@take maxSimilar@) before rendering.
|
||||||
maxSimilar :: Int
|
maxSimilar :: Int
|
||||||
maxSimilar = 3
|
maxSimilar = 3
|
||||||
|
|
||||||
|
|
@ -101,10 +102,10 @@ normaliseUrl url =
|
||||||
|
|
||||||
-- | Percent-decode @%XX@ escapes (UTF-8) so percent-encoded paths
|
-- | Percent-decode @%XX@ escapes (UTF-8) so percent-encoded paths
|
||||||
-- collide with their decoded form on map lookup. Mirrors
|
-- collide with their decoded form on map lookup. Mirrors
|
||||||
-- 'Backlinks.percentDecode'; the two implementations are intentionally
|
-- 'Backlinks.percentDecode' (and 'Backlinks.normaliseUrl' now applies
|
||||||
-- duplicated because they apply different normalisations *before*
|
-- the same strip-@index.html@-then-@.html@ normalisation as this
|
||||||
-- decoding (Backlinks strips @.html@ unconditionally; SimilarLinks
|
-- module); the duplication keeps the two modules dependency-free of
|
||||||
-- preserves the trailing-slash form for index pages).
|
-- each other.
|
||||||
percentDecode :: String -> String
|
percentDecode :: String -> String
|
||||||
percentDecode = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack . go
|
percentDecode = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack . go
|
||||||
where
|
where
|
||||||
|
|
@ -121,6 +122,25 @@ percentDecode = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack . go
|
||||||
| c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10)
|
| c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10)
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
-- | Percent-encode a string for use as a URI query value: RFC 3986
|
||||||
|
-- unreserved characters pass through; everything else — including @&@,
|
||||||
|
-- @?@, @#@, spaces, and non-ASCII text via its UTF-8 bytes — becomes
|
||||||
|
-- @%XX@. Hand-rolled (the moral equivalent of network-uri's
|
||||||
|
-- @escapeURIString isUnreserved@) because network-uri is not otherwise
|
||||||
|
-- a dependency. The output is also HTML-attribute-safe: it contains
|
||||||
|
-- only unreserved characters and @%XX@ escapes.
|
||||||
|
percentEncode :: String -> String
|
||||||
|
percentEncode = concatMap enc . BS.unpack . TE.encodeUtf8 . T.pack
|
||||||
|
where
|
||||||
|
enc b
|
||||||
|
| unreserved b = [toEnum (fromIntegral b)]
|
||||||
|
| otherwise = ['%', hexDigit (b `div` 16), hexDigit (b `mod` 16)]
|
||||||
|
unreserved b =
|
||||||
|
let c = toEnum (fromIntegral b) :: Char
|
||||||
|
in (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|
||||||
|
|| (c >= '0' && c <= '9') || c `elem` ("-._~" :: String)
|
||||||
|
hexDigit n = "0123456789ABCDEF" !! fromIntegral n
|
||||||
|
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
-- HTML rendering
|
-- HTML rendering
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
@ -153,8 +173,14 @@ renderSimilarLinks entries =
|
||||||
++ "</a></li>\n"
|
++ "</a></li>\n"
|
||||||
|
|
||||||
renderPdf se =
|
renderPdf se =
|
||||||
|
-- The PDF path becomes the @file=@ query value, so it must be
|
||||||
|
-- percent-encoded (HTML escaping alone leaves @&@/@?@/@#@/spaces
|
||||||
|
-- free to break the query). A @#page=N@ fragment stays a fragment
|
||||||
|
-- of the viewer URL itself — PDF.js reads it from location.hash.
|
||||||
let raw = seUrl se
|
let raw = seUrl se
|
||||||
viewerUrl = "/pdfjs/web/viewer.html?file=" ++ escapeHtml raw
|
(path, frag) = break (== '#') raw
|
||||||
|
viewerUrl = "/pdfjs/web/viewer.html?file="
|
||||||
|
++ percentEncode path ++ escapeHtml frag
|
||||||
in "<li class=\"similar-links-item\">"
|
in "<li class=\"similar-links-item\">"
|
||||||
++ "<a class=\"similar-link pdf-link\""
|
++ "<a class=\"similar-link pdf-link\""
|
||||||
++ " href=\"" ++ viewerUrl ++ "\""
|
++ " href=\"" ++ viewerUrl ++ "\""
|
||||||
|
|
|
||||||
|
|
@ -156,21 +156,35 @@ stripHtmlTags = go
|
||||||
skipApos (_:rs) = skipApos rs
|
skipApos (_:rs) = skipApos rs
|
||||||
skipApos [] = []
|
skipApos [] = []
|
||||||
|
|
||||||
-- | Normalise a page URL for backlink map lookup (strip trailing .html).
|
-- | Normalise a page URL for backlink map lookup. Must mirror
|
||||||
|
-- 'Backlinks.normaliseUrl': strip a trailing @index.html@ (keeping the
|
||||||
|
-- directory slash) before the bare @.html@ extension, so the keys this
|
||||||
|
-- produces match the keys written into @data/backlinks.json@.
|
||||||
normUrl :: String -> String
|
normUrl :: String -> String
|
||||||
normUrl u
|
normUrl u
|
||||||
|
| "index.html" `isSuffixOf` u = take (length u - 10) u
|
||||||
| ".html" `isSuffixOf` u = take (length u - 5) u
|
| ".html" `isSuffixOf` u = take (length u - 5) u
|
||||||
| otherwise = u
|
| otherwise = u
|
||||||
|
|
||||||
pad2 :: (Show a, Integral a) => a -> String
|
pad2 :: (Show a, Integral a) => a -> String
|
||||||
pad2 n = if n < 10 then "0" ++ show n else show n
|
pad2 n = if n < 10 then "0" ++ show n else show n
|
||||||
|
|
||||||
-- | Median of a non-empty list; returns 0 for empty.
|
-- | Median of a non-empty list; returns 0 for empty. An even-length
|
||||||
|
-- list takes the mean of the two middle elements, rounded to the
|
||||||
|
-- nearest unit.
|
||||||
median :: [Int] -> Int
|
median :: [Int] -> Int
|
||||||
median [] = 0
|
median [] = 0
|
||||||
median xs = sort xs !! (length xs `div` 2)
|
median xs
|
||||||
-- Index is < length xs for non-empty xs, so '(!!)' is safe here
|
| odd n = upper
|
||||||
-- by construction. The empty case is caught by the first equation.
|
| otherwise = (lower + upper + 1) `div` 2
|
||||||
|
where
|
||||||
|
-- Indexes are in range for non-empty xs (lower is consulted only
|
||||||
|
-- when n >= 2), so '(!!)' is safe here by construction. The empty
|
||||||
|
-- case is caught by the first equation.
|
||||||
|
sorted = sort xs
|
||||||
|
n = length sorted
|
||||||
|
upper = sorted !! (n `div` 2)
|
||||||
|
lower = sorted !! (n `div` 2 - 1)
|
||||||
|
|
||||||
|
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
@ -755,7 +769,7 @@ renderArchive metrics =
|
||||||
dl [ (k, txt v) | (k, v) <- metrics ]
|
dl [ (k, txt v) | (k, v) <- metrics ]
|
||||||
|
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
-- Static TOC (matches the nine h2 sections above)
|
-- Static TOC (matches the eleven h2 sections above)
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
||||||
pageTOC :: H.Html
|
pageTOC :: H.Html
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue