From a358c8b2464ff92badfa95842b5f145ece2285fc Mon Sep 17 00:00:00 2001 From: Levi Neuwirth Date: Fri, 10 Apr 2026 17:40:49 -0400 Subject: [PATCH 1/4] audit: Haskell build system correctness + Patterns.hs + Stats blaze rewrite --- build/Authors.hs | 45 +-- build/Backlinks.hs | 44 ++- build/Catalog.hs | 57 ++- build/Citations.hs | 11 +- build/Commonplace.hs | 6 +- build/Compilers.hs | 13 +- build/Contexts.hs | 168 ++++++-- build/Filters.hs | 29 +- build/Filters/EmbedPdf.hs | 12 +- build/Filters/Images.hs | 139 ++++--- build/Filters/Links.hs | 39 +- build/Filters/Score.hs | 44 ++- build/Filters/Sidenotes.hs | 52 ++- build/Filters/Smallcaps.hs | 9 +- build/Filters/Transclusion.hs | 35 +- build/Filters/Typography.hs | 20 +- build/Filters/Viz.hs | 31 +- build/Filters/Wikilinks.hs | 37 +- build/Metadata.hs | 2 - build/Patterns.hs | 100 +++++ build/SimilarLinks.hs | 26 +- build/Site.hs | 62 ++- build/Stability.hs | 59 ++- build/Stats.hs | 705 +++++++++++++++++++++------------- build/Tags.hs | 25 +- build/Utils.hs | 56 ++- cabal.project.freeze | 4 +- levineuwirth.cabal | 8 +- 28 files changed, 1290 insertions(+), 548 deletions(-) delete mode 100644 build/Metadata.hs create mode 100644 build/Patterns.hs diff --git a/build/Authors.hs b/build/Authors.hs index 393fb5b..ea30c6b 100644 --- a/build/Authors.hs +++ b/build/Authors.hs @@ -12,32 +12,29 @@ module Authors ( buildAllAuthors , applyAuthorRules - , authorLinksField ) where -import Data.Char (isAlphaNum, toLower) import Data.Maybe (fromMaybe) import Hakyll -import Hakyll.Core.Metadata (lookupStringList) import Pagination (sortAndGroup) -import Tags (tagLinksField) +import Patterns (authorIndexable) +import Contexts (abstractField, tagLinksField) +import Utils (authorSlugify, authorNameOf) -- --------------------------------------------------------------------------- -- Slug helpers +-- +-- The slugify and nameOf helpers used to live here in their own +-- definitions; they now defer to 'Utils' so that they cannot drift from +-- the 'Contexts' versions on Unicode edge cases. -- --------------------------------------------------------------------------- --- | Lowercase, replace spaces with hyphens, strip anything else. slugify :: String -> String -slugify = map (\c -> if c == ' ' then '-' else c) - . filter (\c -> isAlphaNum c || c == ' ') - . map toLower +slugify = authorSlugify --- | Extract the author name from a "Name | url" entry, trimming whitespace. nameOf :: String -> String -nameOf s = strip $ case break (== '|') s of { (n, _) -> n } - where - strip = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ') +nameOf = authorNameOf -- --------------------------------------------------------------------------- @@ -47,8 +44,10 @@ nameOf s = strip $ case break (== '|') s of { (n, _) -> n } defaultAuthor :: String defaultAuthor = "Levi Neuwirth" +-- | Content patterns indexed by author. Sourced from 'Patterns.authorIndexable' +-- so this stays in lockstep with Tags.hs and Backlinks.hs. allContent :: Pattern -allContent = ("content/essays/*.md" .||. "content/blog/*.md") .&&. hasNoVersion +allContent = authorIndexable -- --------------------------------------------------------------------------- @@ -103,25 +102,7 @@ applyAuthorRules authors baseCtx = tagsRules authors $ \name pat -> do where itemCtx = dateField "date" "%-d %B %Y" <> tagLinksField "item-tags" + <> abstractField <> defaultContext --- --------------------------------------------------------------------------- --- Context field --- --------------------------------------------------------------------------- - --- | Exposes each item's authors as @author-name@ / @author-url@ pairs. --- All links point to /authors/{slug}/, regardless of any URL in frontmatter. --- Defaults to Levi Neuwirth when no "authors" frontmatter key is present. --- --- Usage in templates: --- $for(author-links)$$author-name$$sep$, $endfor$ -authorLinksField :: Context a -authorLinksField = listFieldWith "author-links" ctx $ \item -> do - meta <- getMetadata (itemIdentifier item) - let entries = fromMaybe [] (lookupStringList "authors" meta) - names = if null entries then [defaultAuthor] else map nameOf entries - return $ map (\n -> Item (fromFilePath "") (n, "/authors/" ++ slugify n ++ "/")) names - where - ctx = field "author-name" (return . fst . itemBody) - <> field "author-url" (return . snd . itemBody) diff --git a/build/Backlinks.hs b/build/Backlinks.hs index e364b81..0f37ef5 100644 --- a/build/Backlinks.hs +++ b/build/Backlinks.hs @@ -32,10 +32,12 @@ import Data.Ord (comparing) import Data.Maybe (fromMaybe) import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) +import qualified Data.ByteString as BS import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE import qualified Data.Text.Encoding as TE +import qualified Data.Text.Encoding.Error as TE import qualified Data.Aeson as Aeson import Data.Aeson ((.=)) import Text.Pandoc.Class (runPure) @@ -47,6 +49,7 @@ import Text.Pandoc.Walk (query) import Hakyll import Compilers (readerOpts, writerOpts) import Filters (preprocessSource) +import qualified Patterns as P -- --------------------------------------------------------------------------- -- Link-with-context entry (intermediate, saved by the "links" pass) @@ -184,28 +187,47 @@ linksCompiler = do -- --------------------------------------------------------------------------- -- | Normalise an internal URL as a map key: strip query string, fragment, --- and trailing @.html@; ensure a leading slash. +-- and trailing @.html@; ensure a leading slash; percent-decode the path +-- so that @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same +-- key. normaliseUrl :: String -> String normaliseUrl url = let t = T.pack url t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t))) t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1 t3 = fromMaybe t2 (T.stripSuffix ".html" t2) - in T.unpack t3 + in percentDecode (T.unpack t3) + +-- | Decode percent-escapes (@%XX@) into raw bytes, then re-interpret the +-- resulting bytestring as UTF-8. Invalid escapes are passed through +-- verbatim so this is safe to call on already-decoded input. +percentDecode :: String -> String +percentDecode = T.unpack . TE.decodeUtf8With lenientDecode . pack . go + where + go [] = [] + go ('%':a:b:rest) + | Just hi <- hexDigit a + , Just lo <- hexDigit b + = fromIntegral (hi * 16 + lo) : go rest + go (c:rest) = fromIntegral (fromEnum c) : go rest + + hexDigit c + | c >= '0' && c <= '9' = Just (fromEnum c - fromEnum '0') + | c >= 'a' && c <= 'f' = Just (fromEnum c - fromEnum 'a' + 10) + | c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10) + | otherwise = Nothing + + pack = BS.pack + lenientDecode = TE.lenientDecode -- --------------------------------------------------------------------------- --- Content patterns (must match the rules in Site.hs) +-- Content patterns (must match the rules in Site.hs — sourced from +-- Patterns.allContent so additions to the canonical list automatically +-- propagate to backlinks). -- --------------------------------------------------------------------------- allContent :: Pattern -allContent = - "content/essays/*.md" - .||. "content/essays/*/index.md" - .||. "content/blog/*.md" - .||. "content/poetry/*.md" - .||. "content/fiction/*.md" - .||. "content/music/*/index.md" - .||. "content/*.md" +allContent = P.allContent -- --------------------------------------------------------------------------- -- Hakyll rules diff --git a/build/Catalog.hs b/build/Catalog.hs index 5912f3f..85039a8 100644 --- a/build/Catalog.hs +++ b/build/Catalog.hs @@ -7,7 +7,8 @@ module Catalog ( musicCatalogCtx ) where -import Data.List (groupBy, sortBy) +import Data.Char (isSpace, toLower) +import Data.List (groupBy, isPrefixOf, sortBy) import Data.Maybe (fromMaybe) import Data.Ord (comparing) import Data.Aeson (Value (..)) @@ -15,7 +16,6 @@ import qualified Data.Aeson.KeyMap as KM import qualified Data.Vector as V import qualified Data.Text as T import Hakyll -import Hakyll.Core.Metadata (lookupStringList) import Contexts (siteCtx) -- --------------------------------------------------------------------------- @@ -115,6 +115,40 @@ parseCatalogEntry item = do -- --------------------------------------------------------------------------- -- HTML rendering -- --------------------------------------------------------------------------- +-- +-- Trust model: per the site convention (see also Stats.hs:pageLink), +-- frontmatter @title@ values are author-controlled trusted HTML and may +-- contain inline markup such as @...@. They are emitted +-- pre-escaped — but we still escape every other interpolated frontmatter +-- value (year, duration, instrumentation) and sanitize hrefs through +-- 'safeHref', so a stray @<@ in those fields cannot break the markup. + +-- | Defense-in-depth href sanitiser. Mirrors 'Stats.isSafeUrl'. +safeHref :: String -> String +safeHref u = + let norm = map toLower (dropWhile isSpace u) + in if not ("//" `isPrefixOf` norm) + && any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"] + then escAttr u + else "#" + +escAttr :: String -> String +escAttr = concatMap esc + where + esc '&' = "&" + esc '<' = "<" + esc '>' = ">" + esc '"' = """ + esc '\'' = "'" + esc c = [c] + +escText :: String -> String +escText = concatMap esc + where + esc '&' = "&" + esc '<' = "<" + esc '>' = ">" + esc c = [c] renderIndicators :: CatalogEntry -> String renderIndicators e = concatMap render @@ -129,19 +163,21 @@ renderEntry :: CatalogEntry -> String renderEntry e = concat [ "
  • " , "
    " - , "", ceTitle e, "" + , "" + , ceTitle e + , "" , renderIndicators e - , maybe "" (\y -> "" ++ y ++ "") (ceYear e) - , maybe "" (\d -> "" ++ d ++ "") (ceDuration e) + , maybe "" (\y -> "" ++ escText y ++ "") (ceYear e) + , maybe "" (\d -> "" ++ escText d ++ "") (ceDuration e) , "
    " - , maybe "" (\i -> "
    " ++ i ++ "
    ") (ceInstrumentation e) + , maybe "" (\i -> "
    " ++ escText i ++ "
    ") (ceInstrumentation e) , "
  • " ] renderCategorySection :: String -> [CatalogEntry] -> String renderCategorySection cat entries = concat [ "
    " - , "

    ", categoryLabel cat, "

    " + , "

    ", escText (categoryLabel cat), "

    " , "" @@ -191,7 +227,12 @@ catalogByCategoryField = field "catalog-by-category" $ \_ -> do else do let sorted = sortBy (comparing (categoryRank . ceCategory)) entries grouped = groupBy (\a b -> ceCategory a == ceCategory b) sorted - return $ concatMap (\g -> renderCategorySection (ceCategory (head g)) g) grouped + return $ concatMap renderGroup grouped + where + -- groupBy on a non-empty list yields non-empty sublists, but pattern + -- matching is total whereas 'head' is not. + renderGroup [] = "" + renderGroup g@(e : _) = renderCategorySection (ceCategory e) g musicCatalogCtx :: Context String musicCatalogCtx = diff --git a/build/Citations.hs b/build/Citations.hs index a99aad9..f7fbc8e 100644 --- a/build/Citations.hs +++ b/build/Citations.hs @@ -137,9 +137,14 @@ transformInline :: Map Text Int -> Inline -> Inline transformInline keyNums (Cite citations _) = let keys = map citationId citations nums = mapMaybe (`Map.lookup` keyNums) keys - in if null nums - then Str "" - else RawInline "html" (markerHtml keys (head keys) (head nums) nums) + in case (keys, nums) of + -- Both lists are guaranteed non-empty by the @null nums@ check + -- below, but pattern-match to keep this total instead of + -- relying on @head@. + (firstKey : _, firstNum : _) -> + RawInline "html" (markerHtml keys firstKey firstNum nums) + _ -> + Str "" transformInline _ x = x markerHtml :: [Text] -> Text -> Int -> [Int] -> Text diff --git a/build/Commonplace.hs b/build/Commonplace.hs index d05d4e0..62ac381 100644 --- a/build/Commonplace.hs +++ b/build/Commonplace.hs @@ -125,9 +125,9 @@ renderThemedView entries = renderChronoView :: [CPEntry] -> String renderChronoView entries = "" where sorted = sortBy (comparing (Down . cpDateAdded)) entries diff --git a/build/Compilers.hs b/build/Compilers.hs index 6c5a5e3..dfd3640 100644 --- a/build/Compilers.hs +++ b/build/Compilers.hs @@ -12,7 +12,6 @@ module Compilers ) where import Hakyll -import Hakyll.Core.Metadata (lookupStringList, lookupString) import Text.Pandoc.Definition (Pandoc (..), Block (..), Inline (..)) import Text.Pandoc.Options (ReaderOptions (..), WriterOptions (..), @@ -158,7 +157,9 @@ essayCompilerWith rOpts = do Viz.inlineViz srcDir pandocWithScores -- Apply remaining AST-level filters (sidenotes, smallcaps, links, etc.). - let pandocFiltered = applyAll pandocWithViz + -- applyAll touches the filesystem via Images.apply (webp existence + -- check), so it runs through unsafeCompiler. + pandocFiltered <- unsafeCompiler $ applyAll srcDir pandocWithViz let pandocItem' = itemSetBody pandocFiltered pandocItem -- Build TOC from the filtered AST. @@ -205,8 +206,12 @@ pageCompiler = do body <- getResourceBody let src = itemBody body body' = itemSetBody (preprocessSource src) body - pandocItem <- fmap (fmap applyAll) (readPandocWith readerOpts body') - let htmlItem = writePandocWith writerOpts pandocItem + filePath <- getResourceFilePath + let srcDir = takeDirectory filePath + pandocItem <- readPandocWith readerOpts body' + pandocFiltered <- unsafeCompiler $ applyAll srcDir (itemBody pandocItem) + let pandocItem' = itemSetBody pandocFiltered pandocItem + let htmlItem = writePandocWith writerOpts pandocItem' _ <- saveSnapshot "word-count" (itemSetBody (show (wordCount src)) htmlItem) _ <- saveSnapshot "reading-time" (itemSetBody (show (readingTime src)) htmlItem) return htmlItem diff --git a/build/Contexts.hs b/build/Contexts.hs index 937312c..bc18d42 100644 --- a/build/Contexts.hs +++ b/build/Contexts.hs @@ -9,13 +9,16 @@ module Contexts , fictionCtx , compositionCtx , contentKindField + , abstractField + , tagLinksField + , authorLinksField ) where import Data.Aeson (Value (..)) import qualified Data.Aeson.KeyMap as KM import qualified Data.Vector as V -import Data.List (isPrefixOf) -import Data.Maybe (catMaybes, fromMaybe) +import Data.List (intercalate, isPrefixOf) +import Data.Maybe (fromMaybe) import Data.Time.Calendar (toGregorian) import Data.Time.Clock (getCurrentTime, utctDay) import Data.Time.Format (formatTime, defaultTimeLocale) @@ -24,13 +27,11 @@ import Text.Read (readMaybe) import qualified Data.Text as T import Text.Pandoc (runPure, readMarkdown, writeHtml5String, Pandoc(..), Block(..), Inline(..)) import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..)) -import Hakyll -import Hakyll.Core.Metadata (lookupStringList) -import Authors (authorLinksField) +import Hakyll hiding (trim) import Backlinks (backlinksField) import SimilarLinks (similarLinksField) import Stability (stabilityField, lastReviewedField, versionHistoryField) -import Tags (tagLinksField) +import Utils (authorSlugify, authorNameOf, trim) -- --------------------------------------------------------------------------- -- Affiliation field @@ -96,13 +97,13 @@ contentKindField = field "item-kind" $ \item -> do r <- getRoute (itemIdentifier item) return $ case r of Nothing -> "Page" - Just route - | "essays/" `isPrefixOf` route -> "Essay" - | "blog/" `isPrefixOf` route -> "Post" - | "poetry/" `isPrefixOf` route -> "Poem" - | "fiction/" `isPrefixOf` route -> "Fiction" - | "music/" `isPrefixOf` route -> "Composition" - | otherwise -> "Page" + Just r' + | "essays/" `isPrefixOf` r' -> "Essay" + | "blog/" `isPrefixOf` r' -> "Post" + | "poetry/" `isPrefixOf` r' -> "Poem" + | "fiction/" `isPrefixOf` r' -> "Fiction" + | "music/" `isPrefixOf` r' -> "Composition" + | otherwise -> "Page" -- --------------------------------------------------------------------------- -- Site-wide context @@ -112,22 +113,79 @@ contentKindField = field "item-kind" $ \item -> do -- in the @js:@ frontmatter key (accepts a scalar string or a YAML list). -- Returns an empty list when absent; $for iterates zero times, emitting nothing. -- NOTE: do not use fail here — $for does not catch noResult the way $if does. +-- +-- Each child Item is keyed on @#js-@ so that two +-- pages referencing the same script path (e.g. @shared.js@) do not collide +-- in Hakyll's item store. pageScriptsField :: Context String pageScriptsField = listFieldWith "page-scripts" ctx $ \item -> do meta <- getMetadata (itemIdentifier item) let scripts = case lookupStringList "js" meta of Just xs -> xs Nothing -> maybe [] (:[]) (lookupString "js" meta) - return $ map (\s -> Item (fromFilePath s) s) scripts + parent = toFilePath (itemIdentifier item) + return $ zipWith + (\i s -> Item (fromFilePath (parent ++ "#js-" ++ show (i :: Int))) s) + [0 ..] + scripts where ctx = field "script-src" (return . itemBody) +-- --------------------------------------------------------------------------- +-- Tag links field +-- --------------------------------------------------------------------------- + +-- | List context field exposing an item's own (non-expanded) tags as +-- @tag-name@ / @tag-url@ objects. +-- +-- $for(essay-tags)$$tag-name$$endfor$ +tagLinksField :: String -> Context a +tagLinksField fieldName = listFieldWith fieldName ctx $ \item -> + map toItem <$> getTags (itemIdentifier item) + where + toItem t = Item (fromFilePath (t ++ "/index.html")) t + ctx = field "tag-name" (return . itemBody) + <> field "tag-url" (\i -> return $ "/" ++ itemBody i ++ "/") + +-- --------------------------------------------------------------------------- +-- Author links field +-- --------------------------------------------------------------------------- +-- +-- 'authorSlugify' and 'authorNameOf' are imported from 'Utils' so that +-- they cannot drift from the copies in 'Authors'. + +-- | Exposes each item's authors as @author-name@ / @author-url@ pairs. +-- Defaults to Levi Neuwirth when no "authors" frontmatter key is present. +-- +-- Entries that produce an empty name (e.g. @"| https://url"@) or an empty +-- slug (e.g. all-punctuation names) are dropped, so the field never emits +-- a @/authors//@ link. +-- +-- $for(author-links)$$author-name$$sep$, $endfor$ +authorLinksField :: Context a +authorLinksField = listFieldWith "author-links" ctx $ \item -> do + meta <- getMetadata (itemIdentifier item) + let entries = fromMaybe [] (lookupStringList "authors" meta) + rawNames = if null entries then ["Levi Neuwirth"] else map authorNameOf entries + validNames = filter (\n -> not (null n) && not (null (authorSlugify n))) rawNames + names = if null validNames then ["Levi Neuwirth"] else validNames + return $ map (\n -> Item (fromFilePath "") (n, "/authors/" ++ authorSlugify n ++ "/")) names + where + ctx = field "author-name" (return . fst . itemBody) + <> field "author-url" (return . snd . itemBody) + -- --------------------------------------------------------------------------- -- Abstract field -- --------------------------------------------------------------------------- -- | Renders the abstract using Pandoc to support Markdown and LaTeX math. --- Strips the outer

    tag if the abstract is a single paragraph. +-- Strips the outer @

    @ wrapping. A single-paragraph abstract becomes a +-- bare @Plain@ so the rendered HTML is unwrapped inlines. A multi-paragraph +-- abstract (author used a blank line in the YAML literal block) is flattened +-- to a single @Plain@ with @LineBreak@ separators between what were +-- originally paragraph boundaries — the visual break is preserved without +-- emitting stray @

    @ tags inside the metadata block. Mixed block content +-- (e.g. an abstract containing a blockquote) falls through unchanged. abstractField :: Context String abstractField = field "abstract" $ \item -> do meta <- getMetadata (itemIdentifier item) @@ -138,12 +196,20 @@ abstractField = field "abstract" $ \item -> do doc <- readMarkdown defaultHakyllReaderOptions (T.pack src) let doc' = case doc of Pandoc m [Para ils] -> Pandoc m [Plain ils] - _ -> doc + Pandoc m blocks + | all isPara blocks && not (null blocks) -> + let joined = intercalate [LineBreak] + [ils | Para ils <- blocks] + in Pandoc m [Plain joined] + _ -> doc let wOpts = defaultHakyllWriterOptions { writerHTMLMathMethod = MathML } writeHtml5String wOpts doc' case pandocResult of Left err -> fail $ "Pandoc error rendering abstract: " ++ show err Right html -> return (T.unpack html) + where + isPara (Para _) = True + isPara _ = False siteCtx :: Context String siteCtx = @@ -208,21 +274,37 @@ dotsField ctxKey metaKey = field ctxKey $ \item -> do -- | @$confidence-trend$@: ↑, ↓, or → derived from the last two entries -- in the @confidence-history@ frontmatter list. Returns @noResult@ when -- there is no history or only a single entry. +-- +-- The arrow flips when the absolute change crosses 'trendThreshold' +-- (currently 5 percentage points). Smaller swings count as flat. confidenceTrendField :: Context String confidenceTrendField = field "confidence-trend" $ \item -> do meta <- getMetadata (itemIdentifier item) case lookupStringList "confidence-history" meta of - Nothing -> fail "no confidence history" - Just xs | length xs < 2 -> fail "no confidence history" - Just xs -> - let prev = readMaybe (xs !! (length xs - 2)) :: Maybe Int - cur = readMaybe (last xs) :: Maybe Int - in case (prev, cur) of - (Just p, Just c) - | c - p > 5 -> return "\x2191" -- ↑ - | p - c > 5 -> return "\x2193" -- ↓ - | otherwise -> return "\x2192" -- → - _ -> return "\x2192" + Nothing -> fail "no confidence history" + Just xs -> case lastTwo xs of + Nothing -> fail "no confidence history" + Just (prevS, curS) -> + let prev = readMaybe prevS :: Maybe Int + cur = readMaybe curS :: Maybe Int + in case (prev, cur) of + (Just p, Just c) + | c - p > trendThreshold -> return "\x2191" -- ↑ + | p - c > trendThreshold -> return "\x2193" -- ↓ + | otherwise -> return "\x2192" -- → + _ -> return "\x2192" + where + trendThreshold :: Int + trendThreshold = 5 + + -- Total replacement for @(xs !! (length xs - 2), last xs)@: returns + -- the last two elements of a list, in order, or 'Nothing' when the + -- list has fewer than two entries. + lastTwo :: [a] -> Maybe (a, a) + lastTwo [] = Nothing + lastTwo [_] = Nothing + lastTwo [a, b] = Just (a, b) + lastTwo (_ : rest) = lastTwo rest -- | @$overall-score$@: weighted composite of confidence (50 %), -- evidence quality (30 %), and importance (20 %), expressed as an @@ -332,12 +414,27 @@ data Movement = Movement , movAudio :: Maybe String } -parseMovements :: Metadata -> [Movement] -parseMovements meta = +-- | Parse the @movements@ frontmatter key. Returns parsed movements and a +-- list of human-readable warnings for any entries that failed to parse. +-- Callers can surface the warnings via 'unsafeCompiler' so silent typos +-- don't strip movements without diagnostic. +parseMovementsWithWarnings :: Metadata -> ([Movement], [String]) +parseMovementsWithWarnings meta = case KM.lookup "movements" meta of - Just (Array v) -> catMaybes $ map parseOne (V.toList v) - _ -> [] + Just (Array v) -> + let results = zipWith parseIndexed [1 :: Int ..] (V.toList v) + in ( [m | Right m <- results] + , [w | Left w <- results] + ) + _ -> ([], []) where + parseIndexed i value = + case parseOne value of + Just m -> Right m + Nothing -> Left $ + "movement #" ++ show i ++ " is missing a required field " + ++ "(name, page, or duration) — entry skipped" + parseOne (Object o) = Movement <$> (getString =<< KM.lookup "name" o) <*> (getInt =<< KM.lookup "page" o) @@ -351,6 +448,9 @@ parseMovements meta = getInt (Number n) = Just (floor (fromRational (toRational n) :: Double)) getInt _ = Nothing +parseMovements :: Metadata -> [Movement] +parseMovements = fst . parseMovementsWithWarnings + -- | Extract the composition slug from an item's identifier. -- "content/music/symphonic-dances/index.md" → "symphonic-dances" compSlug :: Item a -> String @@ -410,7 +510,11 @@ compositionCtx = movementsListField = listFieldWith "movements" movCtx $ \item -> do meta <- getMetadata (itemIdentifier item) - let mvs = parseMovements meta + let (mvs, warnings) = parseMovementsWithWarnings meta + ident = toFilePath (itemIdentifier item) + unsafeCompiler $ mapM_ + (\w -> putStrLn $ "[Movements] " ++ ident ++ ": " ++ w) + warnings return $ zipWith (\idx mv -> Item (fromFilePath ("mv" ++ show (idx :: Int))) mv) [1..] mvs diff --git a/build/Filters.hs b/build/Filters.hs index d2c13e7..9f00073 100644 --- a/build/Filters.hs +++ b/build/Filters.hs @@ -22,16 +22,25 @@ import qualified Filters.Images as Images -- | Apply all AST-level filters in pipeline order. -- Run on the Pandoc document after reading, before writing. -applyAll :: Pandoc -> Pandoc -applyAll - = Sidenotes.apply - . Typography.apply - . Links.apply - . Smallcaps.apply - . Dropcaps.apply - . Math.apply - . Code.apply - . Images.apply +-- +-- 'Filters.Images.apply' is the only IO-performing filter (it probes the +-- filesystem for @.webp@ companions before deciding whether to emit +-- @@). It runs first — i.e. innermost in the composition — and +-- every downstream filter stays pure. @srcDir@ is the directory of the +-- source Markdown file, passed through to Images for relative-path +-- resolution of co-located assets. +applyAll :: FilePath -> Pandoc -> IO Pandoc +applyAll srcDir doc = do + imagesDone <- Images.apply srcDir doc + pure + . Sidenotes.apply + . Typography.apply + . Links.apply + . Smallcaps.apply + . Dropcaps.apply + . Math.apply + . Code.apply + $ imagesDone -- | Apply source-level preprocessors to the raw Markdown string. -- Order matters: EmbedPdf must run before Transclusion, because the diff --git a/build/Filters/EmbedPdf.hs b/build/Filters/EmbedPdf.hs index 5c28716..f2a11f9 100644 --- a/build/Filters/EmbedPdf.hs +++ b/build/Filters/EmbedPdf.hs @@ -16,6 +16,7 @@ module Filters.EmbedPdf (preprocess) where import Data.Char (isDigit) import Data.List (isPrefixOf, isSuffixOf) +import qualified Utils as U -- | Apply PDF-embed substitution to the raw Markdown source string. preprocess :: String -> String @@ -23,7 +24,7 @@ preprocess = unlines . map processLine . lines processLine :: String -> String processLine line = - case parseDirective (trim line) of + case parseDirective (U.trim line) of Nothing -> line Just (filePath, pageHash) -> renderEmbed filePath pageHash @@ -64,7 +65,9 @@ renderEmbed filePath pageHash = -- | Percent-encode characters that would break a query-string value. -- Slashes are left unencoded so root-relative paths remain readable and --- work correctly with PDF.js's internal fetch. +-- work correctly with PDF.js's internal fetch. @#@ is encoded for +-- defense-in-depth even though the directive parser already splits on it +-- before this function is called. encodeQueryValue :: String -> String encodeQueryValue = concatMap enc where @@ -73,9 +76,6 @@ encodeQueryValue = concatMap enc enc '?' = "%3F" enc '+' = "%2B" enc '"' = "%22" + enc '#' = "%23" enc c = [c] --- | Strip leading and trailing spaces. -trim :: String -> String -trim = f . f - where f = reverse . dropWhile (== ' ') diff --git a/build/Filters/Images.hs b/build/Filters/Images.hs index d75ba16..f81ccc2 100644 --- a/build/Filters/Images.hs +++ b/build/Filters/Images.hs @@ -2,53 +2,93 @@ {-# LANGUAGE OverloadedStrings #-} -- | Image filter: lazy loading, lightbox markers, and WebP wrappers. -- --- For local raster images (JPG, JPEG, PNG, GIF), emits a @@ element --- with a WebP @@ and the original format as the @@ fallback. --- tools/convert-images.sh produces the companion .webp files at build time. +-- For local raster images (JPG, JPEG, PNG, GIF) whose @.webp@ companion +-- exists on disk at build time, emits a @@ element with a WebP +-- @@ and the original format as the @@ fallback. When the +-- webp companion is absent (cwebp not installed, @convert-images.sh@ not +-- yet run, or a single file missed), the filter emits a plain @@ so +-- the image still renders. This matters because browsers do NOT fall back +-- from a 404'd @@ inside @@ to the nested @@ — the +-- source is selected up front and a broken one leaves the area blank. +-- +-- @tools/convert-images.sh@ produces the companion .webp files at build +-- time. When cwebp is not installed the script is a no-op, and this +-- filter degrades gracefully to plain @@. -- -- SVG files and external URLs are passed through with only lazy loading -- (and lightbox markers for standalone images). module Filters.Images (apply) where import Data.Char (toLower) +import Data.List (isPrefixOf) import Data.Text (Text) import qualified Data.Text as T -import System.FilePath (replaceExtension) +import System.Directory (doesFileExist) +import System.FilePath (replaceExtension, takeExtension, ()) import Text.Pandoc.Definition -import Text.Pandoc.Walk (walk) +import Text.Pandoc.Walk (walkM) +import qualified Utils as U -- | Apply image attribute injection and WebP wrapping to the entire document. -apply :: Pandoc -> Pandoc -apply = walk transformInline +-- +-- @srcDir@ is the directory of the source Markdown file, used to resolve +-- relative image paths when probing for the corresponding @.webp@ +-- companion file. Absolute paths (leading @/@) are resolved against +-- @static/@ instead, matching the layout @convert-images.sh@ writes to. +apply :: FilePath -> Pandoc -> IO Pandoc +apply srcDir = walkM (transformInline srcDir) -- --------------------------------------------------------------------------- -- Core transformation -- --------------------------------------------------------------------------- -transformInline :: Inline -> Inline -transformInline (Link lAttr ils lTarget) = +transformInline :: FilePath -> Inline -> IO Inline +transformInline srcDir (Link lAttr ils lTarget) = do -- Recurse into link contents; images inside a link get no lightbox marker. - Link lAttr (map wrapLinkedImg ils) lTarget - where - wrapLinkedImg (Image iAttr alt iTarget) = renderImg iAttr alt iTarget False - wrapLinkedImg x = x -transformInline (Image attr alt target) = - renderImg attr alt target True -transformInline x = x + ils' <- mapM (wrapLinkedImg srcDir) ils + pure (Link lAttr ils' lTarget) +transformInline srcDir (Image attr alt target) = + renderImg srcDir attr alt target True +transformInline _ x = pure x + +wrapLinkedImg :: FilePath -> Inline -> IO Inline +wrapLinkedImg srcDir (Image iAttr alt iTarget) = + renderImg srcDir iAttr alt iTarget False +wrapLinkedImg _ x = pure x -- | Dispatch on image type: --- * Local raster → @@ with WebP @@ --- * Everything else → plain @@ with loading/lightbox attrs -renderImg :: Attr -> [Inline] -> Target -> Bool -> Inline -renderImg attr alt target@(src, _) lightbox - | isLocalRaster (T.unpack src) = - RawInline (Format "html") (renderPicture attr alt target lightbox) +-- * Local raster with webp companion on disk → @@ with WebP @@ +-- * Local raster without companion → plain @@ (graceful degradation) +-- * Everything else (SVG, URL) → plain @@ with loading/lightbox attrs +renderImg :: FilePath -> Attr -> [Inline] -> Target -> Bool -> IO Inline +renderImg srcDir attr alt target@(src, _) lightbox + | isLocalRaster (T.unpack src) = do + hasWebp <- doesFileExist (webpPhysicalPath srcDir src) + if hasWebp + then pure $ RawInline (Format "html") + (renderPicture attr alt target lightbox) + else pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) + alt target | otherwise = - Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target + pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target where addLightbox True a = addAttr "data-lightbox" "true" a addLightbox False a = a +-- | Physical on-disk path of the @.webp@ companion for a Markdown image src. +-- +-- Absolute paths (@/images/foo.jpg@) resolve under @static/@ because that +-- is where Hakyll's static-asset rule writes them from. Relative paths +-- resolve against the source file's directory, where Pandoc already +-- expects co-located assets to live. +webpPhysicalPath :: FilePath -> Text -> FilePath +webpPhysicalPath srcDir src = + let s = T.unpack src + physical = if "/" `isPrefixOf` s + then "static" ++ s + else srcDir s + in replaceExtension physical ".webp" + -- --------------------------------------------------------------------------- -- rendering -- --------------------------------------------------------------------------- @@ -73,8 +113,13 @@ renderPicture (ident, classes, kvs) alt (src, title) lightbox = ] where webpSrc = replaceExtension (T.unpack src) ".webp" - -- Strip attrs we handle explicitly so they don't appear twice. - passedKvs = filter (\(k, _) -> k `notElem` ["loading", "data-lightbox"]) kvs + -- Strip attrs we handle explicitly above (id/class/alt/title) and the + -- attrs we always emit ourselves (loading, data-lightbox), so they don't + -- appear twice on the . + passedKvs = filter + (\(k, _) -> k `notElem` + ["loading", "data-lightbox", "id", "class", "alt", "title", "src"]) + kvs attrId :: Text -> Text attrId t = if T.null t then "" else " id=\"" <> esc t <> "\"" @@ -103,11 +148,11 @@ isLocalRaster src = not (isUrl src) && lowerExt src `elem` [".jpg", ".jpeg", ".p isUrl :: String -> Bool isUrl s = any (`isPrefixOf` s) ["http://", "https://", "//", "data:"] - where isPrefixOf pfx str = take (length pfx) str == pfx -- | Extension of a path, lowercased (e.g. ".JPG" → ".jpg"). +-- Returns the empty string for paths with no extension. lowerExt :: FilePath -> String -lowerExt = map toLower . reverse . ('.' :) . takeWhile (/= '.') . tail . dropWhile (/= '.') . reverse +lowerExt = map toLower . takeExtension -- | Prepend a key=value pair if not already present. addAttr :: Text -> Text -> Attr -> Attr @@ -119,24 +164,28 @@ addAttr k v (i, cs, kvs) stringify :: [Inline] -> Text stringify = T.concat . map go where - go (Str t) = t - go Space = " " - go SoftBreak = " " - go LineBreak = " " - go (Emph ils) = stringify ils - go (Strong ils) = stringify ils - go (Code _ t) = t - go (Link _ ils _) = stringify ils - go (Image _ ils _) = stringify ils - go (Span _ ils) = stringify ils - go _ = "" + go (Str t) = t + go Space = " " + go SoftBreak = " " + go LineBreak = " " + go (Emph ils) = stringify ils + go (Strong ils) = stringify ils + go (Strikeout ils) = stringify ils + go (Superscript ils) = stringify ils + go (Subscript ils) = stringify ils + go (SmallCaps ils) = stringify ils + go (Underline ils) = stringify ils + go (Quoted _ ils) = stringify ils + go (Cite _ ils) = stringify ils + go (Code _ t) = t + go (Math _ t) = t + go (RawInline _ _) = "" + go (Link _ ils _) = stringify ils + go (Image _ ils _) = stringify ils + go (Span _ ils) = stringify ils + go (Note _) = "" -- | HTML-escape a text value for use in attribute values. +-- Defers to the canonical 'Utils.escapeHtmlText'. esc :: Text -> Text -esc = T.concatMap escChar - where - escChar '&' = "&" - escChar '<' = "<" - escChar '>' = ">" - escChar '"' = """ - escChar c = T.singleton c +esc = U.escapeHtmlText diff --git a/build/Filters/Links.hs b/build/Filters/Links.hs index 7a0c3dd..146aef2 100644 --- a/build/Filters/Links.hs +++ b/build/Filters/Links.hs @@ -25,14 +25,20 @@ apply = walk classifyLink . walk classifyPdfLink -- Preserves the original path in @data-pdf-src@ so the popup thumbnail -- provider can locate the corresponding @.thumb.png@ file. -- Skips links that are already pointing at the viewer (idempotent). +-- +-- Handles fragment identifiers (e.g. @/papers/foo.pdf#page=5@): the +-- fragment is stripped before the @.pdf@ suffix check and re-attached +-- after the viewer URL so PDF.js's anchor handling works. classifyPdfLink :: Inline -> Inline classifyPdfLink (Link (ident, classes, kvs) ils (url, title)) | "/" `T.isPrefixOf` url - , ".pdf" `T.isSuffixOf` T.toLower url + , let (path, fragment) = T.break (== '#') url + , ".pdf" `T.isSuffixOf` T.toLower path , "pdf-link" `notElem` classes = - let viewerUrl = "/pdfjs/web/viewer.html?file=" <> encodeQueryValue url + let viewerUrl = "/pdfjs/web/viewer.html?file=" + <> encodeQueryValue path <> fragment classes' = classes ++ ["pdf-link"] - kvs' = kvs ++ [("data-pdf-src", url)] + kvs' = kvs ++ [("data-pdf-src", path)] in Link (ident, classes', kvs') ils (viewerUrl, title) classifyPdfLink x = x @@ -53,10 +59,33 @@ classifyLink x = x -- Helpers -- --------------------------------------------------------------------------- +-- | True if the URL points outside the site's domain. +-- +-- Uses a strict hostname comparison rather than substring matching, so +-- that a hostile lookalike like @evil-levineuwirth.org.attacker.com@ is +-- correctly classified as external (and gets @rel=noopener noreferrer@ +-- plus @target=_blank@ applied). isExternal :: Text -> Bool isExternal url = - ("http://" `T.isPrefixOf` url || "https://" `T.isPrefixOf` url) - && not ("levineuwirth.org" `T.isInfixOf` url) + case extractHost url of + Nothing -> False + Just host -> + not (host == siteHost || ("." <> siteHost) `T.isSuffixOf` host) + where + siteHost = "levineuwirth.org" + +-- | Extract the lowercased hostname from an absolute http(s) URL. +-- Returns 'Nothing' for non-http(s) URLs (relative paths, mailto:, etc.). +extractHost :: Text -> Maybe Text +extractHost url + | Just rest <- T.stripPrefix "https://" url = Just (hostOf rest) + | Just rest <- T.stripPrefix "http://" url = Just (hostOf rest) + | otherwise = Nothing + where + hostOf rest = + let withPort = T.takeWhile (\c -> c /= '/' && c /= '?' && c /= '#') rest + host = T.takeWhile (/= ':') withPort + in T.toLower host -- | Icon name for the link, matching a file in /images/link-icons/.svg. domainIcon :: Text -> Text diff --git a/build/Filters/Score.hs b/build/Filters/Score.hs index 00a0b68..85ba031 100644 --- a/build/Filters/Score.hs +++ b/build/Filters/Score.hs @@ -14,12 +14,16 @@ -- the appropriate exhibit attributes for gallery.js TOC integration. module Filters.Score (inlineScores) where +import Control.Exception (IOException, try) import Data.Maybe (listToMaybe) import qualified Data.Text as T import qualified Data.Text.IO as TIO +import System.Directory (doesFileExist) import System.FilePath (()) +import System.IO (hPutStrLn, stderr) import Text.Pandoc.Definition import Text.Pandoc.Walk (walkM) +import qualified Utils as U -- | Walk the Pandoc AST and inline all score-fragment divs. -- @baseDir@ is the directory of the source file; image paths in the @@ -37,11 +41,40 @@ inlineScore baseDir (Div (_, cls, attrs) blocks) Nothing -> return $ Div ("", cls, attrs) blocks Just path -> do let fullPath = baseDir T.unpack path - svgRaw <- TIO.readFile fullPath - let html = buildHtml mName mCaption (processColors svgRaw) - return $ RawBlock (Format "html") html + exists <- doesFileExist fullPath + if not exists + then do + hPutStrLn stderr $ + "[Score] missing SVG: " ++ fullPath + ++ " (referenced from a score-fragment in " ++ baseDir ++ ")" + return (errorBlock mName ("Missing score: " <> path)) + else do + result <- try (TIO.readFile fullPath) :: IO (Either IOException T.Text) + case result of + Left e -> do + hPutStrLn stderr $ + "[Score] read error on " ++ fullPath ++ ": " ++ show e + return (errorBlock mName ("Could not read score: " <> path)) + Right svgRaw -> do + let html = buildHtml mName mCaption (processColors svgRaw) + return $ RawBlock (Format "html") html inlineScore _ block = return block +-- | Render an inline error block in place of a missing or unreadable score. +-- Mirrors the convention in 'Filters.Viz.errorBlock' so build failures are +-- visible to the author without aborting the entire site build. +errorBlock :: Maybe T.Text -> T.Text -> Block +errorBlock mName message = + RawBlock (Format "html") $ T.concat + [ "

    " data-exhibit-name=\"" <> escHtml n <> "\"") mName + , ">" + , "
    " + , escHtml message + , "
    " + , "
    " + ] + -- | Extract the image src from the first Para that contains an Image inline. findImagePath :: [Block] -> Maybe T.Text findImagePath blocks = listToMaybe @@ -86,7 +119,4 @@ buildHtml mName mCaption svgContent = T.concat ] escHtml :: T.Text -> T.Text -escHtml = T.replace "\"" """ - . T.replace ">" ">" - . T.replace "<" "<" - . T.replace "&" "&" +escHtml = U.escapeHtmlText diff --git a/build/Filters/Sidenotes.hs b/build/Filters/Sidenotes.hs index 5ac3633..dfb6e41 100644 --- a/build/Filters/Sidenotes.hs +++ b/build/Filters/Sidenotes.hs @@ -33,13 +33,24 @@ convertNote (Note blocks) = do return $ RawInline "html" (renderNote n blocks) convertNote x = return x --- | Convert a 1-based counter to a letter label: 1→a, 2→b, … 26→z. +-- | Convert a 1-based counter to a letter label using base-26 expansion +-- (Excel-column style): 1→a, 2→b, … 26→z, 27→aa, 28→ab, … 52→az, +-- 53→ba, … 702→zz, 703→aaa. Guarantees a unique label per counter so +-- no two sidenotes in a single document collide on @id="sn-…"@. toLabel :: Int -> Text -toLabel n = T.singleton (toEnum (fromEnum 'a' + (n - 1) `mod` 26)) +toLabel n + | n <= 0 = "?" + | otherwise = T.pack (go n) + where + go k + | k <= 0 = "" + | otherwise = + let (q, r) = (k - 1) `divMod` 26 + in go q ++ [toEnum (fromEnum 'a' + r)] renderNote :: Int -> [Block] -> Text renderNote n blocks = - let inner = replacePTags (blocksToHtml blocks) + let inner = blocksToInlineHtml blocks lbl = toLabel n in T.concat [ "" @@ -51,13 +62,34 @@ renderNote n blocks = , "" ] --- | Replace

    /

    with inline-block spans so that sidenote content --- stays valid inside the outer . A bare

    inside --- a is invalid HTML and causes browsers to implicitly close the span. -replacePTags :: Text -> Text -replacePTags = - T.replace "

    " "" - . T.replace "

    " "
    " +-- | Render a list of Pandoc blocks for inclusion inside an inline @@. Each top-level @Para@ is wrapped in a +-- @@ instead of a @

    @ (which would be +-- invalid inside a @@); other block types are rendered with the +-- regular Pandoc HTML writer. +-- +-- Operating on the AST is preferred over post-rendered string +-- substitution because the latter mangles content that legitimately +-- contains the literal text @

    @ (e.g. code samples discussing HTML). +blocksToInlineHtml :: [Block] -> Text +blocksToInlineHtml = T.concat . map renderOne + where + renderOne :: Block -> Text + renderOne (Para inlines) = + "" + <> inlinesToHtml inlines + <> "" + renderOne (Plain inlines) = + inlinesToHtml inlines + renderOne b = + blocksToHtml [b] + +-- | Render a list of inlines to HTML (no surrounding @

    @). +inlinesToHtml :: [Inline] -> Text +inlinesToHtml inlines = + case runPure (writeHtml5String (def :: WriterOptions) (Pandoc mempty [Plain inlines])) of + Left _ -> T.empty + Right t -> t -- | Render a list of Pandoc blocks to an HTML fragment via a pure writer run. blocksToHtml :: [Block] -> Text diff --git a/build/Filters/Smallcaps.hs b/build/Filters/Smallcaps.hs index a81521e..3c7b5ad 100644 --- a/build/Filters/Smallcaps.hs +++ b/build/Filters/Smallcaps.hs @@ -22,6 +22,7 @@ import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Definition import Text.Pandoc.Walk (walk) +import qualified Utils as U -- | Apply smallcaps detection to paragraph-level content. -- Skips heading blocks to avoid false positives. @@ -62,10 +63,4 @@ isAbbreviation t = && T.any isAlpha t escHtml :: Text -> Text -escHtml = T.concatMap esc - where - esc '<' = "<" - esc '>' = ">" - esc '&' = "&" - esc '"' = """ - esc c = T.singleton c +escHtml = U.escapeHtmlText diff --git a/build/Filters/Transclusion.hs b/build/Filters/Transclusion.hs index ddc0822..4a18a93 100644 --- a/build/Filters/Transclusion.hs +++ b/build/Filters/Transclusion.hs @@ -14,6 +14,7 @@ module Filters.Transclusion (preprocess) where import Data.List (isSuffixOf, isPrefixOf, stripPrefix) +import qualified Utils as U -- | Apply transclusion substitution to the raw Markdown source string. preprocess :: String -> String @@ -21,14 +22,18 @@ preprocess = unlines . map processLine . lines processLine :: String -> String processLine line = - case parseDirective (trim line) of + case parseDirective (U.trim line) of Nothing -> line Just (url, secAttr) -> - "

    " -- | Parse a {{slug}} or {{slug#section}} directive. -- Returns (absolute-url, section-attribute-string) or Nothing. +-- +-- The section name is HTML-escaped before being interpolated into the +-- @data-section@ attribute, so a stray @\"@, @&@, @<@, or @>@ in a +-- section name cannot break the surrounding markup. parseDirective :: String -> Maybe (String, String) parseDirective s = do inner <- stripPrefix "{{" s >>= stripSuffix "}}" @@ -38,15 +43,29 @@ parseDirective s = do (slug, '#' : sec) | null sec -> Just (slugToUrl slug, "") | otherwise -> Just (slugToUrl slug, - " data-section=\"" ++ sec ++ "\"") + " data-section=\"" ++ escAttr sec ++ "\"") _ -> Nothing -- | Convert a slug (possibly with leading slash, possibly with path segments) --- to a root-relative .html URL. +-- to a root-relative .html URL. Idempotent for slugs that already end in +-- @.html@ so callers can safely pass either form. slugToUrl :: String -> String slugToUrl slug - | "/" `isPrefixOf` slug = slug ++ ".html" - | otherwise = "/" ++ slug ++ ".html" + | ".html" `isSuffixOf` slug, "/" `isPrefixOf` slug = slug + | ".html" `isSuffixOf` slug = "/" ++ slug + | "/" `isPrefixOf` slug = slug ++ ".html" + | otherwise = "/" ++ slug ++ ".html" + +-- | Minimal HTML attribute-value escape. +escAttr :: String -> String +escAttr = concatMap esc + where + esc '&' = "&" + esc '<' = "<" + esc '>' = ">" + esc '"' = """ + esc '\'' = "'" + esc c = [c] -- | Strip a suffix from a string, returning Nothing if not present. stripSuffix :: String -> String -> Maybe String @@ -54,7 +73,3 @@ stripSuffix suf str | suf `isSuffixOf` str = Just (take (length str - length suf) str) | otherwise = Nothing --- | Strip leading and trailing spaces. -trim :: String -> String -trim = f . f - where f = reverse . dropWhile (== ' ') diff --git a/build/Filters/Typography.hs b/build/Filters/Typography.hs index bbc8f8d..41fdf86 100644 --- a/build/Filters/Typography.hs +++ b/build/Filters/Typography.hs @@ -9,9 +9,9 @@ module Filters.Typography (apply) where import Data.Text (Text) -import qualified Data.Text as T import Text.Pandoc.Definition import Text.Pandoc.Walk (walk) +import Utils (escapeHtmlText) -- | Apply all typographic transformations to the document. apply :: Pandoc -> Pandoc @@ -38,21 +38,17 @@ abbrevMap = -- | If the Str token exactly matches a known abbreviation, replace it with -- a @RawInline "html"@ @@ element; otherwise leave it unchanged. +-- +-- Both the @title@ attribute and the visible body pass through +-- 'escapeHtmlText' for consistency with every other raw-HTML emitter +-- in the filter pipeline. The abbreviations themselves are ASCII-safe +-- so this is defense-in-depth rather than a live hazard. expandAbbrev :: Inline -> Inline expandAbbrev (Str t) = case lookup t abbrevMap of Just title -> RawInline "html" $ - " title <> "\">" <> escHtml t <> "" + " escapeHtmlText title <> "\">" + <> escapeHtmlText t <> "" Nothing -> Str t expandAbbrev x = x - --- | Minimal HTML escaping for the abbr content (should be plain text). -escHtml :: Text -> Text -escHtml = T.concatMap esc - where - esc '<' = "<" - esc '>' = ">" - esc '&' = "&" - esc '"' = """ - esc c = T.singleton c diff --git a/build/Filters/Viz.hs b/build/Filters/Viz.hs index 04ec635..408f3ec 100644 --- a/build/Filters/Viz.hs +++ b/build/Filters/Viz.hs @@ -39,11 +39,14 @@ module Filters.Viz (inlineViz) where import Control.Exception (IOException, catch) import Data.Maybe (fromMaybe) import qualified Data.Text as T +import System.Directory (doesFileExist) import System.Exit (ExitCode (..)) import System.FilePath (()) +import System.IO (hPutStrLn, stderr) import System.Process (readProcessWithExitCode) import Text.Pandoc.Definition import Text.Pandoc.Walk (walkM) +import qualified Utils as U -- --------------------------------------------------------------------------- -- Public entry point @@ -87,19 +90,26 @@ transformBlock _ b = return b -- --------------------------------------------------------------------------- -- | Run @python3 $endif$ $if(reading)$$endif$ $for(page-scripts)$$endfor$ $if(math)$ - + + $endif$ diff --git a/templates/partials/head.html b/templates/partials/head.html index 6c70520..0e7e7a3 100644 --- a/templates/partials/head.html +++ b/templates/partials/head.html @@ -28,6 +28,7 @@ $endif$ $if(search)$ $endif$ + $if(viz)$ diff --git a/templates/partials/nav.html b/templates/partials/nav.html index 97ceb6e..969de60 100644 --- a/templates/partials/nav.html +++ b/templates/partials/nav.html @@ -12,36 +12,36 @@ Search