audit: Haskell build system correctness + Patterns.hs + Stats blaze rewrite

This commit is contained in:
Levi Neuwirth 2026-04-10 17:40:49 -04:00
parent c864e2f9cc
commit a358c8b246
28 changed files with 1290 additions and 548 deletions

View File

@ -12,32 +12,29 @@
module Authors
( buildAllAuthors
, applyAuthorRules
, authorLinksField
) where
import Data.Char (isAlphaNum, toLower)
import Data.Maybe (fromMaybe)
import Hakyll
import Hakyll.Core.Metadata (lookupStringList)
import Pagination (sortAndGroup)
import Tags (tagLinksField)
import Patterns (authorIndexable)
import Contexts (abstractField, tagLinksField)
import Utils (authorSlugify, authorNameOf)
-- ---------------------------------------------------------------------------
-- Slug helpers
--
-- The slugify and nameOf helpers used to live here in their own
-- definitions; they now defer to 'Utils' so that they cannot drift from
-- the 'Contexts' versions on Unicode edge cases.
-- ---------------------------------------------------------------------------
-- | Lowercase, replace spaces with hyphens, strip anything else.
slugify :: String -> String
slugify = map (\c -> if c == ' ' then '-' else c)
. filter (\c -> isAlphaNum c || c == ' ')
. map toLower
slugify = authorSlugify
-- | Extract the author name from a "Name | url" entry, trimming whitespace.
nameOf :: String -> String
nameOf s = strip $ case break (== '|') s of { (n, _) -> n }
where
strip = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ')
nameOf = authorNameOf
-- ---------------------------------------------------------------------------
@ -47,8 +44,10 @@ nameOf s = strip $ case break (== '|') s of { (n, _) -> n }
defaultAuthor :: String
defaultAuthor = "Levi Neuwirth"
-- | Content patterns indexed by author. Sourced from 'Patterns.authorIndexable'
-- so this stays in lockstep with Tags.hs and Backlinks.hs.
allContent :: Pattern
allContent = ("content/essays/*.md" .||. "content/blog/*.md") .&&. hasNoVersion
allContent = authorIndexable
-- ---------------------------------------------------------------------------
@ -103,25 +102,7 @@ applyAuthorRules authors baseCtx = tagsRules authors $ \name pat -> do
where
itemCtx = dateField "date" "%-d %B %Y"
<> tagLinksField "item-tags"
<> abstractField
<> defaultContext
-- ---------------------------------------------------------------------------
-- Context field
-- ---------------------------------------------------------------------------
-- | Exposes each item's authors as @author-name@ / @author-url@ pairs.
-- All links point to /authors/{slug}/, regardless of any URL in frontmatter.
-- Defaults to Levi Neuwirth when no "authors" frontmatter key is present.
--
-- Usage in templates:
-- $for(author-links)$<a href="$author-url$">$author-name$</a>$sep$, $endfor$
authorLinksField :: Context a
authorLinksField = listFieldWith "author-links" ctx $ \item -> do
meta <- getMetadata (itemIdentifier item)
let entries = fromMaybe [] (lookupStringList "authors" meta)
names = if null entries then [defaultAuthor] else map nameOf entries
return $ map (\n -> Item (fromFilePath "") (n, "/authors/" ++ slugify n ++ "/")) names
where
ctx = field "author-name" (return . fst . itemBody)
<> field "author-url" (return . snd . itemBody)

View File

@ -32,10 +32,12 @@ import Data.Ord (comparing)
import Data.Maybe (fromMaybe)
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import qualified Data.ByteString as BS
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
import qualified Data.Aeson as Aeson
import Data.Aeson ((.=))
import Text.Pandoc.Class (runPure)
@ -47,6 +49,7 @@ import Text.Pandoc.Walk (query)
import Hakyll
import Compilers (readerOpts, writerOpts)
import Filters (preprocessSource)
import qualified Patterns as P
-- ---------------------------------------------------------------------------
-- Link-with-context entry (intermediate, saved by the "links" pass)
@ -184,28 +187,47 @@ linksCompiler = do
-- ---------------------------------------------------------------------------
-- | Normalise an internal URL as a map key: strip query string, fragment,
-- and trailing @.html@; ensure a leading slash.
-- and trailing @.html@; ensure a leading slash; percent-decode the path
-- so that @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same
-- key.
normaliseUrl :: String -> String
normaliseUrl url =
let t = T.pack url
t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t)))
t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1
t3 = fromMaybe t2 (T.stripSuffix ".html" t2)
in T.unpack t3
in percentDecode (T.unpack t3)
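-- Illustrative (a sketch, not a doctest wired into the build): the
-- encoded and decoded spellings of one path land on the same key:
--
-- > normaliseUrl "essays/caf%C3%A9.html?draft=1#sec" == "/essays/café"
-- > normaliseUrl "/essays/caf%C3%A9"                 == "/essays/café"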
-- | Decode percent-escapes (@%XX@) into raw bytes, then re-interpret the
-- resulting bytestring as UTF-8. Invalid escapes are passed through
-- verbatim so this is safe to call on already-decoded input.
percentDecode :: String -> String
percentDecode = T.unpack . TE.decodeUtf8With lenientDecode . pack . go
where
go [] = []
go ('%':a:b:rest)
| Just hi <- hexDigit a
, Just lo <- hexDigit b
= fromIntegral (hi * 16 + lo) : go rest
-- Re-encode non-escape characters as UTF-8 bytes (a bare fromEnum
-- truncation would mangle already-decoded non-ASCII input such as 'é',
-- defeating the collision goal described above).
go (c:rest) = BS.unpack (TE.encodeUtf8 (T.singleton c)) ++ go rest
hexDigit c
| c >= '0' && c <= '9' = Just (fromEnum c - fromEnum '0')
| c >= 'a' && c <= 'f' = Just (fromEnum c - fromEnum 'a' + 10)
| c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10)
| otherwise = Nothing
pack = BS.pack
lenientDecode = TE.lenientDecode
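-- Behaviour sketch: valid escapes decode as UTF-8, while a @%@ not
-- followed by two hex digits passes through untouched:
--
-- > percentDecode "caf%C3%A9" == "café"
-- > percentDecode "100%"      == "100%"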
-- ---------------------------------------------------------------------------
-- Content patterns (must match the rules in Site.hs)
-- Content patterns (must match the rules in Site.hs — sourced from
-- Patterns.allContent so additions to the canonical list automatically
-- propagate to backlinks).
-- ---------------------------------------------------------------------------
allContent :: Pattern
allContent =
"content/essays/*.md"
.||. "content/essays/*/index.md"
.||. "content/blog/*.md"
.||. "content/poetry/*.md"
.||. "content/fiction/*.md"
.||. "content/music/*/index.md"
.||. "content/*.md"
allContent = P.allContent
-- ---------------------------------------------------------------------------
-- Hakyll rules

View File

@ -7,7 +7,8 @@ module Catalog
( musicCatalogCtx
) where
import Data.List (groupBy, sortBy)
import Data.Char (isSpace, toLower)
import Data.List (groupBy, isPrefixOf, sortBy)
import Data.Maybe (fromMaybe)
import Data.Ord (comparing)
import Data.Aeson (Value (..))
@ -15,7 +16,6 @@ import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V
import qualified Data.Text as T
import Hakyll
import Hakyll.Core.Metadata (lookupStringList)
import Contexts (siteCtx)
-- ---------------------------------------------------------------------------
@ -115,6 +115,40 @@ parseCatalogEntry item = do
-- ---------------------------------------------------------------------------
-- HTML rendering
-- ---------------------------------------------------------------------------
--
-- Trust model: per the site convention (see also Stats.hs:pageLink),
-- frontmatter @title@ values are author-controlled trusted HTML and may
-- contain inline markup such as @<em>...</em>@. They are emitted
-- verbatim, with no escaping — but we still escape every other
-- interpolated frontmatter value (year, duration, instrumentation) and
-- sanitize hrefs through 'safeHref', so a stray @<@ in those fields
-- cannot break the markup.
-- | Defense-in-depth href sanitiser. Mirrors 'Stats.isSafeUrl'.
safeHref :: String -> String
safeHref u =
let norm = map toLower (dropWhile isSpace u)
in if not ("//" `isPrefixOf` norm)
&& any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"]
then escAttr u
else "#"
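-- Behaviour sketch (URLs are illustrative):
--
-- > safeHref "/music/preludes/"    == "/music/preludes/"
-- > safeHref "https://example.com" == "https://example.com"
-- > safeHref "javascript:alert(1)" == "#"
-- > safeHref "//evil.example/x"    == "#"   -- protocol-relative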
escAttr :: String -> String
escAttr = concatMap esc
where
esc '&' = "&amp;"
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '"' = "&quot;"
esc '\'' = "&#39;"
esc c = [c]
escText :: String -> String
escText = concatMap esc
where
esc '&' = "&amp;"
esc '<' = "&lt;"
esc '>' = "&gt;"
esc c = [c]
renderIndicators :: CatalogEntry -> String
renderIndicators e = concatMap render
@ -129,19 +163,21 @@ renderEntry :: CatalogEntry -> String
renderEntry e = concat
[ "<li class=\"catalog-entry\">"
, "<div class=\"catalog-entry-main\">"
, "<a class=\"catalog-title\" href=\"", ceUrl e, "\">", ceTitle e, "</a>"
, "<a class=\"catalog-title\" href=\"", safeHref (ceUrl e), "\">"
, ceTitle e
, "</a>"
, renderIndicators e
, maybe "" (\y -> "<span class=\"catalog-year\">" ++ y ++ "</span>") (ceYear e)
, maybe "" (\d -> "<span class=\"catalog-duration\">" ++ d ++ "</span>") (ceDuration e)
, maybe "" (\y -> "<span class=\"catalog-year\">" ++ escText y ++ "</span>") (ceYear e)
, maybe "" (\d -> "<span class=\"catalog-duration\">" ++ escText d ++ "</span>") (ceDuration e)
, "</div>"
, maybe "" (\i -> "<div class=\"catalog-instrumentation\">" ++ i ++ "</div>") (ceInstrumentation e)
, maybe "" (\i -> "<div class=\"catalog-instrumentation\">" ++ escText i ++ "</div>") (ceInstrumentation e)
, "</li>"
]
renderCategorySection :: String -> [CatalogEntry] -> String
renderCategorySection cat entries = concat
[ "<section class=\"catalog-section\">"
, "<h2 class=\"catalog-section-title\">", categoryLabel cat, "</h2>"
, "<h2 class=\"catalog-section-title\">", escText (categoryLabel cat), "</h2>"
, "<ul class=\"catalog-list\">"
, concatMap renderEntry entries
, "</ul>"
@ -191,7 +227,12 @@ catalogByCategoryField = field "catalog-by-category" $ \_ -> do
else do
let sorted = sortBy (comparing (categoryRank . ceCategory)) entries
grouped = groupBy (\a b -> ceCategory a == ceCategory b) sorted
return $ concatMap (\g -> renderCategorySection (ceCategory (head g)) g) grouped
return $ concatMap renderGroup grouped
where
-- groupBy on a non-empty list yields non-empty sublists, but pattern
-- matching is total whereas 'head' is not.
renderGroup [] = ""
renderGroup g@(e : _) = renderCategorySection (ceCategory e) g
musicCatalogCtx :: Context String
musicCatalogCtx =

View File

@ -137,9 +137,14 @@ transformInline :: Map Text Int -> Inline -> Inline
transformInline keyNums (Cite citations _) =
let keys = map citationId citations
nums = mapMaybe (`Map.lookup` keyNums) keys
in if null nums
then Str ""
else RawInline "html" (markerHtml keys (head keys) (head nums) nums)
in case (keys, nums) of
-- Unresolved citation keys produce no entry in @nums@, so either
-- list may be empty; pattern-match on both to keep this total
-- instead of relying on @head@.
(firstKey : _, firstNum : _) ->
RawInline "html" (markerHtml keys firstKey firstNum nums)
_ ->
Str ""
transformInline _ x = x
markerHtml :: [Text] -> Text -> Int -> [Int] -> Text

View File

@ -125,9 +125,9 @@ renderThemedView entries =
renderChronoView :: [CPEntry] -> String
renderChronoView entries =
"<div class=\"cp-chrono\" id=\"cp-chrono\" hidden>"
++ if null sorted
++ (if null sorted
then "<p class=\"cp-empty\">No entries yet.</p>"
else concatMap renderEntry sorted
else concatMap renderEntry sorted)
++ "</div>"
where
sorted = sortBy (comparing (Down . cpDateAdded)) entries

View File

@ -12,7 +12,6 @@ module Compilers
) where
import Hakyll
import Hakyll.Core.Metadata (lookupStringList, lookupString)
import Text.Pandoc.Definition (Pandoc (..), Block (..),
Inline (..))
import Text.Pandoc.Options (ReaderOptions (..), WriterOptions (..),
@ -158,7 +157,9 @@ essayCompilerWith rOpts = do
Viz.inlineViz srcDir pandocWithScores
-- Apply remaining AST-level filters (sidenotes, smallcaps, links, etc.).
let pandocFiltered = applyAll pandocWithViz
-- applyAll touches the filesystem via Images.apply (webp existence
-- check), so it runs through unsafeCompiler.
pandocFiltered <- unsafeCompiler $ applyAll srcDir pandocWithViz
let pandocItem' = itemSetBody pandocFiltered pandocItem
-- Build TOC from the filtered AST.
@ -205,8 +206,12 @@ pageCompiler = do
body <- getResourceBody
let src = itemBody body
body' = itemSetBody (preprocessSource src) body
pandocItem <- fmap (fmap applyAll) (readPandocWith readerOpts body')
let htmlItem = writePandocWith writerOpts pandocItem
filePath <- getResourceFilePath
let srcDir = takeDirectory filePath
pandocItem <- readPandocWith readerOpts body'
pandocFiltered <- unsafeCompiler $ applyAll srcDir (itemBody pandocItem)
let pandocItem' = itemSetBody pandocFiltered pandocItem
let htmlItem = writePandocWith writerOpts pandocItem'
_ <- saveSnapshot "word-count" (itemSetBody (show (wordCount src)) htmlItem)
_ <- saveSnapshot "reading-time" (itemSetBody (show (readingTime src)) htmlItem)
return htmlItem

View File

@ -9,13 +9,16 @@ module Contexts
, fictionCtx
, compositionCtx
, contentKindField
, abstractField
, tagLinksField
, authorLinksField
) where
import Data.Aeson (Value (..))
import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V
import Data.List (isPrefixOf)
import Data.Maybe (catMaybes, fromMaybe)
import Data.List (intercalate, isPrefixOf)
import Data.Maybe (fromMaybe)
import Data.Time.Calendar (toGregorian)
import Data.Time.Clock (getCurrentTime, utctDay)
import Data.Time.Format (formatTime, defaultTimeLocale)
@ -24,13 +27,11 @@ import Text.Read (readMaybe)
import qualified Data.Text as T
import Text.Pandoc (runPure, readMarkdown, writeHtml5String, Pandoc(..), Block(..), Inline(..))
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
import Hakyll
import Hakyll.Core.Metadata (lookupStringList)
import Authors (authorLinksField)
import Hakyll hiding (trim)
import Backlinks (backlinksField)
import SimilarLinks (similarLinksField)
import Stability (stabilityField, lastReviewedField, versionHistoryField)
import Tags (tagLinksField)
import Utils (authorSlugify, authorNameOf, trim)
-- ---------------------------------------------------------------------------
-- Affiliation field
@ -96,12 +97,12 @@ contentKindField = field "item-kind" $ \item -> do
r <- getRoute (itemIdentifier item)
return $ case r of
Nothing -> "Page"
Just route
| "essays/" `isPrefixOf` route -> "Essay"
| "blog/" `isPrefixOf` route -> "Post"
| "poetry/" `isPrefixOf` route -> "Poem"
| "fiction/" `isPrefixOf` route -> "Fiction"
| "music/" `isPrefixOf` route -> "Composition"
Just r'
| "essays/" `isPrefixOf` r' -> "Essay"
| "blog/" `isPrefixOf` r' -> "Post"
| "poetry/" `isPrefixOf` r' -> "Poem"
| "fiction/" `isPrefixOf` r' -> "Fiction"
| "music/" `isPrefixOf` r' -> "Composition"
| otherwise -> "Page"
-- ---------------------------------------------------------------------------
@ -112,22 +113,79 @@ contentKindField = field "item-kind" $ \item -> do
-- in the @js:@ frontmatter key (accepts a scalar string or a YAML list).
-- Returns an empty list when absent; $for iterates zero times, emitting nothing.
-- NOTE: do not use fail here — $for does not catch noResult the way $if does.
--
-- Each child Item is keyed on @<parent-identifier>#js-<index>@ so that two
-- pages referencing the same script path (e.g. @shared.js@) do not collide
-- in Hakyll's item store.
pageScriptsField :: Context String
pageScriptsField = listFieldWith "page-scripts" ctx $ \item -> do
meta <- getMetadata (itemIdentifier item)
let scripts = case lookupStringList "js" meta of
Just xs -> xs
Nothing -> maybe [] (:[]) (lookupString "js" meta)
return $ map (\s -> Item (fromFilePath s) s) scripts
parent = toFilePath (itemIdentifier item)
return $ zipWith
(\i s -> Item (fromFilePath (parent ++ "#js-" ++ show (i :: Int))) s)
[0 ..]
scripts
where
ctx = field "script-src" (return . itemBody)
-- ---------------------------------------------------------------------------
-- Tag links field
-- ---------------------------------------------------------------------------
-- | List context field exposing an item's own (non-expanded) tags as
-- @tag-name@ / @tag-url@ objects.
--
-- $for(essay-tags)$<a href="$tag-url$">$tag-name$</a>$endfor$
tagLinksField :: String -> Context a
tagLinksField fieldName = listFieldWith fieldName ctx $ \item ->
map toItem <$> getTags (itemIdentifier item)
where
toItem t = Item (fromFilePath (t ++ "/index.html")) t
ctx = field "tag-name" (return . itemBody)
<> field "tag-url" (\i -> return $ "/" ++ itemBody i ++ "/")
-- ---------------------------------------------------------------------------
-- Author links field
-- ---------------------------------------------------------------------------
--
-- 'authorSlugify' and 'authorNameOf' are imported from 'Utils' so that
-- they cannot drift from the copies in 'Authors'.
-- | Exposes each item's authors as @author-name@ / @author-url@ pairs.
-- Defaults to Levi Neuwirth when no "authors" frontmatter key is present.
--
-- Entries that produce an empty name (e.g. @"| https://url"@) or an empty
-- slug (e.g. all-punctuation names) are dropped, so the field never emits
-- a @/authors//@ link.
--
-- $for(author-links)$<a href="$author-url$">$author-name$</a>$sep$, $endfor$
authorLinksField :: Context a
authorLinksField = listFieldWith "author-links" ctx $ \item -> do
meta <- getMetadata (itemIdentifier item)
let entries = fromMaybe [] (lookupStringList "authors" meta)
rawNames = if null entries then ["Levi Neuwirth"] else map authorNameOf entries
validNames = filter (\n -> not (null n) && not (null (authorSlugify n))) rawNames
names = if null validNames then ["Levi Neuwirth"] else validNames
return $ map (\n -> Item (fromFilePath "") (n, "/authors/" ++ authorSlugify n ++ "/")) names
where
ctx = field "author-name" (return . fst . itemBody)
<> field "author-url" (return . snd . itemBody)
-- ---------------------------------------------------------------------------
-- Abstract field
-- ---------------------------------------------------------------------------
-- | Renders the abstract using Pandoc to support Markdown and LaTeX math.
-- Strips the outer <p> tag if the abstract is a single paragraph.
-- Strips the outer @<p>@ wrapping. A single-paragraph abstract becomes a
-- bare @Plain@ so the rendered HTML is unwrapped inlines. A multi-paragraph
-- abstract (author used a blank line in the YAML literal block) is flattened
-- to a single @Plain@ with @LineBreak@ separators between what were
-- originally paragraph boundaries — the visual break is preserved without
-- emitting stray @<p>@ tags inside the metadata block. Mixed block content
-- (e.g. an abstract containing a blockquote) falls through unchanged.
abstractField :: Context String
abstractField = field "abstract" $ \item -> do
meta <- getMetadata (itemIdentifier item)
@ -138,12 +196,20 @@ abstractField = field "abstract" $ \item -> do
doc <- readMarkdown defaultHakyllReaderOptions (T.pack src)
let doc' = case doc of
Pandoc m [Para ils] -> Pandoc m [Plain ils]
Pandoc m blocks
| all isPara blocks && not (null blocks) ->
let joined = intercalate [LineBreak]
[ils | Para ils <- blocks]
in Pandoc m [Plain joined]
_ -> doc
let wOpts = defaultHakyllWriterOptions { writerHTMLMathMethod = MathML }
writeHtml5String wOpts doc'
case pandocResult of
Left err -> fail $ "Pandoc error rendering abstract: " ++ show err
Right html -> return (T.unpack html)
where
isPara (Para _) = True
isPara _ = False
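-- Rendering sketch (assuming Pandoc's HTML writer emits @LineBreak@ as
-- @<br />@): a two-paragraph abstract "First." / "Second." renders as
-- @First.<br />Second.@ rather than @<p>First.</p><p>Second.</p>@.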
siteCtx :: Context String
siteCtx =
@ -208,21 +274,37 @@ dotsField ctxKey metaKey = field ctxKey $ \item -> do
-- | @$confidence-trend$@: ↑, ↓, or → derived from the last two entries
-- in the @confidence-history@ frontmatter list. Returns @noResult@ when
-- there is no history or only a single entry.
--
-- The arrow flips when the absolute change crosses 'trendThreshold'
-- (currently 5 percentage points). Smaller swings count as flat.
confidenceTrendField :: Context String
confidenceTrendField = field "confidence-trend" $ \item -> do
meta <- getMetadata (itemIdentifier item)
case lookupStringList "confidence-history" meta of
Nothing -> fail "no confidence history"
Just xs | length xs < 2 -> fail "no confidence history"
Just xs ->
let prev = readMaybe (xs !! (length xs - 2)) :: Maybe Int
cur = readMaybe (last xs) :: Maybe Int
Just xs -> case lastTwo xs of
Nothing -> fail "no confidence history"
Just (prevS, curS) ->
let prev = readMaybe prevS :: Maybe Int
cur = readMaybe curS :: Maybe Int
in case (prev, cur) of
(Just p, Just c)
| c - p > 5 -> return "\x2191" -- ↑
| p - c > 5 -> return "\x2193" -- ↓
| c - p > trendThreshold -> return "\x2191" -- ↑
| p - c > trendThreshold -> return "\x2193" -- ↓
| otherwise -> return "\x2192" -- →
_ -> return "\x2192"
where
trendThreshold :: Int
trendThreshold = 5
-- Total replacement for @(xs !! (length xs - 2), last xs)@: returns
-- the last two elements of a list, in order, or 'Nothing' when the
-- list has fewer than two entries.
lastTwo :: [a] -> Maybe (a, a)
lastTwo [] = Nothing
lastTwo [_] = Nothing
lastTwo [a, b] = Just (a, b)
lastTwo (_ : rest) = lastTwo rest
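-- e.g. @lastTwo ["60", "70", "74"] == Just ("70", "74")@, so a history
-- ending @60, 70@ renders ↑ (the change of 10 exceeds the threshold)
-- while one ending @70, 74@ renders → (4 does not).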
-- | @$overall-score$@: weighted composite of confidence (50 %),
-- evidence quality (30 %), and importance (20 %), expressed as an
@ -332,12 +414,27 @@ data Movement = Movement
, movAudio :: Maybe String
}
parseMovements :: Metadata -> [Movement]
parseMovements meta =
-- | Parse the @movements@ frontmatter key. Returns parsed movements and a
-- list of human-readable warnings for any entries that failed to parse.
-- Callers can surface the warnings via 'unsafeCompiler' so a typo
-- doesn't silently strip a movement without a diagnostic.
parseMovementsWithWarnings :: Metadata -> ([Movement], [String])
parseMovementsWithWarnings meta =
case KM.lookup "movements" meta of
Just (Array v) -> catMaybes $ map parseOne (V.toList v)
_ -> []
Just (Array v) ->
let results = zipWith parseIndexed [1 :: Int ..] (V.toList v)
in ( [m | Right m <- results]
, [w | Left w <- results]
)
_ -> ([], [])
where
parseIndexed i value =
case parseOne value of
Just m -> Right m
Nothing -> Left $
"movement #" ++ show i ++ " is missing a required field "
++ "(name, page, or duration) — entry skipped"
parseOne (Object o) = Movement
<$> (getString =<< KM.lookup "name" o)
<*> (getInt =<< KM.lookup "page" o)
@ -351,6 +448,9 @@ parseMovements meta =
getInt (Number n) = Just (floor (fromRational (toRational n) :: Double))
getInt _ = Nothing
parseMovements :: Metadata -> [Movement]
parseMovements = fst . parseMovementsWithWarnings
-- | Extract the composition slug from an item's identifier.
-- "content/music/symphonic-dances/index.md" → "symphonic-dances"
compSlug :: Item a -> String
@ -410,7 +510,11 @@ compositionCtx =
movementsListField = listFieldWith "movements" movCtx $ \item -> do
meta <- getMetadata (itemIdentifier item)
let mvs = parseMovements meta
let (mvs, warnings) = parseMovementsWithWarnings meta
ident = toFilePath (itemIdentifier item)
unsafeCompiler $ mapM_
(\w -> putStrLn $ "[Movements] " ++ ident ++ ": " ++ w)
warnings
return $ zipWith
(\idx mv -> Item (fromFilePath ("mv" ++ show (idx :: Int))) mv)
[1..] mvs

View File

@ -22,16 +22,25 @@ import qualified Filters.Images as Images
-- | Apply all AST-level filters in pipeline order.
-- Run on the Pandoc document after reading, before writing.
applyAll :: Pandoc -> Pandoc
applyAll
= Sidenotes.apply
--
-- 'Filters.Images.apply' is the only IO-performing filter (it probes the
-- filesystem for @.webp@ companions before deciding whether to emit
-- @<picture>@). It runs first — i.e. innermost in the composition — and
-- every downstream filter stays pure. @srcDir@ is the directory of the
-- source Markdown file, passed through to Images for relative-path
-- resolution of co-located assets.
applyAll :: FilePath -> Pandoc -> IO Pandoc
applyAll srcDir doc = do
imagesDone <- Images.apply srcDir doc
pure
. Sidenotes.apply
. Typography.apply
. Links.apply
. Smallcaps.apply
. Dropcaps.apply
. Math.apply
. Code.apply
. Images.apply
$ imagesDone
-- | Apply source-level preprocessors to the raw Markdown string.
-- Order matters: EmbedPdf must run before Transclusion, because the

View File

@ -16,6 +16,7 @@ module Filters.EmbedPdf (preprocess) where
import Data.Char (isDigit)
import Data.List (isPrefixOf, isSuffixOf)
import qualified Utils as U
-- | Apply PDF-embed substitution to the raw Markdown source string.
preprocess :: String -> String
@ -23,7 +24,7 @@ preprocess = unlines . map processLine . lines
processLine :: String -> String
processLine line =
case parseDirective (trim line) of
case parseDirective (U.trim line) of
Nothing -> line
Just (filePath, pageHash) -> renderEmbed filePath pageHash
@ -64,7 +65,9 @@ renderEmbed filePath pageHash =
-- | Percent-encode characters that would break a query-string value.
-- Slashes are left unencoded so root-relative paths remain readable and
-- work correctly with PDF.js's internal fetch.
-- work correctly with PDF.js's internal fetch. @#@ is encoded for
-- defense-in-depth even though the directive parser already splits on it
-- before this function is called.
encodeQueryValue :: String -> String
encodeQueryValue = concatMap enc
where
@ -73,9 +76,6 @@ encodeQueryValue = concatMap enc
enc '?' = "%3F"
enc '+' = "%2B"
enc '"' = "%22"
enc '#' = "%23"
enc c = [c]
-- | Strip leading and trailing spaces.
trim :: String -> String
trim = f . f
where f = reverse . dropWhile (== ' ')

View File

@ -2,53 +2,93 @@
{-# LANGUAGE OverloadedStrings #-}
-- | Image filter: lazy loading, lightbox markers, and WebP <picture> wrappers.
--
-- For local raster images (JPG, JPEG, PNG, GIF), emits a @<picture>@ element
-- with a WebP @<source>@ and the original format as the @<img>@ fallback.
-- tools/convert-images.sh produces the companion .webp files at build time.
-- For local raster images (JPG, JPEG, PNG, GIF) whose @.webp@ companion
-- exists on disk at build time, emits a @<picture>@ element with a WebP
-- @<source>@ and the original format as the @<img>@ fallback. When the
-- webp companion is absent (cwebp not installed, @convert-images.sh@ not
-- yet run, or a single file missed), the filter emits a plain @<img>@ so
-- the image still renders. This matters because browsers do NOT fall back
-- from a 404'd @<source>@ inside @<picture>@ to the nested @<img>@ — the
-- source is selected up front and a broken one leaves the area blank.
--
-- @tools/convert-images.sh@ produces the companion .webp files at build
-- time. When cwebp is not installed the script is a no-op, and this
-- filter degrades gracefully to plain @<img>@.
--
-- SVG files and external URLs are passed through with only lazy loading
-- (and lightbox markers for standalone images).
module Filters.Images (apply) where
import Data.Char (toLower)
import Data.List (isPrefixOf)
import Data.Text (Text)
import qualified Data.Text as T
import System.FilePath (replaceExtension)
import System.Directory (doesFileExist)
import System.FilePath (replaceExtension, takeExtension, (</>))
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Walk (walkM)
import qualified Utils as U
-- | Apply image attribute injection and WebP wrapping to the entire document.
apply :: Pandoc -> Pandoc
apply = walk transformInline
--
-- @srcDir@ is the directory of the source Markdown file, used to resolve
-- relative image paths when probing for the corresponding @.webp@
-- companion file. Absolute paths (leading @/@) are resolved against
-- @static/@ instead, matching the layout @convert-images.sh@ writes to.
apply :: FilePath -> Pandoc -> IO Pandoc
apply srcDir = walkM (transformInline srcDir)
-- ---------------------------------------------------------------------------
-- Core transformation
-- ---------------------------------------------------------------------------
transformInline :: Inline -> Inline
transformInline (Link lAttr ils lTarget) =
transformInline :: FilePath -> Inline -> IO Inline
transformInline srcDir (Link lAttr ils lTarget) = do
-- Recurse into link contents; images inside a link get no lightbox marker.
Link lAttr (map wrapLinkedImg ils) lTarget
where
wrapLinkedImg (Image iAttr alt iTarget) = renderImg iAttr alt iTarget False
wrapLinkedImg x = x
transformInline (Image attr alt target) =
renderImg attr alt target True
transformInline x = x
ils' <- mapM (wrapLinkedImg srcDir) ils
pure (Link lAttr ils' lTarget)
transformInline srcDir (Image attr alt target) =
renderImg srcDir attr alt target True
transformInline _ x = pure x
wrapLinkedImg :: FilePath -> Inline -> IO Inline
wrapLinkedImg srcDir (Image iAttr alt iTarget) =
renderImg srcDir iAttr alt iTarget False
wrapLinkedImg _ x = pure x
-- | Dispatch on image type:
-- * Local raster → @<picture>@ with WebP @<source>@
-- * Everything else → plain @<img>@ with loading/lightbox attrs
renderImg :: Attr -> [Inline] -> Target -> Bool -> Inline
renderImg attr alt target@(src, _) lightbox
| isLocalRaster (T.unpack src) =
RawInline (Format "html") (renderPicture attr alt target lightbox)
-- * Local raster with webp companion on disk → @<picture>@ with WebP @<source>@
-- * Local raster without companion → plain @<img>@ (graceful degradation)
-- * Everything else (SVG, URL) → plain @<img>@ with loading/lightbox attrs
renderImg :: FilePath -> Attr -> [Inline] -> Target -> Bool -> IO Inline
renderImg srcDir attr alt target@(src, _) lightbox
| isLocalRaster (T.unpack src) = do
hasWebp <- doesFileExist (webpPhysicalPath srcDir src)
if hasWebp
then pure $ RawInline (Format "html")
(renderPicture attr alt target lightbox)
else pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr))
alt target
| otherwise =
Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target
pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target
where
addLightbox True a = addAttr "data-lightbox" "true" a
addLightbox False a = a
-- | Physical on-disk path of the @.webp@ companion for a Markdown image src.
--
-- Absolute paths (@/images/foo.jpg@) resolve under @static/@ because that
-- is where Hakyll's static-asset rule writes them from. Relative paths
-- resolve against the source file's directory, where Pandoc already
-- expects co-located assets to live.
webpPhysicalPath :: FilePath -> Text -> FilePath
webpPhysicalPath srcDir src =
let s = T.unpack src
physical = if "/" `isPrefixOf` s
then "static" ++ s
else srcDir </> s
in replaceExtension physical ".webp"
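-- Resolution sketch (paths are illustrative):
--
-- > webpPhysicalPath "content/essays/foo" "/images/pic.jpg"
-- >   == "static/images/pic.webp"
-- > webpPhysicalPath "content/essays/foo" "pic.png"
-- >   == "content/essays/foo/pic.webp"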
-- ---------------------------------------------------------------------------
-- <picture> rendering
-- ---------------------------------------------------------------------------
@ -73,8 +113,13 @@ renderPicture (ident, classes, kvs) alt (src, title) lightbox =
]
where
webpSrc = replaceExtension (T.unpack src) ".webp"
-- Strip attrs we handle explicitly so they don't appear twice.
passedKvs = filter (\(k, _) -> k `notElem` ["loading", "data-lightbox"]) kvs
-- Strip attrs we handle explicitly above (id/class/alt/title) and the
-- attrs we always emit ourselves (loading, data-lightbox), so they don't
-- appear twice on the <img>.
passedKvs = filter
(\(k, _) -> k `notElem`
["loading", "data-lightbox", "id", "class", "alt", "title", "src"])
kvs
attrId :: Text -> Text
attrId t = if T.null t then "" else " id=\"" <> esc t <> "\""
@ -103,11 +148,11 @@ isLocalRaster src = not (isUrl src) && lowerExt src `elem` [".jpg", ".jpeg", ".p
isUrl :: String -> Bool
isUrl s = any (`isPrefixOf` s) ["http://", "https://", "//", "data:"]
where isPrefixOf pfx str = take (length pfx) str == pfx
-- | Extension of a path, lowercased (e.g. ".JPG" → ".jpg").
-- Returns the empty string for paths with no extension.
lowerExt :: FilePath -> String
lowerExt = map toLower . reverse . ('.' :) . takeWhile (/= '.') . tail . dropWhile (/= '.') . reverse
lowerExt = map toLower . takeExtension
-- | Prepend a key=value pair if not already present.
addAttr :: Text -> Text -> Attr -> Attr
@ -125,18 +170,22 @@ stringify = T.concat . map go
go LineBreak = " "
go (Emph ils) = stringify ils
go (Strong ils) = stringify ils
go (Strikeout ils) = stringify ils
go (Superscript ils) = stringify ils
go (Subscript ils) = stringify ils
go (SmallCaps ils) = stringify ils
go (Underline ils) = stringify ils
go (Quoted _ ils) = stringify ils
go (Cite _ ils) = stringify ils
go (Code _ t) = t
go (Math _ t) = t
go (RawInline _ _) = ""
go (Link _ ils _) = stringify ils
go (Image _ ils _) = stringify ils
go (Span _ ils) = stringify ils
go _ = ""
go (Note _) = ""
-- | HTML-escape a text value for use in attribute values.
-- Defers to the canonical 'Utils.escapeHtmlText'.
esc :: Text -> Text
esc = T.concatMap escChar
where
escChar '&' = "&amp;"
escChar '<' = "&lt;"
escChar '>' = "&gt;"
escChar '"' = "&quot;"
escChar c = T.singleton c
esc = U.escapeHtmlText

View File

@ -25,14 +25,20 @@ apply = walk classifyLink . walk classifyPdfLink
-- Preserves the original path in @data-pdf-src@ so the popup thumbnail
-- provider can locate the corresponding @.thumb.png@ file.
-- Skips links that are already pointing at the viewer (idempotent).
--
-- Handles fragment identifiers (e.g. @/papers/foo.pdf#page=5@): the
-- fragment is stripped before the @.pdf@ suffix check and re-attached
-- after the viewer URL so PDF.js's anchor handling works.
classifyPdfLink :: Inline -> Inline
classifyPdfLink (Link (ident, classes, kvs) ils (url, title))
| "/" `T.isPrefixOf` url
, ".pdf" `T.isSuffixOf` T.toLower url
, let (path, fragment) = T.break (== '#') url
, ".pdf" `T.isSuffixOf` T.toLower path
, "pdf-link" `notElem` classes =
let viewerUrl = "/pdfjs/web/viewer.html?file=" <> encodeQueryValue url
let viewerUrl = "/pdfjs/web/viewer.html?file="
<> encodeQueryValue path <> fragment
classes' = classes ++ ["pdf-link"]
kvs' = kvs ++ [("data-pdf-src", url)]
kvs' = kvs ++ [("data-pdf-src", path)]
in Link (ident, classes', kvs') ils (viewerUrl, title)
classifyPdfLink x = x
@ -53,10 +59,33 @@ classifyLink x = x
-- Helpers
-- ---------------------------------------------------------------------------
-- | True if the URL points outside the site's domain.
--
-- Uses a strict hostname comparison rather than substring matching, so
-- that a hostile lookalike like @evil-levineuwirth.org.attacker.com@ is
-- correctly classified as external (and gets @rel=noopener noreferrer@
-- plus @target=_blank@ applied).
isExternal :: Text -> Bool
isExternal url =
("http://" `T.isPrefixOf` url || "https://" `T.isPrefixOf` url)
&& not ("levineuwirth.org" `T.isInfixOf` url)
case extractHost url of
Nothing -> False
Just host ->
not (host == siteHost || ("." <> siteHost) `T.isSuffixOf` host)
where
siteHost = "levineuwirth.org"
-- | Extract the lowercased hostname from an absolute http(s) URL.
-- Returns 'Nothing' for non-http(s) URLs (relative paths, mailto:, etc.).
extractHost :: Text -> Maybe Text
extractHost url
| Just rest <- T.stripPrefix "https://" url = Just (hostOf rest)
| Just rest <- T.stripPrefix "http://" url = Just (hostOf rest)
| otherwise = Nothing
where
hostOf rest =
let withPort = T.takeWhile (\c -> c /= '/' && c /= '?' && c /= '#') rest
host = T.takeWhile (/= ':') withPort
in T.toLower host
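-- Classification sketch (hosts are illustrative):
--
-- > isExternal "https://levineuwirth.org/essays/x"           == False
-- > isExternal "https://notes.levineuwirth.org/"             == False
-- > isExternal "https://evil-levineuwirth.org.attacker.com/" == True
-- > isExternal "/essays/x"                                   == False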
-- | Icon name for the link, matching a file in /images/link-icons/<name>.svg.
domainIcon :: Text -> Text

View File

@ -14,12 +14,16 @@
-- the appropriate exhibit attributes for gallery.js TOC integration.
module Filters.Score (inlineScores) where
import Control.Exception (IOException, try)
import Data.Maybe (listToMaybe)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Directory (doesFileExist)
import System.FilePath ((</>))
import System.IO (hPutStrLn, stderr)
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM)
import qualified Utils as U
-- | Walk the Pandoc AST and inline all score-fragment divs.
-- @baseDir@ is the directory of the source file; image paths in the
@ -37,11 +41,40 @@ inlineScore baseDir (Div (_, cls, attrs) blocks)
Nothing -> return $ Div ("", cls, attrs) blocks
Just path -> do
let fullPath = baseDir </> T.unpack path
svgRaw <- TIO.readFile fullPath
exists <- doesFileExist fullPath
if not exists
then do
hPutStrLn stderr $
"[Score] missing SVG: " ++ fullPath
++ " (referenced from a score-fragment in " ++ baseDir ++ ")"
return (errorBlock mName ("Missing score: " <> path))
else do
result <- try (TIO.readFile fullPath) :: IO (Either IOException T.Text)
case result of
Left e -> do
hPutStrLn stderr $
"[Score] read error on " ++ fullPath ++ ": " ++ show e
return (errorBlock mName ("Could not read score: " <> path))
Right svgRaw -> do
let html = buildHtml mName mCaption (processColors svgRaw)
return $ RawBlock (Format "html") html
inlineScore _ block = return block
-- | Render an inline error block in place of a missing or unreadable score.
-- Mirrors the convention in 'Filters.Viz.errorBlock' so build failures are
-- visible to the author without aborting the entire site build.
errorBlock :: Maybe T.Text -> T.Text -> Block
errorBlock mName message =
RawBlock (Format "html") $ T.concat
[ "<figure class=\"score-fragment score-fragment--error\""
, maybe "" (\n -> " data-exhibit-name=\"" <> escHtml n <> "\"") mName
, ">"
, "<div class=\"score-fragment-error\">"
, escHtml message
, "</div>"
, "</figure>"
]
-- | Extract the image src from the first Para that contains an Image inline.
findImagePath :: [Block] -> Maybe T.Text
findImagePath blocks = listToMaybe
@ -86,7 +119,4 @@ buildHtml mName mCaption svgContent = T.concat
]
escHtml :: T.Text -> T.Text
escHtml = T.replace "\"" "&quot;"
. T.replace ">" "&gt;"
. T.replace "<" "&lt;"
. T.replace "&" "&amp;"
escHtml = U.escapeHtmlText

View File

@ -33,13 +33,24 @@ convertNote (Note blocks) = do
return $ RawInline "html" (renderNote n blocks)
convertNote x = return x
-- | Convert a 1-based counter to a letter label: 1→a, 2→b, … 26→z.
-- | Convert a 1-based counter to a letter label using base-26 expansion
-- (Excel-column style): 1→a, 2→b, … 26→z, 27→aa, 28→ab, … 52→az,
-- 53→ba, … 702→zz, 703→aaa. Guarantees a unique label per counter so
-- no two sidenotes in a single document collide on @id="sn-…"@.
toLabel :: Int -> Text
toLabel n = T.singleton (toEnum (fromEnum 'a' + (n - 1) `mod` 26))
toLabel n
| n <= 0 = "?"
| otherwise = T.pack (go n)
where
go k
| k <= 0 = ""
| otherwise =
let (q, r) = (k - 1) `divMod` 26
in go q ++ [toEnum (fromEnum 'a' + r)]
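-- Spot checks of the expansion (illustrative):
--
-- > toLabel 1 == "a";  toLabel 26 == "z";  toLabel 27 == "aa"
-- > toLabel 703 == "aaa"   -- first three-letter label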
renderNote :: Int -> [Block] -> Text
renderNote n blocks =
let inner = replacePTags (blocksToHtml blocks)
let inner = blocksToInlineHtml blocks
lbl = toLabel n
in T.concat
[ "<sup class=\"sidenote-ref\" id=\"snref-", lbl, "\">"
@ -51,13 +62,34 @@ renderNote n blocks =
, "</span>"
]
-- | Replace <p> / </p> with inline-block spans so that sidenote content
-- stays valid inside the outer <span class="sidenote">. A bare <p> inside
-- a <span> is invalid HTML and causes browsers to implicitly close the span.
replacePTags :: Text -> Text
replacePTags =
T.replace "<p>" "<span class=\"sidenote-para\">"
. T.replace "</p>" "</span>"
-- | Render a list of Pandoc blocks for inclusion inside an inline @<span
-- class="sidenote">@. Each top-level @Para@ is wrapped in a
-- @<span class="sidenote-para">@ instead of a @<p>@ (which would be
-- invalid inside a @<span>@); other block types are rendered with the
-- regular Pandoc HTML writer.
--
-- Operating on the AST is preferred over post-rendered string
-- substitution because the latter mangles content that legitimately
-- contains the literal text @<p>@ (e.g. code samples discussing HTML).
blocksToInlineHtml :: [Block] -> Text
blocksToInlineHtml = T.concat . map renderOne
where
renderOne :: Block -> Text
renderOne (Para inlines) =
"<span class=\"sidenote-para\">"
<> inlinesToHtml inlines
<> "</span>"
renderOne (Plain inlines) =
inlinesToHtml inlines
renderOne b =
blocksToHtml [b]
-- | Render a list of inlines to HTML (no surrounding @<p>@).
inlinesToHtml :: [Inline] -> Text
inlinesToHtml inlines =
case runPure (writeHtml5String (def :: WriterOptions) (Pandoc mempty [Plain inlines])) of
Left _ -> T.empty
Right t -> t
-- | Render a list of Pandoc blocks to an HTML fragment via a pure writer run.
blocksToHtml :: [Block] -> Text

View File

@ -22,6 +22,7 @@ import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk)
import qualified Utils as U
-- | Apply smallcaps detection to paragraph-level content.
-- Skips heading blocks to avoid false positives.
@ -62,10 +63,4 @@ isAbbreviation t =
&& T.any isAlpha t
escHtml :: Text -> Text
escHtml = T.concatMap esc
where
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '&' = "&amp;"
esc '"' = "&quot;"
esc c = T.singleton c
escHtml = U.escapeHtmlText

View File

@ -14,6 +14,7 @@
module Filters.Transclusion (preprocess) where
import Data.List (isSuffixOf, isPrefixOf, stripPrefix)
import qualified Utils as U
-- | Apply transclusion substitution to the raw Markdown source string.
preprocess :: String -> String
@ -21,14 +22,18 @@ preprocess = unlines . map processLine . lines
processLine :: String -> String
processLine line =
case parseDirective (trim line) of
case parseDirective (U.trim line) of
Nothing -> line
Just (url, secAttr) ->
"<div class=\"transclude\" data-src=\"" ++ url ++ "\""
"<div class=\"transclude\" data-src=\"" ++ escAttr url ++ "\""
++ secAttr ++ "></div>"
-- | Parse a {{slug}} or {{slug#section}} directive.
-- Returns (absolute-url, section-attribute-string) or Nothing.
--
-- The section name is HTML-escaped before being interpolated into the
-- @data-section@ attribute, so a stray @\"@, @&@, @<@, or @>@ in a
-- section name cannot break the surrounding markup.
parseDirective :: String -> Maybe (String, String)
parseDirective s = do
inner <- stripPrefix "{{" s >>= stripSuffix "}}"
@ -38,23 +43,33 @@ parseDirective s = do
(slug, '#' : sec)
| null sec -> Just (slugToUrl slug, "")
| otherwise -> Just (slugToUrl slug,
" data-section=\"" ++ sec ++ "\"")
" data-section=\"" ++ escAttr sec ++ "\"")
_ -> Nothing
-- | Convert a slug (possibly with leading slash, possibly with path segments)
-- to a root-relative .html URL.
-- to a root-relative .html URL. Idempotent for slugs that already end in
-- @.html@ so callers can safely pass either form.
slugToUrl :: String -> String
slugToUrl slug
| ".html" `isSuffixOf` slug, "/" `isPrefixOf` slug = slug
| ".html" `isSuffixOf` slug = "/" ++ slug
| "/" `isPrefixOf` slug = slug ++ ".html"
| otherwise = "/" ++ slug ++ ".html"
-- | Minimal HTML attribute-value escape.
escAttr :: String -> String
escAttr = concatMap esc
where
esc '&' = "&amp;"
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '"' = "&quot;"
esc '\'' = "&#39;"
esc c = [c]
-- | Strip a suffix from a string, returning Nothing if not present.
stripSuffix :: String -> String -> Maybe String
stripSuffix suf str
| suf `isSuffixOf` str = Just (take (length str - length suf) str)
| otherwise = Nothing
-- | Strip leading and trailing spaces.
trim :: String -> String
trim = f . f
where f = reverse . dropWhile (== ' ')

View File

@ -9,9 +9,9 @@
module Filters.Typography (apply) where
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk)
import Utils (escapeHtmlText)
-- | Apply all typographic transformations to the document.
apply :: Pandoc -> Pandoc
@ -38,21 +38,17 @@ abbrevMap =
-- | If the Str token exactly matches a known abbreviation, replace it with
-- a @RawInline "html"@ @<abbr>@ element; otherwise leave it unchanged.
--
-- Both the @title@ attribute and the visible body pass through
-- 'escapeHtmlText' for consistency with every other raw-HTML emitter
-- in the filter pipeline. The abbreviations themselves are ASCII-safe
-- so this is defense-in-depth rather than a live hazard.
expandAbbrev :: Inline -> Inline
expandAbbrev (Str t) =
case lookup t abbrevMap of
Just title ->
RawInline "html" $
"<abbr title=\"" <> title <> "\">" <> escHtml t <> "</abbr>"
"<abbr title=\"" <> escapeHtmlText title <> "\">"
<> escapeHtmlText t <> "</abbr>"
Nothing -> Str t
expandAbbrev x = x
-- | Minimal HTML escaping for the abbr content (should be plain text).
escHtml :: Text -> Text
escHtml = T.concatMap esc
where
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '&' = "&amp;"
esc '"' = "&quot;"
esc c = T.singleton c

View File

@ -39,11 +39,14 @@ module Filters.Viz (inlineViz) where
import Control.Exception (IOException, catch)
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import System.Directory (doesFileExist)
import System.Exit (ExitCode (..))
import System.FilePath ((</>))
import System.IO (hPutStrLn, stderr)
import System.Process (readProcessWithExitCode)
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM)
import qualified Utils as U
-- ---------------------------------------------------------------------------
-- Public entry point
@ -87,19 +90,26 @@ transformBlock _ b = return b
-- ---------------------------------------------------------------------------
-- | Run @python3 <script>@. Returns the script\'s stdout on success, or an
-- error message on failure (non-zero exit or missing @script=@ attribute).
-- error message on failure (non-zero exit, missing @script=@ attribute, or
-- missing script file).
runScript :: FilePath -> [(T.Text, T.Text)] -> IO (Either String T.Text)
runScript baseDir attrs =
case lookup "script" attrs of
Nothing -> return (Left "missing script= attribute")
Just p -> do
let fullPath = baseDir </> T.unpack p
exists <- doesFileExist fullPath
if not exists
then return (Left ("script not found: " ++ fullPath))
else do
(ec, out, err) <-
readProcessWithExitCode "python3" [fullPath] ""
`catch` (\e -> return (ExitFailure 1, "", show (e :: IOException)))
return $ case ec of
ExitSuccess -> Right (T.pack out)
ExitFailure _ -> Left (if null err then "non-zero exit" else err)
ExitFailure _ -> Left $
"in " ++ fullPath ++ ": "
++ (if null err then "non-zero exit" else err)
-- ---------------------------------------------------------------------------
-- SVG colour post-processing (mirrors Filters.Score.processColors)
@ -173,10 +183,7 @@ attr :: T.Text -> [(T.Text, T.Text)] -> T.Text
attr key kvs = fromMaybe "" (lookup key kvs)
warn :: String -> String -> IO ()
warn kind msg = putStrLn $ "[Viz] " ++ kind ++ " error: " ++ msg
warn kind msg = hPutStrLn stderr $ "[Viz] " ++ kind ++ " error: " ++ msg
escHtml :: T.Text -> T.Text
escHtml = T.replace "&" "&amp;"
. T.replace "<" "&lt;"
. T.replace ">" "&gt;"
. T.replace "\"" "&quot;"
escHtml = U.escapeHtmlText

View File

@ -15,6 +15,7 @@ module Filters.Wikilinks (preprocess) where
import Data.Char (isAlphaNum, toLower, isSpace)
import Data.List (intercalate)
import qualified Utils as U
-- | Scan the raw Markdown source for @[[…]]@ wikilinks and replace them
-- with standard Markdown link syntax.
@ -29,21 +30,49 @@ preprocess ('[':'[':rest) =
preprocess (c:rest) = c : preprocess rest
-- | Convert the inner content of @[[…]]@ to a Markdown link.
--
-- Display text is escaped via 'escMdLinkText' so that a literal @]@, @[@,
-- or backslash in the display does not break the surrounding Markdown
-- link syntax. The URL itself is produced by 'slugify' and therefore only
-- ever contains @[a-z0-9-]@, so no URL-side encoding is needed — adding
-- one would be defense against a character set we can't produce.
toMarkdownLink :: String -> String
toMarkdownLink inner =
let (title, display) = splitOnPipe inner
url = "/" ++ slugify title
in "[" ++ display ++ "](" ++ url ++ ")"
in "[" ++ escMdLinkText display ++ "](" ++ url ++ ")"
-- | Escape the minimum set of characters that would prematurely terminate
-- a Markdown link's display-text segment: backslash (escape char), @[@,
-- and @]@. Backslash MUST be escaped first so the escapes we introduce
-- for @[@ and @]@ are not themselves re-escaped.
--
-- Deliberately NOT escaped: @_@, @*@, @\`@, @<@. Those are inline
-- formatting markers in Markdown and escaping them would strip the
-- author's ability to put emphasis, code, or inline HTML in a wikilink's
-- display text.
escMdLinkText :: String -> String
escMdLinkText = concatMap esc
where
esc '\\' = "\\\\"
esc '[' = "\\["
esc ']' = "\\]"
esc c = [c]
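-- Escaping sketch: display text @see [1]@ is emitted as @see \[1\]@, so
-- the bracket cannot terminate the link early; backslash itself doubles
-- first, which keeps the escapes we introduce from being re-escaped.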
-- | Split on the first @|@; if none, display = title.
splitOnPipe :: String -> (String, String)
splitOnPipe s =
case break (== '|') s of
(title, '|':display) -> (trim title, trim display)
_ -> (trim s, trim s)
(title, '|':display) -> (U.trim title, U.trim display)
_ -> (U.trim s, U.trim s)
-- | Produce a URL slug: lowercase, words joined by hyphens,
-- non-alphanumeric characters removed.
--
-- Trailing punctuation is dropped rather than preserved as a dangling
-- hyphen — @slugify "end." == "end"@, not @"end-"@. This is intentional:
-- hand-authored wikilinks tend to end sentences with a period, and the
-- desired URL is almost always the terminal-punctuation-free form.
slugify :: String -> String
slugify = intercalate "-" . words . map toLowerAlnum
where
@ -55,5 +84,3 @@ slugify = intercalate "-" . words . map toLowerAlnum
-- split correctly and double-hyphens are
-- collapsed by 'words'
trim :: String -> String
trim = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ')

View File

@ -1,2 +0,0 @@
-- | Metadata utilities (Phase 2+).
module Metadata where

build/Patterns.hs Normal file
View File

@ -0,0 +1,100 @@
{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Canonical content-pattern definitions, shared across modules.
--
-- Several modules need to enumerate "all author-written content" or
-- "all essays". Historically each module hard-coded its own slightly
-- different list, which produced silent omissions (e.g. directory-form
-- essays not appearing on author pages). This module is the single source
-- of truth — every place that needs a content pattern should import from
-- here, not write its own.
module Patterns
( -- * Per-section patterns
essayPattern
, draftEssayPattern
, blogPattern
, poetryPattern
, fictionPattern
, musicPattern
, standalonePagesPattern
-- * Aggregated patterns
, allWritings -- essays + blog + poetry + fiction
, allContent -- everything that backlinks should index
, authorIndexable -- everything that should appear on /authors/{slug}/
, tagIndexable -- everything that should appear on /<tag>/
) where
import Hakyll
-- ---------------------------------------------------------------------------
-- Per-section
-- ---------------------------------------------------------------------------
-- | All published essays — flat files and directory-based (with co-located
-- assets). Drafts under @content/drafts/essays/**@ are intentionally NOT
-- included; 'Site.rules' unions them in conditionally when @SITE_ENV=dev@.
essayPattern :: Pattern
essayPattern = "content/essays/*.md" .||. "content/essays/*/index.md"
-- | In-progress essay drafts. Matches the flat and directory forms under
-- @content/drafts/essays/@. Only 'Site.rules' consumes this, gated on
-- @SITE_ENV=dev@ — every other module that enumerates content (Authors,
-- Tags, Backlinks, Stats, feeds) sees only 'essayPattern', so drafts are
-- automatically invisible to listings, tags, authors, backlinks, and stats.
draftEssayPattern :: Pattern
draftEssayPattern =
"content/drafts/essays/*.md"
.||. "content/drafts/essays/*/index.md"
-- | All blog posts. Currently flat-only; co-located blog assets would
-- require a directory variant analogous to 'essayPattern'.
blogPattern :: Pattern
blogPattern = "content/blog/*.md"
-- | All poetry: flat poems plus collection poems, excluding collection
-- index pages (which are landing pages, not poems).
poetryPattern :: Pattern
poetryPattern =
"content/poetry/*.md"
.||. ("content/poetry/*/*.md" .&&. complement "content/poetry/*/index.md")
-- | All fiction. Currently flat-only.
fictionPattern :: Pattern
fictionPattern = "content/fiction/*.md"
-- | Music compositions (landing pages live at @content/music/<slug>/index.md@).
musicPattern :: Pattern
musicPattern = "content/music/*/index.md"
-- | Top-level standalone pages (about, colophon, current, gpg, …).
standalonePagesPattern :: Pattern
standalonePagesPattern = "content/*.md"
-- ---------------------------------------------------------------------------
-- Aggregations
-- ---------------------------------------------------------------------------
-- | All long-form authored writings.
allWritings :: Pattern
allWritings = essayPattern .||. blogPattern .||. poetryPattern .||. fictionPattern
-- | Every content file the backlinks pass should index. Includes music
-- landing pages and top-level standalone pages, in addition to writings.
allContent :: Pattern
allContent =
essayPattern
.||. blogPattern
.||. poetryPattern
.||. fictionPattern
.||. musicPattern
.||. standalonePagesPattern
-- | Content shown on author index pages — essays + blog posts.
-- (Poetry and fiction have their own dedicated indexes and are not
-- aggregated by author.)
authorIndexable :: Pattern
authorIndexable = (essayPattern .||. blogPattern) .&&. hasNoVersion
-- | Content shown on tag index pages — essays + blog posts.
tagIndexable :: Pattern
tagIndexable = (essayPattern .||. blogPattern) .&&. hasNoVersion
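-- Example consumer (a sketch; 'recentWritings' is hypothetical, not part
-- of this commit):
--
-- @
-- import qualified Patterns as P
--
-- recentWritings :: Compiler [Item String]
-- recentWritings = recentFirst =<< loadAll (P.allWritings .&&. hasNoVersion)
-- @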

View File

@ -18,10 +18,12 @@
module SimilarLinks (similarLinksField) where
import Data.Maybe (fromMaybe)
import qualified Data.ByteString as BS
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
import qualified Data.Aeson as Aeson
import Hakyll
@ -83,7 +85,29 @@ normaliseUrl url =
t3 = fromMaybe t2 (T.stripSuffix "index.html" t2)
-- strip bare .html extension only for non-index pages
t4 = fromMaybe t3 (T.stripSuffix ".html" t3)
in T.unpack t4
in percentDecode (T.unpack t4)
-- | Percent-decode @%XX@ escapes (UTF-8) so percent-encoded paths
-- collide with their decoded form on map lookup. Mirrors
-- 'Backlinks.percentDecode'; the two implementations are intentionally
-- duplicated because they apply different normalisations *before*
-- decoding (Backlinks strips @.html@ unconditionally; SimilarLinks
-- preserves the trailing-slash form for index pages).
percentDecode :: String -> String
percentDecode = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack . go
where
go [] = []
go ('%':a:b:rest)
| Just hi <- hexDigit a
, Just lo <- hexDigit b
= fromIntegral (hi * 16 + lo) : go rest
-- Re-encode non-escape characters as UTF-8 bytes (a bare fromEnum
-- truncation would mangle already-decoded non-ASCII input such as 'é'),
-- matching the Backlinks copy.
go (c:rest) = BS.unpack (TE.encodeUtf8 (T.singleton c)) ++ go rest
hexDigit c
| c >= '0' && c <= '9' = Just (fromEnum c - fromEnum '0')
| c >= 'a' && c <= 'f' = Just (fromEnum c - fromEnum 'a' + 10)
| c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10)
| otherwise = Nothing
-- ---------------------------------------------------------------------------
-- HTML rendering

View File

@ -2,10 +2,13 @@
{-# LANGUAGE OverloadedStrings #-}
module Site (rules) where
import Control.Monad (filterM)
import Data.List (intercalate, isPrefixOf)
import Control.Monad (filterM, when)
import Data.List (isPrefixOf)
import Data.Maybe (fromMaybe)
import System.Environment (lookupEnv)
import System.FilePath (takeDirectory, takeFileName, replaceExtension)
import qualified Data.Aeson as Aeson
import qualified Data.ByteString.Lazy.Char8 as LBS
import Hakyll
import Authors (buildAllAuthors, applyAuthorRules)
import Backlinks (backlinkRules)
@ -15,14 +18,11 @@ import Catalog (musicCatalogCtx)
import Commonplace (commonplaceCtx)
import Contexts (siteCtx, essayCtx, postCtx, pageCtx, poetryCtx, fictionCtx, compositionCtx,
contentKindField)
import qualified Patterns as P
import Tags (buildAllTags, applyTagRules)
import Pagination (blogPaginateRules)
import Stats (statsRules)
-- All essays: flat files and directory-based (with co-located assets).
allEssays :: Pattern
allEssays = "content/essays/*.md" .||. "content/essays/*/index.md"
-- Poems inside collection subdirectories, excluding their index pages.
collectionPoems :: Pattern
collectionPoems = "content/poetry/*/*.md" .&&. complement "content/poetry/*/index.md"
@ -51,6 +51,16 @@ musicFeedConfig = FeedConfiguration
rules :: Rules ()
rules = do
-- ---------------------------------------------------------------------------
-- Build mode. SITE_ENV=dev (set by `make dev` / `make watch`) includes
-- drafts under content/drafts/**; anything else (unset, "deploy", "build")
-- excludes them entirely from every match, listing, and asset rule below.
-- ---------------------------------------------------------------------------
isDev <- preprocess $ (== Just "dev") <$> lookupEnv "SITE_ENV"
let allEssays = if isDev
then P.essayPattern .||. P.draftEssayPattern
else P.essayPattern
-- ---------------------------------------------------------------------------
-- Backlinks (pass 1: link extraction; pass 2: JSON generation)
-- Must run before content rules so dependencies resolve correctly.
@ -70,8 +80,14 @@ rules = do
applyTagRules tags siteCtx
statsRules tags
-- Per-page JS files — authored alongside content in content/**/*.js
match "content/**/*.js" $ do
-- Per-page JS files — authored alongside content in content/**/*.js.
-- Draft JS is handled by a separate dev-only rule below.
match ("content/**/*.js" .&&. complement "content/drafts/**") $ do
route $ gsubRoute "content/" (const "")
compile copyFileCompiler
-- Per-page JS co-located with draft essays (dev-only).
when isDev $ match "content/drafts/**/*.js" $ do
route $ gsubRoute "content/" (const "")
compile copyFileCompiler
@ -177,14 +193,25 @@ rules = do
-- ---------------------------------------------------------------------------
-- Essays — flat (content/essays/foo.md → essays/foo.html) and
-- directory-based (content/essays/slug/index.md → essays/slug/index.html)
-- directory-based (content/essays/slug/index.md → essays/slug/index.html).
-- In dev mode, drafts under content/drafts/essays/ route to
-- drafts/essays/foo.html (flat) or drafts/essays/slug/index.html (dir).
-- ---------------------------------------------------------------------------
match allEssays $ do
route $ customRoute $ \ident ->
let fp = toFilePath ident
in if takeFileName fp == "index.md"
then replaceExtension (drop 8 fp) "html"
else "essays/" ++ replaceExtension (takeFileName fp) "html"
fname = takeFileName fp
isIndex = fname == "index.md"
isDraft = "content/drafts/essays/" `isPrefixOf` fp
in case (isDraft, isIndex) of
-- content/drafts/essays/slug/index.md → drafts/essays/slug/index.html
(True, True) -> replaceExtension (drop 8 fp) "html"
-- content/drafts/essays/foo.md → drafts/essays/foo.html
(True, False) -> "drafts/essays/" ++ replaceExtension fname "html"
-- content/essays/slug/index.md → essays/slug/index.html
(False, True) -> replaceExtension (drop 8 fp) "html"
-- content/essays/foo.md → essays/foo.html
(False, False) -> "essays/" ++ replaceExtension fname "html"
compile $ essayCompiler
>>= saveSnapshot "content"
>>= loadAndApplyTemplate "templates/essay.html" essayCtx
@ -198,6 +225,13 @@ rules = do
route $ gsubRoute "content/" (const "")
compile copyFileCompiler
-- Static assets co-located with draft essays (dev-only).
when isDev $ match ("content/drafts/essays/**"
.&&. complement "content/drafts/essays/*.md"
.&&. complement "content/drafts/essays/*/index.md") $ do
route $ gsubRoute "content/" (const "")
compile copyFileCompiler
-- ---------------------------------------------------------------------------
-- Blog posts
-- ---------------------------------------------------------------------------
@ -400,7 +434,7 @@ rules = do
poetry <- loadAll ("content/poetry/*.md" .&&. hasNoVersion) :: Compiler [Item String]
routes <- mapM (getRoute . itemIdentifier) (essays ++ posts ++ fiction ++ poetry)
let urls = [ "/" ++ r | Just r <- routes ]
makeItem $ "[" ++ intercalate "," (map show urls) ++ "]"
makeItem $ LBS.unpack (Aeson.encode urls)
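-- Why Aeson here: 'show' on a String produces Haskell string-literal
-- escaping, which only coincides with JSON for plain ASCII. A route with
-- a non-ASCII character breaks the old hand-rolled encoding:
--
-- >>> show "/essays/caf\233/"
-- "\"/essays/caf\\233/\""
--
-- @\\233@ is not a legal JSON escape; 'Aeson.encode' emits valid JSON for
-- the same input ('LBS.unpack' is byte-per-Char, which stays faithful for
-- the ASCII routes generated here).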
-- ---------------------------------------------------------------------------
-- Atom feed — all content sorted by date

View File

@ -29,7 +29,9 @@ import Data.Maybe (catMaybes, fromMaybe, listToMaybe)
import Data.Time.Calendar (Day, diffDays)
import Data.Time.Format (parseTimeM, formatTime, defaultTimeLocale)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Exit (ExitCode (..))
import System.IO (hPutStrLn, stderr)
import System.Process (readProcessWithExitCode)
import Hakyll
@ -39,9 +41,12 @@ import Hakyll
-- | Read @IGNORE.txt@ (paths relative to project root, one per line).
-- Returns an empty list when the file is absent or empty.
--
-- Uses strict text IO so the file handle is released immediately rather
-- than left dangling on the lazy spine of 'readFile'.
readIgnore :: IO [FilePath]
readIgnore =
(filter (not . null) . lines <$> readFile "IGNORE.txt")
(filter (not . null) . map T.unpack . T.lines <$> TIO.readFile "IGNORE.txt")
`catch` \(_ :: IOException) -> return []
-- ---------------------------------------------------------------------------
@ -49,13 +54,24 @@ readIgnore =
-- ---------------------------------------------------------------------------
-- | Return commit dates (ISO "YYYY-MM-DD", newest-first) for @fp@.
--
-- Logs git's stderr (when non-empty) to the build's stderr, so the author
-- isn't left in the dark when a file isn't tracked yet; the warning
-- previously vanished silently.
gitDates :: FilePath -> IO [String]
gitDates fp = do
(ec, out, _) <- readProcessWithExitCode
(ec, out, err) <- readProcessWithExitCode
"git" ["log", "--follow", "--format=%ad", "--date=short", "--", fp] ""
case ec of
ExitFailure _ -> return []
ExitSuccess -> return $ filter (not . null) (lines out)
ExitFailure _ -> do
let msg = if null err then "git log failed" else err
hPutStrLn stderr $ "[Stability] " ++ fp ++ ": " ++ msg
return []
ExitSuccess -> do
case err of
"" -> return ()
_ -> hPutStrLn stderr $ "[Stability] " ++ fp ++ ": " ++ err
return $ filter (not . null) (lines out)
-- | Parse an ISO "YYYY-MM-DD" string to a 'Day'.
parseIso :: String -> Maybe Day
@ -69,18 +85,39 @@ daySpan oldest newest =
_ -> 0
-- | Derive stability label from commit dates (newest-first).
--
-- Thresholds (commit count + age in days since first commit):
--
-- * @volatile@ — solo commit OR less than two weeks old.
-- * @revising@ — under six commits AND under three months old.
-- * @fairly stable@ — under sixteen commits OR under one year old.
-- * @stable@ — under thirty-one commits OR under two years old.
-- * @established@ — anything beyond.
--
-- These cliffs are deliberately conservative: a fast burst of commits
-- early in a piece's life looks volatile until enough time has passed
-- to demonstrate it has settled.
stabilityFromDates :: [String] -> String
stabilityFromDates [] = "volatile"
stabilityFromDates dates =
classify (length dates) (daySpan (last dates) (head dates))
stabilityFromDates dates@(newest : _) =
let oldest = case reverse dates of
(x : _) -> x
[] -> newest -- unreachable; matched above
in classify (length dates) (daySpan oldest newest)
where
classify n age
| n <= 1 || age < 14 = "volatile"
| n <= 5 && age < 90 = "revising"
| n <= 15 || age < 365 = "fairly stable"
| n <= 30 || age < 730 = "stable"
| n <= 1 || age < volatileAge = "volatile"
| n <= 5 && age < revisingAge = "revising"
| n <= 15 || age < fairlyStableAge = "fairly stable"
| n <= 30 || age < stableAge = "stable"
| otherwise = "established"
volatileAge, revisingAge, fairlyStableAge, stableAge :: Int
volatileAge = 14
revisingAge = 90
fairlyStableAge = 365
stableAge = 730
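-- A worked example, assuming 'parseIso' and 'daySpan' behave as documented
-- (dates newest-first): three commits over five weeks have left the
-- volatile window but sit inside the revising one; the same three commits
-- spread over a full year tip into "fairly stable" on age alone.
--
-- >>> stabilityFromDates ["2026-03-01", "2026-02-20", "2026-01-25"]
-- "revising"
-- >>> stabilityFromDates ["2026-03-01", "2025-09-01", "2025-03-01"]
-- "fairly stable"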
-- | Format an ISO date as "%-d %B %Y" (e.g. "16 March 2026").
fmtIso :: String -> String
fmtIso s = case parseIso s of

View File

@ -8,15 +8,18 @@ module Stats (statsRules) where
import Control.Exception (IOException, catch)
import Control.Monad (forM)
import Data.List (find, isSuffixOf, sort, sortBy)
import Data.Char (isSpace, toLower)
import Data.List (find, isPrefixOf, isSuffixOf, sort, sortBy)
import qualified Data.Map.Strict as Map
import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe)
import Data.Ord (comparing, Down (..))
import qualified Data.Set as Set
import Data.String (fromString)
import Data.Time (getCurrentTime, formatTime, defaultTimeLocale,
Day, parseTimeM, utctDay, addDays, diffDays)
import Data.Time.Calendar (toGregorian, dayOfWeek)
import System.Directory (doesDirectoryExist, getFileSize, listDirectory)
import System.Directory (doesDirectoryExist, getFileSize, listDirectory,
pathIsSymbolicLink)
import System.Exit (ExitCode (..))
import System.FilePath (takeExtension, (</>))
import System.Process (readProcessWithExitCode)
@ -26,10 +29,15 @@ import qualified Data.Aeson.Key as AK
import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import qualified Data.Text.Encoding as TE
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Text.Blaze.Html.Renderer.String (renderHtml)
import qualified Text.Blaze.Internal as BI
import Hakyll
import Authors (authorLinksField)
import Contexts (siteCtx)
import Contexts (siteCtx, authorLinksField)
import qualified Patterns as P
import Utils (readingTime)
-- ---------------------------------------------------------------------------
@ -100,10 +108,49 @@ pctStr _ 0 = "—"
pctStr n total = show (n * 100 `div` total) ++ "%"
-- | Strip HTML tags for plain-text word counting.
--
-- Handles:
-- * Tag bodies, including @>@ inside double- or single-quoted attribute
-- values (so @\<img alt=\"a > b\"\>@ doesn't slice the surrounding text).
-- * HTML comments @\<!-- ... --\>@ as a unit.
-- * @\<![CDATA[ ... ]]\>@ sections.
--
-- This is still a heuristic — it does not validate the HTML — but it
-- closes the most common ways for "tag stripping" to leak content.
stripHtmlTags :: String -> String
stripHtmlTags [] = []
stripHtmlTags ('<':rest) = stripHtmlTags (drop 1 (dropWhile (/= '>') rest))
stripHtmlTags (c:rest) = c : stripHtmlTags rest
stripHtmlTags = go
where
go [] = []
go ('<':'!':'-':'-':rest) = go (dropComment rest)
go ('<':'!':'[':'C':'D':'A':'T':'A':'[':rest)
= go (dropCdata rest)
go ('<':rest) = go (dropTag rest)
go (c:rest) = c : go rest
-- Drop everything up to and including "-->".
dropComment ('-':'-':'>':rs) = rs
dropComment (_:rs) = dropComment rs
dropComment [] = []
-- Drop everything up to and including "]]>".
dropCdata (']':']':'>':rs) = rs
dropCdata (_:rs) = dropCdata rs
dropCdata [] = []
-- Drop a tag body, respecting double-quoted attribute values.
dropTag ('"':rs) = dropTag (skipQuoted rs)
dropTag ('\'':rs) = dropTag (skipApos rs)
dropTag ('>':rs) = rs
dropTag (_:rs) = dropTag rs
dropTag [] = []
skipQuoted ('"':rs) = rs
skipQuoted (_:rs) = skipQuoted rs
skipQuoted [] = []
skipApos ('\'':rs) = rs
skipApos (_:rs) = skipApos rs
skipApos [] = []
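-- Spot checks: a quoted @>@ no longer splits the text, and comments drop
-- as a unit:
--
-- >>> stripHtmlTags "<p>hi <img alt=\"a > b\"> there</p>"
-- "hi  there"
-- >>> stripHtmlTags "a<!-- <b>ignored</b> -->z"
-- "az"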
-- | Normalise a page URL for backlink map lookup (strip trailing .html).
normUrl :: String -> String
@ -114,10 +161,16 @@ normUrl u
pad2 :: (Show a, Integral a) => a -> String
pad2 n = if n < 10 then "0" ++ show n else show n
-- | Median of a non-empty list; returns 0 for empty.
-- | Median of a list of 'Int's; returns 0 for the empty list. Uses 'drop'
-- plus a pattern match instead of @(!!)@ so the function is total in its
-- own implementation, not just by external invariant.
median :: [Int] -> Int
median [] = 0
median xs = let s = sort xs in s !! (length s `div` 2)
median xs =
case drop (length xs `div` 2) (sort xs) of
(m : _) -> m
[] -> 0 -- unreachable: length xs >= 1 above
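-- For even-length input this takes the upper of the two middle elements
-- (index @length xs `div` 2@ of the sorted list):
--
-- >>> median [3, 1, 2]
-- 2
-- >>> median [4, 1, 3, 2]
-- 3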
-- ---------------------------------------------------------------------------
-- Date helpers (for /stats/ page)
@ -145,12 +198,93 @@ shortMonth m = case m of
9 -> "Sep"; 10 -> "Oct"; 11 -> "Nov"; 12 -> "Dec"
_ -> ""
-- ---------------------------------------------------------------------------
-- URL sanitization and core HTML combinators
-- ---------------------------------------------------------------------------
-- | Defense-in-depth URL allowlist: reject anything that isn't an internal
-- path, a fragment, or an explicit safe scheme. The check lowercases the
-- URL and strips leading whitespace first, so @JavaScript:@, @\tjavascript:@,
-- @data:@, etc. are all caught.
-- @http://@ is intentionally excluded to avoid mixed-content warnings.
--
-- Protocol-relative URLs (@//evil.com@) are rejected because the leading
-- slash would otherwise admit them through the @\"\/\"@ prefix check.
isSafeUrl :: String -> Bool
isSafeUrl u =
let norm = map toLower (dropWhile isSpace u)
in not ("//" `isPrefixOf` norm)
&& any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"]
safeHref :: String -> H.AttributeValue
safeHref u
| isSafeUrl u = H.stringValue u
| otherwise = H.stringValue "#"
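-- Spot checks:
--
-- >>> map isSafeUrl ["/essays/", "#toc", "https://example.org/", "mailto:a@b"]
-- [True,True,True,True]
-- >>> map isSafeUrl ["javascript:alert(1)", " \tJavaScript:x", "//evil.example/", "http://example.org/"]
-- [False,False,False,False]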
-- | Shorthand for 'H.toHtml' over a 'String'.
txt :: String -> H.Html
txt = H.toHtml
-- | Anchor element with escaped title text and URL sanitized via 'safeHref'.
-- Use for trusted plain-text labels such as tag slugs.
link :: String -> String -> H.Html
link url title = H.a H.! A.href (safeHref url) $ H.toHtml title
-- | Anchor for a content page, where the title comes from frontmatter and
-- may contain author-authored inline HTML (e.g. @<em>Book Title</em>@).
-- The URL is still sanitized via 'safeHref'; the title is emitted through
-- 'H.preEscapedToHtml' (no further escaping), matching the site convention
-- that metadata titles are author-controlled trusted HTML.
pageLink :: String -> String -> H.Html
pageLink url title = H.a H.! A.href (safeHref url) $ H.preEscapedToHtml title
-- | Section heading: an @<h2>@ carrying the given id, followed by its body content.
section :: String -> String -> H.Html -> H.Html
section id_ title body = do
H.h2 H.! A.id (H.stringValue id_) $ H.toHtml title
body
-- | Build-telemetry table with header row, body rows, and an optional total
-- row. Cell contents are pre-rendered 'H.Html' so callers may embed links or
-- emphasis inside cells without risking double-escaping.
table :: [String] -> [[H.Html]] -> Maybe [H.Html] -> H.Html
table headers rows mFoot =
H.table H.! A.class_ "build-table" $ do
H.thead $ H.tr $ mapM_ (H.th . H.toHtml) headers
H.tbody $ mapM_ renderRow rows
maybe (return ()) renderFoot mFoot
where
renderRow cells = H.tr $ mapM_ H.td cells
renderFoot cells = H.tfoot $
H.tr H.! A.class_ "build-total" $ mapM_ H.td cells
-- | Two-column metadata block: each pair becomes a @<dt>/<dd>@ entry. Values
-- are 'H.Html' to allow mixing links and plain text.
dl :: [(String, H.Html)] -> H.Html
dl pairs = H.dl H.! A.class_ "build-meta" $
mapM_ (\(k, v) -> do H.dt (H.toHtml k); H.dd v) pairs
-- ---------------------------------------------------------------------------
-- SVG / custom element helpers (no blaze-svg dependency)
-- ---------------------------------------------------------------------------
svgTag, rectTag, textTag, titleTag :: H.Html -> H.Html
svgTag = BI.customParent "svg"
rectTag = BI.customParent "rect"
textTag = BI.customParent "text"
titleTag = BI.customParent "title"
-- | Attach an attribute that isn't in 'Text.Blaze.Html5.Attributes' (e.g.
-- SVG @viewBox@, @x@, @y@, or @data-target@).
customAttr :: String -> String -> H.Attribute
customAttr name val = BI.customAttribute (fromString name) (fromString val)
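-- A sketch of what these produce when rendered (attribute order follows
-- application order):
--
-- >>> renderHtml (rectTag H.! customAttr "x" "0" H.! customAttr "width" "10" $ mempty)
-- "<rect x=\"0\" width=\"10\"></rect>"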
-- ---------------------------------------------------------------------------
-- Heatmap SVG
-- ---------------------------------------------------------------------------
-- | 52-week writing activity heatmap (inline SVG, CSS-variable colors).
renderHeatmap :: Map.Map Day Int -> Day -> String
-- | 52-week writing activity heatmap. Styled via @.heatmap-svg@ rules in
-- static/css/build.css (no inline @<style>@).
renderHeatmap :: Map.Map Day Int -> Day -> H.Html
renderHeatmap wordsByDay today =
let cellSz = 10 :: Int
gap = 2 :: Int
@ -166,62 +300,67 @@ renderHeatmap wordsByDay today =
svgW = (nWeeks - 1) * step + cellSz
svgH = 6 * step + cellSz + hdrH
-- Month labels: one per first-of-month day
monthLbls = concatMap (\d ->
monthLabel d =
let (_, mo, da) = toGregorian d
in if da == 1
then "<text class=\"hm-lbl\" x=\"" ++ show (weekOf d * step)
++ "\" y=\"14\">" ++ shortMonth mo ++ "</text>"
else "") allDays
then textTag H.! A.class_ "hm-lbl"
H.! customAttr "x" (show (weekOf d * step))
H.! customAttr "y" "14"
$ txt (shortMonth mo)
else mempty
-- One rect per day
cells = concatMap (\d ->
dayCell d =
let wc = fromMaybe 0 (Map.lookup d wordsByDay)
(yr, mo, da) = toGregorian d
x = weekOf d * step
y = dowOf d * step + hdrH
tip = show yr ++ "-" ++ pad2 mo ++ "-" ++ pad2 da
++ if wc > 0 then ": " ++ commaInt wc ++ " words" else ""
in "<rect class=\"" ++ heatClass wc ++ "\""
++ " x=\"" ++ show x ++ "\" y=\"" ++ show y ++ "\""
++ " width=\"" ++ show cellSz ++ "\" height=\"" ++ show cellSz ++ "\""
++ " rx=\"2\"><title>" ++ tip ++ "</title></rect>") allDays
in rectTag H.! A.class_ (H.stringValue (heatClass wc))
H.! customAttr "x" (show x)
H.! customAttr "y" (show y)
H.! customAttr "width" (show cellSz)
H.! customAttr "height" (show cellSz)
H.! customAttr "rx" "2"
$ titleTag (txt tip)
-- Inline legend (five sample rects)
legendW = 5 * step - gap
legendSvg =
"<svg width=\"" ++ show legendW ++ "\" height=\"" ++ show cellSz ++ "\""
++ " viewBox=\"0 0 " ++ show legendW ++ " " ++ show cellSz ++ "\""
++ " style=\"display:inline;vertical-align:middle\">"
++ concatMap (\i ->
"<rect class=\"hm" ++ show i ++ "\""
++ " x=\"" ++ show (i * step) ++ "\" y=\"0\""
++ " width=\"" ++ show cellSz ++ "\" height=\"" ++ show cellSz ++ "\""
++ " rx=\"2\"/>") [0..4]
++ "</svg>"
legendCell i =
rectTag H.! A.class_ (H.stringValue ("hm" ++ show i))
H.! customAttr "x" (show (i * step))
H.! customAttr "y" "0"
H.! customAttr "width" (show cellSz)
H.! customAttr "height" (show cellSz)
H.! customAttr "rx" "2"
$ mempty
in "<figure class=\"stats-heatmap\">"
++ "<svg width=\"" ++ show svgW ++ "\" height=\"" ++ show svgH ++ "\""
++ " viewBox=\"0 0 " ++ show svgW ++ " " ++ show svgH ++ "\""
++ " class=\"heatmap-svg\" role=\"img\""
++ " aria-label=\"52-week writing activity heatmap\">"
++ "<style>"
++ ".hm0{fill:var(--hm-0)}.hm1{fill:var(--hm-1)}.hm2{fill:var(--hm-2)}"
++ ".hm3{fill:var(--hm-3)}.hm4{fill:var(--hm-4)}"
++ ".hm-lbl{font-size:9px;fill:var(--text-faint);font-family:sans-serif}"
++ "</style>"
++ monthLbls ++ cells
++ "</svg>"
++ "<figcaption class=\"heatmap-legend\">"
++ "Less\xA0" ++ legendSvg ++ "\xA0More"
++ "</figcaption>"
++ "</figure>"
legendSvg =
svgTag H.! customAttr "width" (show legendW)
H.! customAttr "height" (show cellSz)
H.! customAttr "viewBox" ("0 0 " ++ show legendW ++ " " ++ show cellSz)
H.! customAttr "style" "display:inline;vertical-align:middle"
$ mapM_ legendCell [0 .. 4 :: Int]
in H.figure H.! A.class_ "stats-heatmap" $ do
svgTag H.! customAttr "width" (show svgW)
H.! customAttr "height" (show svgH)
H.! customAttr "viewBox" ("0 0 " ++ show svgW ++ " " ++ show svgH)
H.! A.class_ "heatmap-svg"
H.! customAttr "role" "img"
H.! customAttr "aria-label" "52-week writing activity heatmap"
$ do
mapM_ monthLabel allDays
mapM_ dayCell allDays
H.figcaption H.! A.class_ "heatmap-legend" $ do
"Less\xA0"
legendSvg
"\xA0More"
-- ---------------------------------------------------------------------------
-- Stats page sections
-- ---------------------------------------------------------------------------
renderMonthlyVolume :: Map.Map Day Int -> String
renderMonthlyVolume :: Map.Map Day Int -> H.Html
renderMonthlyVolume wordsByDay =
section "volume" "Monthly volume" $
let byMonth = Map.fromListWith (+)
@ -230,71 +369,80 @@ renderMonthlyVolume wordsByDay =
, let (y, m, _) = toGregorian day
]
in if Map.null byMonth
then "<p><em>No dated content yet.</em></p>"
then H.p (H.em "No dated content yet.")
else
let maxWC = max 1 $ maximum $ Map.elems byMonth
bar (y, m) =
let wc = fromMaybe 0 (Map.lookup (y, m) byMonth)
pct = if wc == 0 then 0 else max 2 (wc * 100 `div` maxWC)
lbl = shortMonth m ++ " \x2019" ++ drop 2 (show y)
in "<div class=\"build-bar-row\">"
++ "<span class=\"build-bar-label\">" ++ lbl ++ "</span>"
++ "<span class=\"build-bar-wrap\"><span class=\"build-bar\" style=\"width:"
++ show pct ++ "%\"></span></span>"
++ "<span class=\"build-bar-count\">"
++ (if wc > 0 then commaInt wc else "") ++ "</span>"
++ "</div>"
in "<div class=\"build-bars\">" ++ concatMap bar (Map.keys byMonth) ++ "</div>"
in H.div H.! A.class_ "build-bar-row" $ do
H.span H.! A.class_ "build-bar-label" $ txt lbl
H.span H.! A.class_ "build-bar-wrap" $
H.span H.! A.class_ "build-bar"
H.! A.style (H.stringValue ("width:" ++ show pct ++ "%"))
$ mempty
H.span H.! A.class_ "build-bar-count" $
if wc > 0 then txt (commaInt wc) else mempty
in H.div H.! A.class_ "build-bars" $
mapM_ bar (Map.keys byMonth)
renderCorpus :: [TypeRow] -> [PageInfo] -> String
renderCorpus :: [TypeRow] -> [PageInfo] -> H.Html
renderCorpus typeRows allPIs =
section "corpus" "Corpus" $ concat
[ dl [ ("Total words", commaInt totalWords)
, ("Total pages", commaInt (length allPIs))
, ("Total reading time", rtStr totalWords)
, ("Average length", commaInt avgWC ++ " words")
, ("Median length", commaInt medWC ++ " words")
section "corpus" "Corpus" $ do
dl [ ("Total words", txt (commaInt totalWords))
, ("Total pages", txt (commaInt (length allPIs)))
, ("Total reading time", txt (rtStr totalWords))
, ("Average length", txt (commaInt avgWC ++ " words"))
, ("Median length", txt (commaInt medWC ++ " words"))
]
, table ["Type", "Pages", "Words", "Reading time"]
table ["Type", "Pages", "Words", "Reading time"]
(map row typeRows)
(Just ["Total", commaInt (sum (map trCount typeRows))
, commaInt totalWords, rtStr totalWords])
]
(Just [ "Total"
, txt (commaInt (sum (map trCount typeRows)))
, txt (commaInt totalWords)
, txt (rtStr totalWords)
])
where
hasSomeWC = filter (\p -> piWC p > 0) allPIs
totalWords = sum (map trWords typeRows)
avgWC = if null hasSomeWC then 0 else totalWords `div` length hasSomeWC
medWC = median (map piWC hasSomeWC)
row r = [trLabel r, commaInt (trCount r), commaInt (trWords r), rtStr (trWords r)]
renderNotable :: [PageInfo] -> String
renderNotable allPIs =
section "notable" "Notable" $ concat
[ "<p><strong>Longest</strong></p>"
, pageList (take 5 (sortBy (comparing (Down . piWC)) hasSomeWC))
, "<p><strong>Shortest</strong></p>"
, pageList (take 5 (sortBy (comparing piWC) hasSomeWC))
row r = [ txt (trLabel r)
, txt (commaInt (trCount r))
, txt (commaInt (trWords r))
, txt (rtStr (trWords r))
]
renderNotable :: [PageInfo] -> H.Html
renderNotable allPIs =
section "notable" "Notable" $ do
H.p (H.strong "Longest")
pageList (take 5 (sortBy (comparing (Down . piWC)) hasSomeWC))
H.p (H.strong "Shortest")
pageList (take 5 (sortBy (comparing piWC) hasSomeWC))
where
hasSomeWC = filter (\p -> piWC p > 50) allPIs
pageList ps = "<ol class=\"build-page-list\">"
++ concatMap (\p -> "<li>" ++ link (piUrl p) (piTitle p)
++ " \x2014 " ++ commaInt (piWC p) ++ " words</li>") ps
++ "</ol>"
pageList ps = H.ol H.! A.class_ "build-page-list" $
mapM_ (\p -> H.li $ do
pageLink (piUrl p) (piTitle p)
txt (" \x2014 " ++ commaInt (piWC p) ++ " words")
) ps
renderStatsTags :: [(String, Int)] -> Int -> String
renderStatsTags topTags uniqueCount =
section "tags" "Tags" $ concat
[ dl [("Unique tags", commaInt uniqueCount)]
, table ["Tag", "Items"] (map row topTags) Nothing
]
where row (t, n) = [link ("/" ++ t ++ "/") t, show n]
-- | Alias for 'renderTagsSection' below, kept for legacy call sites until
-- they are migrated. Defining it as the same function (instead of an
-- independent copy) prevents the two from drifting silently.
renderStatsTags :: [(String, Int)] -> Int -> H.Html
renderStatsTags = renderTagsSection
statsTOC :: String
statsTOC = "<ol>\n" ++ concatMap item entries ++ "</ol>\n"
statsTOC :: H.Html
statsTOC = H.ol $ mapM_ item entries
where
item (i, t) = "<li><a href=\"#" ++ i ++ "\" data-target=\"" ++ i ++ "\">"
++ t ++ "</a></li>\n"
item (i, t) =
H.li $ H.a H.! A.href (H.stringValue ("#" ++ i))
H.! customAttr "data-target" i
$ txt t
entries = [ ("activity", "Writing activity")
, ("volume", "Monthly volume")
, ("corpus", "Corpus")
@ -306,16 +454,27 @@ statsTOC = "<ol>\n" ++ concatMap item entries ++ "</ol>\n"
-- IO: output directory walk
-- ---------------------------------------------------------------------------
-- | Recursively walk a directory, returning @(file, size)@ tuples for every
-- regular file beneath it.
--
-- Symlinks (both files and directories) are skipped, so a stray
-- @_site\/a -> _site@ doesn't trigger an infinite loop.
walkDir :: FilePath -> IO [(FilePath, Integer)]
walkDir dir = do
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
fmap concat $ forM entries $ \e -> do
let path = dir </> e
isLink <- pathIsSymbolicLink path
`catch` (\(_ :: IOException) -> return False)
if isLink
then return []
else do
isDir <- doesDirectoryExist path
if isDir
then walkDir path
else do
sz <- getFileSize path `catch` (\(_ :: IOException) -> return 0)
sz <- getFileSize path
`catch` (\(_ :: IOException) -> return 0)
return [(path, sz)]
displayExt :: FilePath -> String
@ -354,9 +513,13 @@ countLinesDir :: FilePath -> String -> (FilePath -> Bool) -> IO (Int, Int)
countLinesDir dir ext skipPred = do
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
let files = filter (\e -> takeExtension e == ext && not (skipPred e)) entries
-- Use strict text IO so each file handle is released as soon as the
-- contents are read; the prior lazy 'readFile' under 'forM' kept every
-- handle open until the final 'sum' forced the line counts.
ls <- fmap sum $ forM files $ \e -> do
content <- readFile (dir </> e) `catch` (\(_ :: IOException) -> return "")
return (length (lines content))
content <- TIO.readFile (dir </> e)
`catch` (\(_ :: IOException) -> return T.empty)
return (length (T.lines content))
return (length files, ls)
getLocStats :: IO (Int, Int, Int, Int, Int, Int)
@ -385,125 +548,116 @@ getGitStats = do
return (commits, firstDate)
-- ---------------------------------------------------------------------------
-- HTML rendering: section helpers
-- HTML rendering: build page sections
-- ---------------------------------------------------------------------------
section :: String -> String -> String -> String
section id_ title body = concat
[ "<h2 id=\"", id_, "\">", title, "</h2>\n"
, body
]
table :: [String] -> [[String]] -> Maybe [String] -> String
table headers rows mFoot = concat
[ "<table class=\"build-table\">"
, "<thead><tr>", concatMap (\h -> "<th>" ++ h ++ "</th>") headers, "</tr></thead>"
, "<tbody>", concatMap renderRow rows, "</tbody>"
, maybe "" renderFoot mFoot
, "</table>"
]
where
renderRow cells = "<tr>" ++ concatMap (\c -> "<td>" ++ c ++ "</td>") cells ++ "</tr>"
renderFoot cells = "<tfoot><tr class=\"build-total\">"
++ concatMap (\c -> "<td>" ++ c ++ "</td>") cells
++ "</tr></tfoot>"
dl :: [(String, String)] -> String
dl pairs = "<dl class=\"build-meta\">"
++ concatMap (\(k, v) -> "<dt>" ++ k ++ "</dt><dd>" ++ v ++ "</dd>") pairs
++ "</dl>"
link :: String -> String -> String
link url title = "<a href=\"" ++ url ++ "\">" ++ title ++ "</a>"
-- ---------------------------------------------------------------------------
-- HTML rendering: sections
-- ---------------------------------------------------------------------------
renderContent :: [TypeRow] -> String
renderContent :: [TypeRow] -> H.Html
renderContent rows =
section "content" "Content" $
table
["Type", "Count", "Words", "Reading time"]
table ["Type", "Count", "Words", "Reading time"]
(map row rows)
(Just ["Total", commaInt totalCount, commaInt totalWords, rtStr totalWords])
(Just [ "Total"
, txt (commaInt totalCount)
, txt (commaInt totalWords)
, txt (rtStr totalWords)
])
where
totalCount = sum (map trCount rows)
totalWords = sum (map trWords rows)
row r = [ trLabel r
, commaInt (trCount r)
, commaInt (trWords r)
, rtStr (trWords r)
row r = [ txt (trLabel r)
, txt (commaInt (trCount r))
, txt (commaInt (trWords r))
, txt (rtStr (trWords r))
]
renderPages :: [PageInfo] -> Maybe (String,String,String) -> Maybe (String,String,String) -> String
renderPages :: [PageInfo]
-> Maybe (String, String, String)
-> Maybe (String, String, String)
-> H.Html
renderPages allPIs mOldest mNewest =
section "pages" "Pages" $ concat
[ dl $
[ ("Total pages", commaInt (length allPIs))
, ("Average length", commaInt avgWC ++ " words")
section "pages" "Pages" $ do
dl $
[ ("Total pages", txt (commaInt (length allPIs)))
, ("Average length", txt (commaInt avgWC ++ " words"))
] ++
maybe [] (\(d,t,u) -> [("Oldest content", d ++ " \x2014 " ++ link u t)]) mOldest ++
maybe [] (\(d,t,u) -> [("Newest content", d ++ " \x2014 " ++ link u t)]) mNewest
, "<p><strong>Longest</strong></p>"
, pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC))
, "<p><strong>Shortest</strong></p>"
, pageList (take 3 (sortBy (comparing piWC) hasSomeWC))
]
maybe [] (\(d,t,u) -> [("Oldest content", datedLink d t u)]) mOldest ++
maybe [] (\(d,t,u) -> [("Newest content", datedLink d t u)]) mNewest
H.p (H.strong "Longest")
pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC))
H.p (H.strong "Shortest")
pageList (take 3 (sortBy (comparing piWC) hasSomeWC))
where
hasSomeWC = filter (\p -> piWC p > 0) allPIs
avgWC = if null hasSomeWC then 0
else sum (map piWC hasSomeWC) `div` length hasSomeWC
pageList ps = "<ol class=\"build-page-list\">"
++ concatMap (\p -> "<li>" ++ link (piUrl p) (piTitle p)
++ " \x2014 " ++ commaInt (piWC p) ++ " words</li>") ps
++ "</ol>"
datedLink d t u = do
txt (d ++ " \x2014 ")
pageLink u t
pageList ps = H.ol H.! A.class_ "build-page-list" $
mapM_ (\p -> H.li $ do
pageLink (piUrl p) (piTitle p)
txt (" \x2014 " ++ commaInt (piWC p) ++ " words")
) ps
renderDistribution :: [Int] -> String
renderDistribution :: [Int] -> H.Html
renderDistribution wcs =
section "distribution" "Word-length distribution" $
"<div class=\"build-bars\">" ++ concatMap bar buckets ++ "</div>"
H.div H.! A.class_ "build-bars" $ mapM_ bar buckets
where
bucketOf w
| w < 500 = 0 | w < 1000 = 1 | w < 2000 = 2 | w < 5000 = 3 | otherwise = 4
labels = ["&lt; 500", "500 \x2013 1k", "1k \x2013 2k", "2k \x2013 5k", "\x2265 5k"]
| w < 500 = 0
| w < 1000 = 1
| w < 2000 = 2
| w < 5000 = 3
| otherwise = 4
labels :: [H.Html]
labels = [ "< 500"
, "500 \x2013 1k"
, "1k \x2013 2k"
, "2k \x2013 5k"
, "\x2265 5k"
]
counts = foldr (\w acc -> Map.insertWith (+) (bucketOf w) (1 :: Int) acc)
(Map.fromList [(i, 0 :: Int) | i <- [0..4]]) wcs
buckets = [(labels !! i, fromMaybe 0 (Map.lookup i counts)) | i <- [0..4]]
(Map.fromList [(i, 0 :: Int) | i <- [0 .. 4]]) wcs
buckets = [(labels !! i, fromMaybe 0 (Map.lookup i counts)) | i <- [0 .. 4]]
maxCount = max 1 (maximum (map snd buckets))
bar (lbl, n) =
let pct = n * 100 `div` maxCount
in concat
[ "<div class=\"build-bar-row\">"
, "<span class=\"build-bar-label\">", lbl, "</span>"
, "<span class=\"build-bar-wrap\"><span class=\"build-bar\" style=\"width:"
, show pct, "%\"></span></span>"
, "<span class=\"build-bar-count\">", show n, "</span>"
, "</div>"
]
in H.div H.! A.class_ "build-bar-row" $ do
H.span H.! A.class_ "build-bar-label" $ lbl
H.span H.! A.class_ "build-bar-wrap" $
H.span H.! A.class_ "build-bar"
H.! A.style (H.stringValue ("width:" ++ show pct ++ "%"))
$ mempty
H.span H.! A.class_ "build-bar-count" $ txt (show n)
renderTagsSection :: [(String, Int)] -> Int -> String
renderTagsSection :: [(String, Int)] -> Int -> H.Html
renderTagsSection topTags uniqueCount =
section "tags" "Tags" $ concat
[ dl [("Unique tags", commaInt uniqueCount)]
, table ["Tag", "Items"] (map row topTags) Nothing
]
section "tags" "Tags" $ do
dl [("Unique tags", txt (commaInt uniqueCount))]
table ["Tag", "Items"] (map row topTags) Nothing
where
row (t, n) = [link ("/" ++ t ++ "/") t, show n]
row (t, n) = [link ("/" ++ t ++ "/") t, txt (show n)]
renderLinks :: Maybe (String, Int, String) -> Int -> Int -> String
renderLinks :: Maybe (String, Int, String) -> Int -> Int -> H.Html
renderLinks mMostLinked orphanCount total =
section "links" "Links" $
dl $
(case mMostLinked of
Nothing -> [("Most-linked page", "\x2014")]
Just (u, n, t) -> [("Most-linked page",
link u t ++ " (" ++ show n ++ " inbound links)")]) ++
[ ("Orphan pages", commaInt orphanCount
dl
[ case mMostLinked of
Nothing -> ("Most-linked page", "\x2014")
Just (u, n, t) ->
( "Most-linked page"
, do pageLink u t
txt (" (" ++ show n ++ " inbound links)")
)
, ( "Orphan pages"
, txt (commaInt orphanCount
++ " of " ++ commaInt total
++ " (" ++ pctStr orphanCount total ++ ")") ]
++ " (" ++ pctStr orphanCount total ++ ")")
)
]
renderEpistemic :: Int -> Int -> Int -> Int -> Int -> String
renderEpistemic :: Int -> Int -> Int -> Int -> Int -> H.Html
renderEpistemic total ws wc wi we =
section "epistemic" "Epistemic coverage" $
table
@ -515,47 +669,54 @@ renderEpistemic total ws wc wi we =
]
Nothing
where
row label n = [label, show n ++ " / " ++ show total, pctStr n total]
row label n = [ txt label
, txt (show n ++ " / " ++ show total)
, txt (pctStr n total)
]
renderOutput :: Map.Map String (Int, Integer) -> Int -> Integer -> String
renderOutput :: Map.Map String (Int, Integer) -> Int -> Integer -> H.Html
renderOutput grouped totalFiles totalSize =
section "output" "Output" $
table
["Type", "Files", "Size"]
(map row (sortBy (comparing (Down . snd . snd)) (Map.toList grouped)))
(Just ["Total", commaInt totalFiles, formatBytes totalSize])
(Just [ "Total"
, txt (commaInt totalFiles)
, txt (formatBytes totalSize)
])
where
row (ext, (n, sz)) = [ext, commaInt n, formatBytes sz]
row (ext, (n, sz)) = [txt ext, txt (commaInt n), txt (formatBytes sz)]
renderRepository :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> String -> String
renderRepository :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> String -> H.Html
renderRepository hf hl cf cl jf jl commits firstDate =
section "repository" "Repository" $
dl
[ ("Haskell", commaInt hl ++ " lines across " ++ show hf ++ " files")
, ("CSS", commaInt cl ++ " lines across " ++ show cf ++ " files")
, ("JavaScript", commaInt jl ++ " lines across " ++ show jf ++ " files (excl. minified)")
, ("Total git commits", commaInt commits)
, ("Repository started", firstDate)
[ ("Haskell", txt (commaInt hl ++ " lines across " ++ show hf ++ " files"))
, ("CSS", txt (commaInt cl ++ " lines across " ++ show cf ++ " files"))
, ("JavaScript", txt (commaInt jl ++ " lines across " ++ show jf ++ " files (excl. minified)"))
, ("Total git commits", txt (commaInt commits))
, ("Repository started", txt firstDate)
]
renderBuild :: String -> String -> String
renderBuild :: String -> String -> H.Html
renderBuild ts dur =
section "build" "Build" $
dl
[ ("Generated", ts)
, ("Last build duration", dur)
[ ("Generated", txt ts)
, ("Last build duration", txt dur)
]
-- ---------------------------------------------------------------------------
-- Static TOC (matches the nine h2 sections above)
-- ---------------------------------------------------------------------------
pageTOC :: String
pageTOC = "<ol>\n" ++ concatMap item sections ++ "</ol>\n"
pageTOC :: H.Html
pageTOC = H.ol $ mapM_ item sections
where
item (id_, title) =
"<li><a href=\"#" ++ id_ ++ "\" data-target=\"" ++ id_ ++ "\">"
++ title ++ "</a></li>\n"
H.li $ H.a H.! A.href (H.stringValue ("#" ++ id_))
H.! customAttr "data-target" id_
$ txt title
sections =
[ ("content", "Content")
, ("pages", "Pages")
@ -584,7 +745,7 @@ statsRules tags = do
-- ----------------------------------------------------------------
-- Load all content items
-- ----------------------------------------------------------------
essays <- loadAll ("content/essays/*.md" .&&. hasNoVersion)
essays <- loadAll (P.essayPattern .&&. hasNoVersion)
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
@ -698,21 +859,22 @@ statsRules tags = do
-- ----------------------------------------------------------------
-- Assemble page
-- ----------------------------------------------------------------
let content = concat
[ renderContent rows
, renderPages allPIs oldestDate newestDate
, renderDistribution allWCs
, renderTagsSection topTags uniqueTags
, renderLinks mostLinkedInfo orphanCount (length allPIs)
, renderEpistemic epTotal withStatus withConf withImp withEv
, renderOutput outputGrouped totalFiles totalSize
, renderRepository hf hl cf cl jf jl commits firstDate
, renderBuild buildTimestamp lastBuildDur
]
plainText = stripHtmlTags content
let htmlContent :: H.Html
htmlContent = do
renderContent rows
renderPages allPIs oldestDate newestDate
renderDistribution allWCs
renderTagsSection topTags uniqueTags
renderLinks mostLinkedInfo orphanCount (length allPIs)
renderEpistemic epTotal withStatus withConf withImp withEv
renderOutput outputGrouped totalFiles totalSize
renderRepository hf hl cf cl jf jl commits firstDate
renderBuild buildTimestamp lastBuildDur
contentString = renderHtml htmlContent
plainText = stripHtmlTags contentString
wc = length (words plainText)
rt = readingTime plainText
ctx = constField "toc" pageTOC
ctx = constField "toc" (renderHtml pageTOC)
<> constField "word-count" (show wc)
<> constField "reading-time" (show rt)
<> constField "title" "Build Telemetry"
@ -723,7 +885,7 @@ statsRules tags = do
<> authorLinksField
<> siteCtx
makeItem content
makeItem contentString
>>= loadAndApplyTemplate "templates/essay.html" ctx
>>= loadAndApplyTemplate "templates/default.html" ctx
>>= relativizeUrls
@ -734,7 +896,7 @@ statsRules tags = do
create ["stats/index.html"] $ do
route idRoute
compile $ do
essays <- loadAll ("content/essays/*.md" .&&. hasNoVersion)
essays <- loadAll (P.essayPattern .&&. hasNoVersion)
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
@ -773,17 +935,18 @@ statsRules tags = do
today <- unsafeCompiler (utctDay <$> getCurrentTime)
let content = concat
[ section "activity" "Writing activity" (renderHeatmap wordsByDay today)
, renderMonthlyVolume wordsByDay
, renderCorpus typeRows allPIs
, renderNotable allPIs
, renderStatsTags topTags uniqueTags
]
plainText = stripHtmlTags content
let htmlContent :: H.Html
htmlContent = do
section "activity" "Writing activity" (renderHeatmap wordsByDay today)
renderMonthlyVolume wordsByDay
renderCorpus typeRows allPIs
renderNotable allPIs
renderStatsTags topTags uniqueTags
contentString = renderHtml htmlContent
plainText = stripHtmlTags contentString
wc = length (words plainText)
rt = readingTime plainText
ctx = constField "toc" statsTOC
ctx = constField "toc" (renderHtml statsTOC)
<> constField "word-count" (show wc)
<> constField "reading-time" (show rt)
<> constField "title" "Writing Statistics"
@ -793,7 +956,7 @@ statsRules tags = do
<> authorLinksField
<> siteCtx
makeItem content
makeItem contentString
>>= loadAndApplyTemplate "templates/essay.html" ctx
>>= loadAndApplyTemplate "templates/default.html" ctx
>>= relativizeUrls

View File

@ -15,12 +15,13 @@
module Tags
( buildAllTags
, applyTagRules
, tagLinksField
) where
import Data.List (intercalate, nub)
import Hakyll
import Pagination (pageSize, sortAndGroup)
import Pagination (sortAndGroup)
import Patterns (tagIndexable)
import Contexts (abstractField, tagLinksField)
-- ---------------------------------------------------------------------------
@ -64,9 +65,7 @@ tagIdentifier = fromFilePath . tagFilePath
-- | Scan all essays and blog posts and build the Tags index.
buildAllTags :: Rules Tags
buildAllTags =
buildTagsWith getExpandedTags allContent tagIdentifier
where
allContent = ("content/essays/*.md" .||. "content/essays/*/index.md" .||. "content/blog/*.md") .&&. hasNoVersion
buildTagsWith getExpandedTags tagIndexable tagIdentifier
-- ---------------------------------------------------------------------------
@ -77,6 +76,7 @@ tagItemCtx :: Context String
tagItemCtx =
dateField "date" "%-d %B %Y"
<> tagLinksField "item-tags"
<> abstractField
<> defaultContext
-- | Page identifier for a tag index page.
@ -106,18 +106,3 @@ applyTagRules tags baseCtx = tagsRules tags $ \tag pat -> do
>>= relativizeUrls
-- ---------------------------------------------------------------------------
-- Tag links context field
-- ---------------------------------------------------------------------------
-- | List context field exposing an item's own (non-expanded) tags as
-- @tag-name@ / @tag-url@ objects.
--
-- $for(essay-tags)$<a href="$tag-url$">$tag-name$</a>$endfor$
tagLinksField :: String -> Context a
tagLinksField fieldName = listFieldWith fieldName itemCtx $ \item ->
map toItem <$> getTags (itemIdentifier item)
where
toItem t = Item (tagIdentifier t) t
itemCtx = field "tag-name" (return . itemBody)
<> field "tag-url" (\i -> return $ "/" ++ itemBody i ++ "/")

View File

@ -1,10 +1,26 @@
{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Shared utilities used across the build system.
--
-- The HTML escapers (one for 'String', one for 'Text') live here so that
-- every filter, context, and renderer goes through the same definition.
-- In a sequential find-and-replace implementation the order matters: @&@
-- MUST be replaced first, otherwise the @&@ inside entities injected by the
-- other rules is re-escaped, turning @&lt;@ into @&amp;lt;@. The
-- character-by-character implementation used here avoids that hazard
-- entirely (each character is mapped exactly once).
module Utils
( wordCount
, readingTime
, escapeHtml
, escapeHtmlText
, trim
, authorSlugify
, authorNameOf
) where
import Data.Char (isAlphaNum, isSpace, toLower)
import qualified Data.Text as T
-- | Count the number of words in a string (split on whitespace).
wordCount :: String -> Int
wordCount = length . words
@ -14,13 +30,49 @@ wordCount = length . words
readingTime :: String -> Int
readingTime s = max 1 (wordCount s `div` 200)
-- | Escape HTML special characters: <, >, &, ", '.
-- | Escape HTML special characters: @&@, @<@, @>@, @\"@, @\'@.
--
-- Safe for use in attribute values and text content. The order of the
-- equations is irrelevant: each input character maps to exactly one
-- output sequence.
escapeHtml :: String -> String
escapeHtml = concatMap escChar
where
escChar '&' = "&amp;"
escChar '<' = "&lt;"
escChar '>' = "&gt;"
escChar '&' = "&amp;"
escChar '"' = "&quot;"
escChar '\'' = "&#39;"
escChar c = [c]
-- | 'Text' counterpart of 'escapeHtml'.
escapeHtmlText :: T.Text -> T.Text
escapeHtmlText = T.concatMap escChar
where
escChar '&' = "&amp;"
escChar '<' = "&lt;"
escChar '>' = "&gt;"
escChar '"' = "&quot;"
escChar '\'' = "&#39;"
escChar c = T.singleton c
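-- Both escapers map each character exactly once, so one pass over mixed
-- content is safe:
--
-- >>> escapeHtml "<a href=\"x\">&'</a>"
-- "&lt;a href=&quot;x&quot;&gt;&amp;&#39;&lt;/a&gt;"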
-- | Strip leading and trailing whitespace.
trim :: String -> String
trim = dropWhile isSpace . reverse . dropWhile isSpace . reverse
-- | Lowercase a string, drop everything that isn't alphanumeric or
-- space, then replace each remaining space with a hyphen.
--
-- Used for author URL slugs (e.g. @"Levi Neuwirth" → "levi-neuwirth"@).
-- Centralised here so 'Authors' and 'Contexts' cannot drift on Unicode
-- edge cases.
authorSlugify :: String -> String
authorSlugify = map (\c -> if c == ' ' then '-' else c)
. filter (\c -> isAlphaNum c || c == ' ')
. map toLower
-- | Extract the author name from a "Name | url" frontmatter entry.
-- The URL portion is dropped (it's no longer used by the author system,
-- which routes everything through @/authors/{slug}/@).
authorNameOf :: String -> String
authorNameOf s = trim (takeWhile (/= '|') s)
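-- Spot checks ("Jane Doe" is an illustrative entry, not real frontmatter):
--
-- >>> authorSlugify "Levi Neuwirth"
-- "levi-neuwirth"
-- >>> authorNameOf "Jane Doe | https://example.org"
-- "Jane Doe"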

View File

@ -2,7 +2,7 @@ active-repositories: hackage.haskell.org:merge
constraints: any.Glob ==0.10.2,
any.HUnit ==1.6.2.0,
any.JuicyPixels ==3.3.9,
any.OneTuple ==0.4.2,
any.OneTuple ==0.4.2.1,
any.Only ==0.1,
any.QuickCheck ==2.15.0.1,
any.StateVar ==1.2.2,
@ -182,7 +182,7 @@ constraints: any.Glob ==0.10.2,
any.text-conversions ==0.3.1.1,
any.text-icu ==0.8.0.5,
any.text-iso8601 ==0.1.1,
any.text-short ==0.1.6,
any.text-short ==0.1.6.1,
any.th-abstraction ==0.6.0.0,
any.th-compat ==0.1.7,
any.th-expand-syns ==0.4.12.0,

View File

@ -20,9 +20,9 @@ executable site
SimilarLinks
Compilers
Contexts
Patterns
Stats
Stability
Metadata
Tags
Pagination
Citations
@ -33,6 +33,7 @@ executable site
Filters.Smallcaps
Filters.Wikilinks
Filters.Transclusion
Filters.EmbedPdf
Filters.Links
Filters.Math
Filters.Code
@ -56,9 +57,10 @@ executable site
bytestring >= 0.11 && < 0.13,
process >= 1.6 && < 1.7,
data-default >= 0.7 && < 0.8,
mtl >= 2.3 && < 2.4
mtl >= 2.3 && < 2.4,
blaze-html >= 0.9 && < 0.10,
blaze-markup >= 0.8 && < 0.9
default-language: GHC2021
ghc-options:
-threaded
-Wall
-Wno-unused-imports