audit: Haskell build system correctness + Patterns.hs + Stats blaze rewrite

This commit is contained in:
Levi Neuwirth 2026-04-10 17:40:49 -04:00
parent c864e2f9cc
commit a358c8b246
28 changed files with 1290 additions and 548 deletions

View File

@ -12,32 +12,29 @@
module Authors module Authors
( buildAllAuthors ( buildAllAuthors
, applyAuthorRules , applyAuthorRules
, authorLinksField
) where ) where
import Data.Char (isAlphaNum, toLower)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Hakyll import Hakyll
import Hakyll.Core.Metadata (lookupStringList)
import Pagination (sortAndGroup) import Pagination (sortAndGroup)
import Tags (tagLinksField) import Patterns (authorIndexable)
import Contexts (abstractField, tagLinksField)
import Utils (authorSlugify, authorNameOf)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Slug helpers -- Slug helpers
--
-- The slugify and nameOf helpers used to live here in their own
-- definitions; they now defer to 'Utils' so that they cannot drift from
-- the 'Contexts' versions on Unicode edge cases.
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | Lowercase, replace spaces with hyphens, strip anything else.
slugify :: String -> String slugify :: String -> String
slugify = map (\c -> if c == ' ' then '-' else c) slugify = authorSlugify
. filter (\c -> isAlphaNum c || c == ' ')
. map toLower
-- | Extract the author name from a "Name | url" entry, trimming whitespace.
nameOf :: String -> String nameOf :: String -> String
nameOf s = strip $ case break (== '|') s of { (n, _) -> n } nameOf = authorNameOf
where
strip = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ')
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -47,8 +44,10 @@ nameOf s = strip $ case break (== '|') s of { (n, _) -> n }
defaultAuthor :: String defaultAuthor :: String
defaultAuthor = "Levi Neuwirth" defaultAuthor = "Levi Neuwirth"
-- | Content patterns indexed by author. Sourced from 'Patterns.authorIndexable'
-- so this stays in lockstep with Tags.hs and Backlinks.hs.
allContent :: Pattern allContent :: Pattern
allContent = ("content/essays/*.md" .||. "content/blog/*.md") .&&. hasNoVersion allContent = authorIndexable
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -103,25 +102,7 @@ applyAuthorRules authors baseCtx = tagsRules authors $ \name pat -> do
where where
itemCtx = dateField "date" "%-d %B %Y" itemCtx = dateField "date" "%-d %B %Y"
<> tagLinksField "item-tags" <> tagLinksField "item-tags"
<> abstractField
<> defaultContext <> defaultContext
-- ---------------------------------------------------------------------------
-- Context field
-- ---------------------------------------------------------------------------
-- | Exposes each item's authors as @author-name@ / @author-url@ pairs.
-- All links point to /authors/{slug}/, regardless of any URL in frontmatter.
-- Defaults to Levi Neuwirth when no "authors" frontmatter key is present.
--
-- Usage in templates:
-- $for(author-links)$<a href="$author-url$">$author-name$</a>$sep$, $endfor$
authorLinksField :: Context a
authorLinksField = listFieldWith "author-links" ctx $ \item -> do
meta <- getMetadata (itemIdentifier item)
let entries = fromMaybe [] (lookupStringList "authors" meta)
names = if null entries then [defaultAuthor] else map nameOf entries
return $ map (\n -> Item (fromFilePath "") (n, "/authors/" ++ slugify n ++ "/")) names
where
ctx = field "author-name" (return . fst . itemBody)
<> field "author-url" (return . snd . itemBody)

View File

@ -32,10 +32,12 @@ import Data.Ord (comparing)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import qualified Data.ByteString as BS
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE import qualified Data.Text.Lazy.Encoding as TLE
import qualified Data.Text.Encoding as TE import qualified Data.Text.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
import qualified Data.Aeson as Aeson import qualified Data.Aeson as Aeson
import Data.Aeson ((.=)) import Data.Aeson ((.=))
import Text.Pandoc.Class (runPure) import Text.Pandoc.Class (runPure)
@ -47,6 +49,7 @@ import Text.Pandoc.Walk (query)
import Hakyll import Hakyll
import Compilers (readerOpts, writerOpts) import Compilers (readerOpts, writerOpts)
import Filters (preprocessSource) import Filters (preprocessSource)
import qualified Patterns as P
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Link-with-context entry (intermediate, saved by the "links" pass) -- Link-with-context entry (intermediate, saved by the "links" pass)
@ -184,28 +187,47 @@ linksCompiler = do
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | Normalise an internal URL as a map key: strip query string, fragment, -- | Normalise an internal URL as a map key: strip query string, fragment,
-- and trailing @.html@; ensure a leading slash. -- and trailing @.html@; ensure a leading slash; percent-decode the path
-- so that @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same
-- key.
normaliseUrl :: String -> String normaliseUrl :: String -> String
normaliseUrl url = normaliseUrl url =
let t = T.pack url let t = T.pack url
t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t))) t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t)))
t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1 t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1
t3 = fromMaybe t2 (T.stripSuffix ".html" t2) t3 = fromMaybe t2 (T.stripSuffix ".html" t2)
in T.unpack t3 in percentDecode (T.unpack t3)
-- | Decode percent-escapes (@%XX@) into raw bytes, then re-interpret the
-- resulting bytestring as UTF-8. Invalid escapes are passed through
-- verbatim so this is safe to call on already-decoded input.
percentDecode :: String -> String
percentDecode = T.unpack . TE.decodeUtf8With lenientDecode . pack . go
where
go [] = []
go ('%':a:b:rest)
| Just hi <- hexDigit a
, Just lo <- hexDigit b
= fromIntegral (hi * 16 + lo) : go rest
go (c:rest) = fromIntegral (fromEnum c) : go rest
hexDigit c
| c >= '0' && c <= '9' = Just (fromEnum c - fromEnum '0')
| c >= 'a' && c <= 'f' = Just (fromEnum c - fromEnum 'a' + 10)
| c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10)
| otherwise = Nothing
pack = BS.pack
lenientDecode = TE.lenientDecode
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Content patterns (must match the rules in Site.hs) -- Content patterns (must match the rules in Site.hs — sourced from
-- Patterns.allContent so additions to the canonical list automatically
-- propagate to backlinks).
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
allContent :: Pattern allContent :: Pattern
allContent = allContent = P.allContent
"content/essays/*.md"
.||. "content/essays/*/index.md"
.||. "content/blog/*.md"
.||. "content/poetry/*.md"
.||. "content/fiction/*.md"
.||. "content/music/*/index.md"
.||. "content/*.md"
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Hakyll rules -- Hakyll rules

View File

@ -7,7 +7,8 @@ module Catalog
( musicCatalogCtx ( musicCatalogCtx
) where ) where
import Data.List (groupBy, sortBy) import Data.Char (isSpace, toLower)
import Data.List (groupBy, isPrefixOf, sortBy)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Ord (comparing) import Data.Ord (comparing)
import Data.Aeson (Value (..)) import Data.Aeson (Value (..))
@ -15,7 +16,6 @@ import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.Text as T import qualified Data.Text as T
import Hakyll import Hakyll
import Hakyll.Core.Metadata (lookupStringList)
import Contexts (siteCtx) import Contexts (siteCtx)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -115,6 +115,40 @@ parseCatalogEntry item = do
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- HTML rendering -- HTML rendering
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
--
-- Trust model: per the site convention (see also Stats.hs:pageLink),
-- frontmatter @title@ values are author-controlled trusted HTML and may
-- contain inline markup such as @<em>...</em>@. They are emitted
-- pre-escaped — but we still escape every other interpolated frontmatter
-- value (year, duration, instrumentation) and sanitize hrefs through
-- 'safeHref', so a stray @<@ in those fields cannot break the markup.
-- | Defense-in-depth href sanitiser. Mirrors 'Stats.isSafeUrl'.
safeHref :: String -> String
safeHref u =
let norm = map toLower (dropWhile isSpace u)
in if not ("//" `isPrefixOf` norm)
&& any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"]
then escAttr u
else "#"
escAttr :: String -> String
escAttr = concatMap esc
where
esc '&' = "&amp;"
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '"' = "&quot;"
esc '\'' = "&#39;"
esc c = [c]
escText :: String -> String
escText = concatMap esc
where
esc '&' = "&amp;"
esc '<' = "&lt;"
esc '>' = "&gt;"
esc c = [c]
renderIndicators :: CatalogEntry -> String renderIndicators :: CatalogEntry -> String
renderIndicators e = concatMap render renderIndicators e = concatMap render
@ -129,19 +163,21 @@ renderEntry :: CatalogEntry -> String
renderEntry e = concat renderEntry e = concat
[ "<li class=\"catalog-entry\">" [ "<li class=\"catalog-entry\">"
, "<div class=\"catalog-entry-main\">" , "<div class=\"catalog-entry-main\">"
, "<a class=\"catalog-title\" href=\"", ceUrl e, "\">", ceTitle e, "</a>" , "<a class=\"catalog-title\" href=\"", safeHref (ceUrl e), "\">"
, ceTitle e
, "</a>"
, renderIndicators e , renderIndicators e
, maybe "" (\y -> "<span class=\"catalog-year\">" ++ y ++ "</span>") (ceYear e) , maybe "" (\y -> "<span class=\"catalog-year\">" ++ escText y ++ "</span>") (ceYear e)
, maybe "" (\d -> "<span class=\"catalog-duration\">" ++ d ++ "</span>") (ceDuration e) , maybe "" (\d -> "<span class=\"catalog-duration\">" ++ escText d ++ "</span>") (ceDuration e)
, "</div>" , "</div>"
, maybe "" (\i -> "<div class=\"catalog-instrumentation\">" ++ i ++ "</div>") (ceInstrumentation e) , maybe "" (\i -> "<div class=\"catalog-instrumentation\">" ++ escText i ++ "</div>") (ceInstrumentation e)
, "</li>" , "</li>"
] ]
renderCategorySection :: String -> [CatalogEntry] -> String renderCategorySection :: String -> [CatalogEntry] -> String
renderCategorySection cat entries = concat renderCategorySection cat entries = concat
[ "<section class=\"catalog-section\">" [ "<section class=\"catalog-section\">"
, "<h2 class=\"catalog-section-title\">", categoryLabel cat, "</h2>" , "<h2 class=\"catalog-section-title\">", escText (categoryLabel cat), "</h2>"
, "<ul class=\"catalog-list\">" , "<ul class=\"catalog-list\">"
, concatMap renderEntry entries , concatMap renderEntry entries
, "</ul>" , "</ul>"
@ -191,7 +227,12 @@ catalogByCategoryField = field "catalog-by-category" $ \_ -> do
else do else do
let sorted = sortBy (comparing (categoryRank . ceCategory)) entries let sorted = sortBy (comparing (categoryRank . ceCategory)) entries
grouped = groupBy (\a b -> ceCategory a == ceCategory b) sorted grouped = groupBy (\a b -> ceCategory a == ceCategory b) sorted
return $ concatMap (\g -> renderCategorySection (ceCategory (head g)) g) grouped return $ concatMap renderGroup grouped
where
-- groupBy on a non-empty list yields non-empty sublists, but pattern
-- matching is total whereas 'head' is not.
renderGroup [] = ""
renderGroup g@(e : _) = renderCategorySection (ceCategory e) g
musicCatalogCtx :: Context String musicCatalogCtx :: Context String
musicCatalogCtx = musicCatalogCtx =

View File

@ -137,9 +137,14 @@ transformInline :: Map Text Int -> Inline -> Inline
transformInline keyNums (Cite citations _) = transformInline keyNums (Cite citations _) =
let keys = map citationId citations let keys = map citationId citations
nums = mapMaybe (`Map.lookup` keyNums) keys nums = mapMaybe (`Map.lookup` keyNums) keys
in if null nums in case (keys, nums) of
then Str "" -- Both lists are guaranteed non-empty by the @null nums@ check
else RawInline "html" (markerHtml keys (head keys) (head nums) nums) -- below, but pattern-match to keep this total instead of
-- relying on @head@.
(firstKey : _, firstNum : _) ->
RawInline "html" (markerHtml keys firstKey firstNum nums)
_ ->
Str ""
transformInline _ x = x transformInline _ x = x
markerHtml :: [Text] -> Text -> Int -> [Int] -> Text markerHtml :: [Text] -> Text -> Int -> [Int] -> Text

View File

@ -125,9 +125,9 @@ renderThemedView entries =
renderChronoView :: [CPEntry] -> String renderChronoView :: [CPEntry] -> String
renderChronoView entries = renderChronoView entries =
"<div class=\"cp-chrono\" id=\"cp-chrono\" hidden>" "<div class=\"cp-chrono\" id=\"cp-chrono\" hidden>"
++ if null sorted ++ (if null sorted
then "<p class=\"cp-empty\">No entries yet.</p>" then "<p class=\"cp-empty\">No entries yet.</p>"
else concatMap renderEntry sorted else concatMap renderEntry sorted)
++ "</div>" ++ "</div>"
where where
sorted = sortBy (comparing (Down . cpDateAdded)) entries sorted = sortBy (comparing (Down . cpDateAdded)) entries

View File

@ -12,7 +12,6 @@ module Compilers
) where ) where
import Hakyll import Hakyll
import Hakyll.Core.Metadata (lookupStringList, lookupString)
import Text.Pandoc.Definition (Pandoc (..), Block (..), import Text.Pandoc.Definition (Pandoc (..), Block (..),
Inline (..)) Inline (..))
import Text.Pandoc.Options (ReaderOptions (..), WriterOptions (..), import Text.Pandoc.Options (ReaderOptions (..), WriterOptions (..),
@ -158,7 +157,9 @@ essayCompilerWith rOpts = do
Viz.inlineViz srcDir pandocWithScores Viz.inlineViz srcDir pandocWithScores
-- Apply remaining AST-level filters (sidenotes, smallcaps, links, etc.). -- Apply remaining AST-level filters (sidenotes, smallcaps, links, etc.).
let pandocFiltered = applyAll pandocWithViz -- applyAll touches the filesystem via Images.apply (webp existence
-- check), so it runs through unsafeCompiler.
pandocFiltered <- unsafeCompiler $ applyAll srcDir pandocWithViz
let pandocItem' = itemSetBody pandocFiltered pandocItem let pandocItem' = itemSetBody pandocFiltered pandocItem
-- Build TOC from the filtered AST. -- Build TOC from the filtered AST.
@ -205,8 +206,12 @@ pageCompiler = do
body <- getResourceBody body <- getResourceBody
let src = itemBody body let src = itemBody body
body' = itemSetBody (preprocessSource src) body body' = itemSetBody (preprocessSource src) body
pandocItem <- fmap (fmap applyAll) (readPandocWith readerOpts body') filePath <- getResourceFilePath
let htmlItem = writePandocWith writerOpts pandocItem let srcDir = takeDirectory filePath
pandocItem <- readPandocWith readerOpts body'
pandocFiltered <- unsafeCompiler $ applyAll srcDir (itemBody pandocItem)
let pandocItem' = itemSetBody pandocFiltered pandocItem
let htmlItem = writePandocWith writerOpts pandocItem'
_ <- saveSnapshot "word-count" (itemSetBody (show (wordCount src)) htmlItem) _ <- saveSnapshot "word-count" (itemSetBody (show (wordCount src)) htmlItem)
_ <- saveSnapshot "reading-time" (itemSetBody (show (readingTime src)) htmlItem) _ <- saveSnapshot "reading-time" (itemSetBody (show (readingTime src)) htmlItem)
return htmlItem return htmlItem

View File

@ -9,13 +9,16 @@ module Contexts
, fictionCtx , fictionCtx
, compositionCtx , compositionCtx
, contentKindField , contentKindField
, abstractField
, tagLinksField
, authorLinksField
) where ) where
import Data.Aeson (Value (..)) import Data.Aeson (Value (..))
import qualified Data.Aeson.KeyMap as KM import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V import qualified Data.Vector as V
import Data.List (isPrefixOf) import Data.List (intercalate, isPrefixOf)
import Data.Maybe (catMaybes, fromMaybe) import Data.Maybe (fromMaybe)
import Data.Time.Calendar (toGregorian) import Data.Time.Calendar (toGregorian)
import Data.Time.Clock (getCurrentTime, utctDay) import Data.Time.Clock (getCurrentTime, utctDay)
import Data.Time.Format (formatTime, defaultTimeLocale) import Data.Time.Format (formatTime, defaultTimeLocale)
@ -24,13 +27,11 @@ import Text.Read (readMaybe)
import qualified Data.Text as T import qualified Data.Text as T
import Text.Pandoc (runPure, readMarkdown, writeHtml5String, Pandoc(..), Block(..), Inline(..)) import Text.Pandoc (runPure, readMarkdown, writeHtml5String, Pandoc(..), Block(..), Inline(..))
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..)) import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
import Hakyll import Hakyll hiding (trim)
import Hakyll.Core.Metadata (lookupStringList)
import Authors (authorLinksField)
import Backlinks (backlinksField) import Backlinks (backlinksField)
import SimilarLinks (similarLinksField) import SimilarLinks (similarLinksField)
import Stability (stabilityField, lastReviewedField, versionHistoryField) import Stability (stabilityField, lastReviewedField, versionHistoryField)
import Tags (tagLinksField) import Utils (authorSlugify, authorNameOf, trim)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Affiliation field -- Affiliation field
@ -96,13 +97,13 @@ contentKindField = field "item-kind" $ \item -> do
r <- getRoute (itemIdentifier item) r <- getRoute (itemIdentifier item)
return $ case r of return $ case r of
Nothing -> "Page" Nothing -> "Page"
Just route Just r'
| "essays/" `isPrefixOf` route -> "Essay" | "essays/" `isPrefixOf` r' -> "Essay"
| "blog/" `isPrefixOf` route -> "Post" | "blog/" `isPrefixOf` r' -> "Post"
| "poetry/" `isPrefixOf` route -> "Poem" | "poetry/" `isPrefixOf` r' -> "Poem"
| "fiction/" `isPrefixOf` route -> "Fiction" | "fiction/" `isPrefixOf` r' -> "Fiction"
| "music/" `isPrefixOf` route -> "Composition" | "music/" `isPrefixOf` r' -> "Composition"
| otherwise -> "Page" | otherwise -> "Page"
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Site-wide context -- Site-wide context
@ -112,22 +113,79 @@ contentKindField = field "item-kind" $ \item -> do
-- in the @js:@ frontmatter key (accepts a scalar string or a YAML list). -- in the @js:@ frontmatter key (accepts a scalar string or a YAML list).
-- Returns an empty list when absent; $for iterates zero times, emitting nothing. -- Returns an empty list when absent; $for iterates zero times, emitting nothing.
-- NOTE: do not use fail here — $for does not catch noResult the way $if does. -- NOTE: do not use fail here — $for does not catch noResult the way $if does.
--
-- Each child Item is keyed on @<parent-identifier>#js-<index>@ so that two
-- pages referencing the same script path (e.g. @shared.js@) do not collide
-- in Hakyll's item store.
pageScriptsField :: Context String pageScriptsField :: Context String
pageScriptsField = listFieldWith "page-scripts" ctx $ \item -> do pageScriptsField = listFieldWith "page-scripts" ctx $ \item -> do
meta <- getMetadata (itemIdentifier item) meta <- getMetadata (itemIdentifier item)
let scripts = case lookupStringList "js" meta of let scripts = case lookupStringList "js" meta of
Just xs -> xs Just xs -> xs
Nothing -> maybe [] (:[]) (lookupString "js" meta) Nothing -> maybe [] (:[]) (lookupString "js" meta)
return $ map (\s -> Item (fromFilePath s) s) scripts parent = toFilePath (itemIdentifier item)
return $ zipWith
(\i s -> Item (fromFilePath (parent ++ "#js-" ++ show (i :: Int))) s)
[0 ..]
scripts
where where
ctx = field "script-src" (return . itemBody) ctx = field "script-src" (return . itemBody)
-- ---------------------------------------------------------------------------
-- Tag links field
-- ---------------------------------------------------------------------------
-- | List context field exposing an item's own (non-expanded) tags as
-- @tag-name@ / @tag-url@ objects.
--
-- $for(essay-tags)$<a href="$tag-url$">$tag-name$</a>$endfor$
tagLinksField :: String -> Context a
tagLinksField fieldName = listFieldWith fieldName ctx $ \item ->
map toItem <$> getTags (itemIdentifier item)
where
toItem t = Item (fromFilePath (t ++ "/index.html")) t
ctx = field "tag-name" (return . itemBody)
<> field "tag-url" (\i -> return $ "/" ++ itemBody i ++ "/")
-- ---------------------------------------------------------------------------
-- Author links field
-- ---------------------------------------------------------------------------
--
-- 'authorSlugify' and 'authorNameOf' are imported from 'Utils' so that
-- they cannot drift from the copies in 'Authors'.
-- | Exposes each item's authors as @author-name@ / @author-url@ pairs.
-- Defaults to Levi Neuwirth when no "authors" frontmatter key is present.
--
-- Entries that produce an empty name (e.g. @"| https://url"@) or an empty
-- slug (e.g. all-punctuation names) are dropped, so the field never emits
-- a @/authors//@ link.
--
-- $for(author-links)$<a href="$author-url$">$author-name$</a>$sep$, $endfor$
authorLinksField :: Context a
authorLinksField = listFieldWith "author-links" ctx $ \item -> do
meta <- getMetadata (itemIdentifier item)
let entries = fromMaybe [] (lookupStringList "authors" meta)
rawNames = if null entries then ["Levi Neuwirth"] else map authorNameOf entries
validNames = filter (\n -> not (null n) && not (null (authorSlugify n))) rawNames
names = if null validNames then ["Levi Neuwirth"] else validNames
return $ map (\n -> Item (fromFilePath "") (n, "/authors/" ++ authorSlugify n ++ "/")) names
where
ctx = field "author-name" (return . fst . itemBody)
<> field "author-url" (return . snd . itemBody)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Abstract field -- Abstract field
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | Renders the abstract using Pandoc to support Markdown and LaTeX math. -- | Renders the abstract using Pandoc to support Markdown and LaTeX math.
-- Strips the outer <p> tag if the abstract is a single paragraph. -- Strips the outer @<p>@ wrapping. A single-paragraph abstract becomes a
-- bare @Plain@ so the rendered HTML is unwrapped inlines. A multi-paragraph
-- abstract (author used a blank line in the YAML literal block) is flattened
-- to a single @Plain@ with @LineBreak@ separators between what were
-- originally paragraph boundaries — the visual break is preserved without
-- emitting stray @<p>@ tags inside the metadata block. Mixed block content
-- (e.g. an abstract containing a blockquote) falls through unchanged.
abstractField :: Context String abstractField :: Context String
abstractField = field "abstract" $ \item -> do abstractField = field "abstract" $ \item -> do
meta <- getMetadata (itemIdentifier item) meta <- getMetadata (itemIdentifier item)
@ -138,12 +196,20 @@ abstractField = field "abstract" $ \item -> do
doc <- readMarkdown defaultHakyllReaderOptions (T.pack src) doc <- readMarkdown defaultHakyllReaderOptions (T.pack src)
let doc' = case doc of let doc' = case doc of
Pandoc m [Para ils] -> Pandoc m [Plain ils] Pandoc m [Para ils] -> Pandoc m [Plain ils]
_ -> doc Pandoc m blocks
| all isPara blocks && not (null blocks) ->
let joined = intercalate [LineBreak]
[ils | Para ils <- blocks]
in Pandoc m [Plain joined]
_ -> doc
let wOpts = defaultHakyllWriterOptions { writerHTMLMathMethod = MathML } let wOpts = defaultHakyllWriterOptions { writerHTMLMathMethod = MathML }
writeHtml5String wOpts doc' writeHtml5String wOpts doc'
case pandocResult of case pandocResult of
Left err -> fail $ "Pandoc error rendering abstract: " ++ show err Left err -> fail $ "Pandoc error rendering abstract: " ++ show err
Right html -> return (T.unpack html) Right html -> return (T.unpack html)
where
isPara (Para _) = True
isPara _ = False
siteCtx :: Context String siteCtx :: Context String
siteCtx = siteCtx =
@ -208,21 +274,37 @@ dotsField ctxKey metaKey = field ctxKey $ \item -> do
-- | @$confidence-trend$@: ↑, ↓, or → derived from the last two entries -- | @$confidence-trend$@: ↑, ↓, or → derived from the last two entries
-- in the @confidence-history@ frontmatter list. Returns @noResult@ when -- in the @confidence-history@ frontmatter list. Returns @noResult@ when
-- there is no history or only a single entry. -- there is no history or only a single entry.
--
-- The arrow flips when the absolute change crosses 'trendThreshold'
-- (currently 5 percentage points). Smaller swings count as flat.
confidenceTrendField :: Context String confidenceTrendField :: Context String
confidenceTrendField = field "confidence-trend" $ \item -> do confidenceTrendField = field "confidence-trend" $ \item -> do
meta <- getMetadata (itemIdentifier item) meta <- getMetadata (itemIdentifier item)
case lookupStringList "confidence-history" meta of case lookupStringList "confidence-history" meta of
Nothing -> fail "no confidence history" Nothing -> fail "no confidence history"
Just xs | length xs < 2 -> fail "no confidence history" Just xs -> case lastTwo xs of
Just xs -> Nothing -> fail "no confidence history"
let prev = readMaybe (xs !! (length xs - 2)) :: Maybe Int Just (prevS, curS) ->
cur = readMaybe (last xs) :: Maybe Int let prev = readMaybe prevS :: Maybe Int
in case (prev, cur) of cur = readMaybe curS :: Maybe Int
(Just p, Just c) in case (prev, cur) of
| c - p > 5 -> return "\x2191" -- ↑ (Just p, Just c)
| p - c > 5 -> return "\x2193" -- ↓ | c - p > trendThreshold -> return "\x2191" -- ↑
| otherwise -> return "\x2192" -- → | p - c > trendThreshold -> return "\x2193" -- ↓
_ -> return "\x2192" | otherwise -> return "\x2192" -- →
_ -> return "\x2192"
where
trendThreshold :: Int
trendThreshold = 5
-- Total replacement for @(xs !! (length xs - 2), last xs)@: returns
-- the last two elements of a list, in order, or 'Nothing' when the
-- list has fewer than two entries.
lastTwo :: [a] -> Maybe (a, a)
lastTwo [] = Nothing
lastTwo [_] = Nothing
lastTwo [a, b] = Just (a, b)
lastTwo (_ : rest) = lastTwo rest
-- | @$overall-score$@: weighted composite of confidence (50 %), -- | @$overall-score$@: weighted composite of confidence (50 %),
-- evidence quality (30 %), and importance (20 %), expressed as an -- evidence quality (30 %), and importance (20 %), expressed as an
@ -332,12 +414,27 @@ data Movement = Movement
, movAudio :: Maybe String , movAudio :: Maybe String
} }
parseMovements :: Metadata -> [Movement] -- | Parse the @movements@ frontmatter key. Returns parsed movements and a
parseMovements meta = -- list of human-readable warnings for any entries that failed to parse.
-- Callers can surface the warnings via 'unsafeCompiler' so silent typos
-- don't strip movements without diagnostic.
parseMovementsWithWarnings :: Metadata -> ([Movement], [String])
parseMovementsWithWarnings meta =
case KM.lookup "movements" meta of case KM.lookup "movements" meta of
Just (Array v) -> catMaybes $ map parseOne (V.toList v) Just (Array v) ->
_ -> [] let results = zipWith parseIndexed [1 :: Int ..] (V.toList v)
in ( [m | Right m <- results]
, [w | Left w <- results]
)
_ -> ([], [])
where where
parseIndexed i value =
case parseOne value of
Just m -> Right m
Nothing -> Left $
"movement #" ++ show i ++ " is missing a required field "
++ "(name, page, or duration) — entry skipped"
parseOne (Object o) = Movement parseOne (Object o) = Movement
<$> (getString =<< KM.lookup "name" o) <$> (getString =<< KM.lookup "name" o)
<*> (getInt =<< KM.lookup "page" o) <*> (getInt =<< KM.lookup "page" o)
@ -351,6 +448,9 @@ parseMovements meta =
getInt (Number n) = Just (floor (fromRational (toRational n) :: Double)) getInt (Number n) = Just (floor (fromRational (toRational n) :: Double))
getInt _ = Nothing getInt _ = Nothing
parseMovements :: Metadata -> [Movement]
parseMovements = fst . parseMovementsWithWarnings
-- | Extract the composition slug from an item's identifier. -- | Extract the composition slug from an item's identifier.
-- "content/music/symphonic-dances/index.md" → "symphonic-dances" -- "content/music/symphonic-dances/index.md" → "symphonic-dances"
compSlug :: Item a -> String compSlug :: Item a -> String
@ -410,7 +510,11 @@ compositionCtx =
movementsListField = listFieldWith "movements" movCtx $ \item -> do movementsListField = listFieldWith "movements" movCtx $ \item -> do
meta <- getMetadata (itemIdentifier item) meta <- getMetadata (itemIdentifier item)
let mvs = parseMovements meta let (mvs, warnings) = parseMovementsWithWarnings meta
ident = toFilePath (itemIdentifier item)
unsafeCompiler $ mapM_
(\w -> putStrLn $ "[Movements] " ++ ident ++ ": " ++ w)
warnings
return $ zipWith return $ zipWith
(\idx mv -> Item (fromFilePath ("mv" ++ show (idx :: Int))) mv) (\idx mv -> Item (fromFilePath ("mv" ++ show (idx :: Int))) mv)
[1..] mvs [1..] mvs

View File

@ -22,16 +22,25 @@ import qualified Filters.Images as Images
-- | Apply all AST-level filters in pipeline order. -- | Apply all AST-level filters in pipeline order.
-- Run on the Pandoc document after reading, before writing. -- Run on the Pandoc document after reading, before writing.
applyAll :: Pandoc -> Pandoc --
applyAll -- 'Filters.Images.apply' is the only IO-performing filter (it probes the
= Sidenotes.apply -- filesystem for @.webp@ companions before deciding whether to emit
. Typography.apply -- @<picture>@). It runs first — i.e. innermost in the composition — and
. Links.apply -- every downstream filter stays pure. @srcDir@ is the directory of the
. Smallcaps.apply -- source Markdown file, passed through to Images for relative-path
. Dropcaps.apply -- resolution of co-located assets.
. Math.apply applyAll :: FilePath -> Pandoc -> IO Pandoc
. Code.apply applyAll srcDir doc = do
. Images.apply imagesDone <- Images.apply srcDir doc
pure
. Sidenotes.apply
. Typography.apply
. Links.apply
. Smallcaps.apply
. Dropcaps.apply
. Math.apply
. Code.apply
$ imagesDone
-- | Apply source-level preprocessors to the raw Markdown string. -- | Apply source-level preprocessors to the raw Markdown string.
-- Order matters: EmbedPdf must run before Transclusion, because the -- Order matters: EmbedPdf must run before Transclusion, because the

View File

@ -16,6 +16,7 @@ module Filters.EmbedPdf (preprocess) where
import Data.Char (isDigit) import Data.Char (isDigit)
import Data.List (isPrefixOf, isSuffixOf) import Data.List (isPrefixOf, isSuffixOf)
import qualified Utils as U
-- | Apply PDF-embed substitution to the raw Markdown source string. -- | Apply PDF-embed substitution to the raw Markdown source string.
preprocess :: String -> String preprocess :: String -> String
@ -23,7 +24,7 @@ preprocess = unlines . map processLine . lines
processLine :: String -> String processLine :: String -> String
processLine line = processLine line =
case parseDirective (trim line) of case parseDirective (U.trim line) of
Nothing -> line Nothing -> line
Just (filePath, pageHash) -> renderEmbed filePath pageHash Just (filePath, pageHash) -> renderEmbed filePath pageHash
@ -64,7 +65,9 @@ renderEmbed filePath pageHash =
-- | Percent-encode characters that would break a query-string value. -- | Percent-encode characters that would break a query-string value.
-- Slashes are left unencoded so root-relative paths remain readable and -- Slashes are left unencoded so root-relative paths remain readable and
-- work correctly with PDF.js's internal fetch. -- work correctly with PDF.js's internal fetch. @#@ is encoded for
-- defense-in-depth even though the directive parser already splits on it
-- before this function is called.
encodeQueryValue :: String -> String encodeQueryValue :: String -> String
encodeQueryValue = concatMap enc encodeQueryValue = concatMap enc
where where
@ -73,9 +76,6 @@ encodeQueryValue = concatMap enc
enc '?' = "%3F" enc '?' = "%3F"
enc '+' = "%2B" enc '+' = "%2B"
enc '"' = "%22" enc '"' = "%22"
enc '#' = "%23"
enc c = [c] enc c = [c]
-- | Strip leading and trailing spaces.
trim :: String -> String
trim = f . f
where f = reverse . dropWhile (== ' ')

View File

@ -2,53 +2,93 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
-- | Image filter: lazy loading, lightbox markers, and WebP <picture> wrappers. -- | Image filter: lazy loading, lightbox markers, and WebP <picture> wrappers.
-- --
-- For local raster images (JPG, JPEG, PNG, GIF), emits a @<picture>@ element -- For local raster images (JPG, JPEG, PNG, GIF) whose @.webp@ companion
-- with a WebP @<source>@ and the original format as the @<img>@ fallback. -- exists on disk at build time, emits a @<picture>@ element with a WebP
-- tools/convert-images.sh produces the companion .webp files at build time. -- @<source>@ and the original format as the @<img>@ fallback. When the
-- webp companion is absent (cwebp not installed, @convert-images.sh@ not
-- yet run, or a single file missed), the filter emits a plain @<img>@ so
-- the image still renders. This matters because browsers do NOT fall back
-- from a 404'd @<source>@ inside @<picture>@ to the nested @<img>@ — the
-- source is selected up front and a broken one leaves the area blank.
--
-- @tools/convert-images.sh@ produces the companion .webp files at build
-- time. When cwebp is not installed the script is a no-op, and this
-- filter degrades gracefully to plain @<img>@.
-- --
-- SVG files and external URLs are passed through with only lazy loading -- SVG files and external URLs are passed through with only lazy loading
-- (and lightbox markers for standalone images). -- (and lightbox markers for standalone images).
module Filters.Images (apply) where module Filters.Images (apply) where
import Data.Char (toLower) import Data.Char (toLower)
import Data.List (isPrefixOf)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import System.FilePath (replaceExtension) import System.Directory (doesFileExist)
import System.FilePath (replaceExtension, takeExtension, (</>))
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk) import Text.Pandoc.Walk (walkM)
import qualified Utils as U
-- | Apply image attribute injection and WebP wrapping to the entire document. -- | Apply image attribute injection and WebP wrapping to the entire document.
apply :: Pandoc -> Pandoc --
apply = walk transformInline -- @srcDir@ is the directory of the source Markdown file, used to resolve
-- relative image paths when probing for the corresponding @.webp@
-- companion file. Absolute paths (leading @/@) are resolved against
-- @static/@ instead, matching the layout @convert-images.sh@ writes to.
apply :: FilePath -> Pandoc -> IO Pandoc
apply srcDir = walkM (transformInline srcDir)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Core transformation -- Core transformation
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
transformInline :: Inline -> Inline transformInline :: FilePath -> Inline -> IO Inline
transformInline (Link lAttr ils lTarget) = transformInline srcDir (Link lAttr ils lTarget) = do
-- Recurse into link contents; images inside a link get no lightbox marker. -- Recurse into link contents; images inside a link get no lightbox marker.
Link lAttr (map wrapLinkedImg ils) lTarget ils' <- mapM (wrapLinkedImg srcDir) ils
where pure (Link lAttr ils' lTarget)
wrapLinkedImg (Image iAttr alt iTarget) = renderImg iAttr alt iTarget False transformInline srcDir (Image attr alt target) =
wrapLinkedImg x = x renderImg srcDir attr alt target True
transformInline (Image attr alt target) = transformInline _ x = pure x
renderImg attr alt target True
transformInline x = x wrapLinkedImg :: FilePath -> Inline -> IO Inline
wrapLinkedImg srcDir (Image iAttr alt iTarget) =
renderImg srcDir iAttr alt iTarget False
wrapLinkedImg _ x = pure x
-- | Dispatch on image type: -- | Dispatch on image type:
-- * Local raster → @<picture>@ with WebP @<source>@ -- * Local raster with webp companion on disk → @<picture>@ with WebP @<source>@
-- * Everything else → plain @<img>@ with loading/lightbox attrs -- * Local raster without companion → plain @<img>@ (graceful degradation)
renderImg :: Attr -> [Inline] -> Target -> Bool -> Inline -- * Everything else (SVG, URL) → plain @<img>@ with loading/lightbox attrs
renderImg attr alt target@(src, _) lightbox renderImg :: FilePath -> Attr -> [Inline] -> Target -> Bool -> IO Inline
| isLocalRaster (T.unpack src) = renderImg srcDir attr alt target@(src, _) lightbox
RawInline (Format "html") (renderPicture attr alt target lightbox) | isLocalRaster (T.unpack src) = do
hasWebp <- doesFileExist (webpPhysicalPath srcDir src)
if hasWebp
then pure $ RawInline (Format "html")
(renderPicture attr alt target lightbox)
else pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr))
alt target
| otherwise = | otherwise =
Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target
where where
addLightbox True a = addAttr "data-lightbox" "true" a addLightbox True a = addAttr "data-lightbox" "true" a
addLightbox False a = a addLightbox False a = a
-- | Physical on-disk path of the @.webp@ companion for a Markdown image src.
--
-- Absolute paths (@/images/foo.jpg@) resolve under @static/@ because that
-- is where Hakyll's static-asset rule writes them from. Relative paths
-- resolve against the source file's directory, where Pandoc already
-- expects co-located assets to live.
webpPhysicalPath :: FilePath -> Text -> FilePath
webpPhysicalPath srcDir src =
let s = T.unpack src
physical = if "/" `isPrefixOf` s
then "static" ++ s
else srcDir </> s
in replaceExtension physical ".webp"
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- <picture> rendering -- <picture> rendering
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -73,8 +113,13 @@ renderPicture (ident, classes, kvs) alt (src, title) lightbox =
] ]
where where
webpSrc = replaceExtension (T.unpack src) ".webp" webpSrc = replaceExtension (T.unpack src) ".webp"
-- Strip attrs we handle explicitly so they don't appear twice. -- Strip attrs we handle explicitly above (id/class/alt/title) and the
passedKvs = filter (\(k, _) -> k `notElem` ["loading", "data-lightbox"]) kvs -- attrs we always emit ourselves (loading, data-lightbox), so they don't
-- appear twice on the <img>.
passedKvs = filter
(\(k, _) -> k `notElem`
["loading", "data-lightbox", "id", "class", "alt", "title", "src"])
kvs
attrId :: Text -> Text attrId :: Text -> Text
attrId t = if T.null t then "" else " id=\"" <> esc t <> "\"" attrId t = if T.null t then "" else " id=\"" <> esc t <> "\""
@ -103,11 +148,11 @@ isLocalRaster src = not (isUrl src) && lowerExt src `elem` [".jpg", ".jpeg", ".p
isUrl :: String -> Bool isUrl :: String -> Bool
isUrl s = any (`isPrefixOf` s) ["http://", "https://", "//", "data:"] isUrl s = any (`isPrefixOf` s) ["http://", "https://", "//", "data:"]
where isPrefixOf pfx str = take (length pfx) str == pfx
-- | Extension of a path, lowercased (e.g. ".JPG" → ".jpg"). -- | Extension of a path, lowercased (e.g. ".JPG" → ".jpg").
-- Returns the empty string for paths with no extension.
lowerExt :: FilePath -> String lowerExt :: FilePath -> String
lowerExt = map toLower . reverse . ('.' :) . takeWhile (/= '.') . tail . dropWhile (/= '.') . reverse lowerExt = map toLower . takeExtension
-- | Prepend a key=value pair if not already present. -- | Prepend a key=value pair if not already present.
addAttr :: Text -> Text -> Attr -> Attr addAttr :: Text -> Text -> Attr -> Attr
@ -119,24 +164,28 @@ addAttr k v (i, cs, kvs)
stringify :: [Inline] -> Text stringify :: [Inline] -> Text
stringify = T.concat . map go stringify = T.concat . map go
where where
go (Str t) = t go (Str t) = t
go Space = " " go Space = " "
go SoftBreak = " " go SoftBreak = " "
go LineBreak = " " go LineBreak = " "
go (Emph ils) = stringify ils go (Emph ils) = stringify ils
go (Strong ils) = stringify ils go (Strong ils) = stringify ils
go (Code _ t) = t go (Strikeout ils) = stringify ils
go (Link _ ils _) = stringify ils go (Superscript ils) = stringify ils
go (Image _ ils _) = stringify ils go (Subscript ils) = stringify ils
go (Span _ ils) = stringify ils go (SmallCaps ils) = stringify ils
go _ = "" go (Underline ils) = stringify ils
go (Quoted _ ils) = stringify ils
go (Cite _ ils) = stringify ils
go (Code _ t) = t
go (Math _ t) = t
go (RawInline _ _) = ""
go (Link _ ils _) = stringify ils
go (Image _ ils _) = stringify ils
go (Span _ ils) = stringify ils
go (Note _) = ""
-- | HTML-escape a text value for use in attribute values. -- | HTML-escape a text value for use in attribute values.
-- Defers to the canonical 'Utils.escapeHtmlText'.
esc :: Text -> Text esc :: Text -> Text
esc = T.concatMap escChar esc = U.escapeHtmlText
where
escChar '&' = "&amp;"
escChar '<' = "&lt;"
escChar '>' = "&gt;"
escChar '"' = "&quot;"
escChar c = T.singleton c

View File

@ -25,14 +25,20 @@ apply = walk classifyLink . walk classifyPdfLink
-- Preserves the original path in @data-pdf-src@ so the popup thumbnail -- Preserves the original path in @data-pdf-src@ so the popup thumbnail
-- provider can locate the corresponding @.thumb.png@ file. -- provider can locate the corresponding @.thumb.png@ file.
-- Skips links that are already pointing at the viewer (idempotent). -- Skips links that are already pointing at the viewer (idempotent).
--
-- Handles fragment identifiers (e.g. @/papers/foo.pdf#page=5@): the
-- fragment is stripped before the @.pdf@ suffix check and re-attached
-- after the viewer URL so PDF.js's anchor handling works.
classifyPdfLink :: Inline -> Inline classifyPdfLink :: Inline -> Inline
classifyPdfLink (Link (ident, classes, kvs) ils (url, title)) classifyPdfLink (Link (ident, classes, kvs) ils (url, title))
| "/" `T.isPrefixOf` url | "/" `T.isPrefixOf` url
, ".pdf" `T.isSuffixOf` T.toLower url , let (path, fragment) = T.break (== '#') url
, ".pdf" `T.isSuffixOf` T.toLower path
, "pdf-link" `notElem` classes = , "pdf-link" `notElem` classes =
let viewerUrl = "/pdfjs/web/viewer.html?file=" <> encodeQueryValue url let viewerUrl = "/pdfjs/web/viewer.html?file="
<> encodeQueryValue path <> fragment
classes' = classes ++ ["pdf-link"] classes' = classes ++ ["pdf-link"]
kvs' = kvs ++ [("data-pdf-src", url)] kvs' = kvs ++ [("data-pdf-src", path)]
in Link (ident, classes', kvs') ils (viewerUrl, title) in Link (ident, classes', kvs') ils (viewerUrl, title)
classifyPdfLink x = x classifyPdfLink x = x
@ -53,10 +59,33 @@ classifyLink x = x
-- Helpers -- Helpers
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | True if the URL points outside the site's domain.
--
-- Uses a strict hostname comparison rather than substring matching, so
-- that a hostile lookalike like @evil-levineuwirth.org.attacker.com@ is
-- correctly classified as external (and gets @rel=noopener noreferrer@
-- plus @target=_blank@ applied).
isExternal :: Text -> Bool isExternal :: Text -> Bool
isExternal url = isExternal url =
("http://" `T.isPrefixOf` url || "https://" `T.isPrefixOf` url) case extractHost url of
&& not ("levineuwirth.org" `T.isInfixOf` url) Nothing -> False
Just host ->
not (host == siteHost || ("." <> siteHost) `T.isSuffixOf` host)
where
siteHost = "levineuwirth.org"
-- | Extract the lowercased hostname from an absolute http(s) URL.
-- Returns 'Nothing' for non-http(s) URLs (relative paths, mailto:, etc.).
extractHost :: Text -> Maybe Text
extractHost url
| Just rest <- T.stripPrefix "https://" url = Just (hostOf rest)
| Just rest <- T.stripPrefix "http://" url = Just (hostOf rest)
| otherwise = Nothing
where
hostOf rest =
let withPort = T.takeWhile (\c -> c /= '/' && c /= '?' && c /= '#') rest
host = T.takeWhile (/= ':') withPort
in T.toLower host
-- | Icon name for the link, matching a file in /images/link-icons/<name>.svg. -- | Icon name for the link, matching a file in /images/link-icons/<name>.svg.
domainIcon :: Text -> Text domainIcon :: Text -> Text

View File

@ -14,12 +14,16 @@
-- the appropriate exhibit attributes for gallery.js TOC integration. -- the appropriate exhibit attributes for gallery.js TOC integration.
module Filters.Score (inlineScores) where module Filters.Score (inlineScores) where
import Control.Exception (IOException, try)
import Data.Maybe (listToMaybe) import Data.Maybe (listToMaybe)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.IO as TIO import qualified Data.Text.IO as TIO
import System.Directory (doesFileExist)
import System.FilePath ((</>)) import System.FilePath ((</>))
import System.IO (hPutStrLn, stderr)
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM) import Text.Pandoc.Walk (walkM)
import qualified Utils as U
-- | Walk the Pandoc AST and inline all score-fragment divs. -- | Walk the Pandoc AST and inline all score-fragment divs.
-- @baseDir@ is the directory of the source file; image paths in the -- @baseDir@ is the directory of the source file; image paths in the
@ -37,11 +41,40 @@ inlineScore baseDir (Div (_, cls, attrs) blocks)
Nothing -> return $ Div ("", cls, attrs) blocks Nothing -> return $ Div ("", cls, attrs) blocks
Just path -> do Just path -> do
let fullPath = baseDir </> T.unpack path let fullPath = baseDir </> T.unpack path
svgRaw <- TIO.readFile fullPath exists <- doesFileExist fullPath
let html = buildHtml mName mCaption (processColors svgRaw) if not exists
return $ RawBlock (Format "html") html then do
hPutStrLn stderr $
"[Score] missing SVG: " ++ fullPath
++ " (referenced from a score-fragment in " ++ baseDir ++ ")"
return (errorBlock mName ("Missing score: " <> path))
else do
result <- try (TIO.readFile fullPath) :: IO (Either IOException T.Text)
case result of
Left e -> do
hPutStrLn stderr $
"[Score] read error on " ++ fullPath ++ ": " ++ show e
return (errorBlock mName ("Could not read score: " <> path))
Right svgRaw -> do
let html = buildHtml mName mCaption (processColors svgRaw)
return $ RawBlock (Format "html") html
inlineScore _ block = return block inlineScore _ block = return block
-- | Render an inline error block in place of a missing or unreadable score.
-- Mirrors the convention in 'Filters.Viz.errorBlock' so build failures are
-- visible to the author without aborting the entire site build.
errorBlock :: Maybe T.Text -> T.Text -> Block
errorBlock mName message =
RawBlock (Format "html") $ T.concat
[ "<figure class=\"score-fragment score-fragment--error\""
, maybe "" (\n -> " data-exhibit-name=\"" <> escHtml n <> "\"") mName
, ">"
, "<div class=\"score-fragment-error\">"
, escHtml message
, "</div>"
, "</figure>"
]
-- | Extract the image src from the first Para that contains an Image inline. -- | Extract the image src from the first Para that contains an Image inline.
findImagePath :: [Block] -> Maybe T.Text findImagePath :: [Block] -> Maybe T.Text
findImagePath blocks = listToMaybe findImagePath blocks = listToMaybe
@ -86,7 +119,4 @@ buildHtml mName mCaption svgContent = T.concat
] ]
escHtml :: T.Text -> T.Text escHtml :: T.Text -> T.Text
escHtml = T.replace "\"" "&quot;" escHtml = U.escapeHtmlText
. T.replace ">" "&gt;"
. T.replace "<" "&lt;"
. T.replace "&" "&amp;"

View File

@ -33,13 +33,24 @@ convertNote (Note blocks) = do
return $ RawInline "html" (renderNote n blocks) return $ RawInline "html" (renderNote n blocks)
convertNote x = return x convertNote x = return x
-- | Convert a 1-based counter to a letter label: 1→a, 2→b, … 26→z. -- | Convert a 1-based counter to a letter label using base-26 expansion
-- (Excel-column style): 1→a, 2→b, … 26→z, 27→aa, 28→ab, … 52→az,
-- 53→ba, … 702→zz, 703→aaa. Guarantees a unique label per counter so
-- no two sidenotes in a single document collide on @id="sn-…"@.
toLabel :: Int -> Text toLabel :: Int -> Text
toLabel n = T.singleton (toEnum (fromEnum 'a' + (n - 1) `mod` 26)) toLabel n
| n <= 0 = "?"
| otherwise = T.pack (go n)
where
go k
| k <= 0 = ""
| otherwise =
let (q, r) = (k - 1) `divMod` 26
in go q ++ [toEnum (fromEnum 'a' + r)]
renderNote :: Int -> [Block] -> Text renderNote :: Int -> [Block] -> Text
renderNote n blocks = renderNote n blocks =
let inner = replacePTags (blocksToHtml blocks) let inner = blocksToInlineHtml blocks
lbl = toLabel n lbl = toLabel n
in T.concat in T.concat
[ "<sup class=\"sidenote-ref\" id=\"snref-", lbl, "\">" [ "<sup class=\"sidenote-ref\" id=\"snref-", lbl, "\">"
@ -51,13 +62,34 @@ renderNote n blocks =
, "</span>" , "</span>"
] ]
-- | Replace <p> / </p> with inline-block spans so that sidenote content -- | Render a list of Pandoc blocks for inclusion inside an inline @<span
-- stays valid inside the outer <span class="sidenote">. A bare <p> inside -- class="sidenote">@. Each top-level @Para@ is wrapped in a
-- a <span> is invalid HTML and causes browsers to implicitly close the span. -- @<span class="sidenote-para">@ instead of a @<p>@ (which would be
replacePTags :: Text -> Text -- invalid inside a @<span>@); other block types are rendered with the
replacePTags = -- regular Pandoc HTML writer.
T.replace "<p>" "<span class=\"sidenote-para\">" --
. T.replace "</p>" "</span>" -- Operating on the AST is preferred over post-rendered string
-- substitution because the latter mangles content that legitimately
-- contains the literal text @<p>@ (e.g. code samples discussing HTML).
blocksToInlineHtml :: [Block] -> Text
blocksToInlineHtml = T.concat . map renderOne
where
renderOne :: Block -> Text
renderOne (Para inlines) =
"<span class=\"sidenote-para\">"
<> inlinesToHtml inlines
<> "</span>"
renderOne (Plain inlines) =
inlinesToHtml inlines
renderOne b =
blocksToHtml [b]
-- | Render a list of inlines to HTML (no surrounding @<p>@).
inlinesToHtml :: [Inline] -> Text
inlinesToHtml inlines =
case runPure (writeHtml5String (def :: WriterOptions) (Pandoc mempty [Plain inlines])) of
Left _ -> T.empty
Right t -> t
-- | Render a list of Pandoc blocks to an HTML fragment via a pure writer run. -- | Render a list of Pandoc blocks to an HTML fragment via a pure writer run.
blocksToHtml :: [Block] -> Text blocksToHtml :: [Block] -> Text

View File

@ -22,6 +22,7 @@ import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk) import Text.Pandoc.Walk (walk)
import qualified Utils as U
-- | Apply smallcaps detection to paragraph-level content. -- | Apply smallcaps detection to paragraph-level content.
-- Skips heading blocks to avoid false positives. -- Skips heading blocks to avoid false positives.
@ -62,10 +63,4 @@ isAbbreviation t =
&& T.any isAlpha t && T.any isAlpha t
escHtml :: Text -> Text escHtml :: Text -> Text
escHtml = T.concatMap esc escHtml = U.escapeHtmlText
where
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '&' = "&amp;"
esc '"' = "&quot;"
esc c = T.singleton c

View File

@ -14,6 +14,7 @@
module Filters.Transclusion (preprocess) where module Filters.Transclusion (preprocess) where
import Data.List (isSuffixOf, isPrefixOf, stripPrefix) import Data.List (isSuffixOf, isPrefixOf, stripPrefix)
import qualified Utils as U
-- | Apply transclusion substitution to the raw Markdown source string. -- | Apply transclusion substitution to the raw Markdown source string.
preprocess :: String -> String preprocess :: String -> String
@ -21,14 +22,18 @@ preprocess = unlines . map processLine . lines
processLine :: String -> String processLine :: String -> String
processLine line = processLine line =
case parseDirective (trim line) of case parseDirective (U.trim line) of
Nothing -> line Nothing -> line
Just (url, secAttr) -> Just (url, secAttr) ->
"<div class=\"transclude\" data-src=\"" ++ url ++ "\"" "<div class=\"transclude\" data-src=\"" ++ escAttr url ++ "\""
++ secAttr ++ "></div>" ++ secAttr ++ "></div>"
-- | Parse a {{slug}} or {{slug#section}} directive. -- | Parse a {{slug}} or {{slug#section}} directive.
-- Returns (absolute-url, section-attribute-string) or Nothing. -- Returns (absolute-url, section-attribute-string) or Nothing.
--
-- The section name is HTML-escaped before being interpolated into the
-- @data-section@ attribute, so a stray @\"@, @&@, @<@, or @>@ in a
-- section name cannot break the surrounding markup.
parseDirective :: String -> Maybe (String, String) parseDirective :: String -> Maybe (String, String)
parseDirective s = do parseDirective s = do
inner <- stripPrefix "{{" s >>= stripSuffix "}}" inner <- stripPrefix "{{" s >>= stripSuffix "}}"
@ -38,15 +43,29 @@ parseDirective s = do
(slug, '#' : sec) (slug, '#' : sec)
| null sec -> Just (slugToUrl slug, "") | null sec -> Just (slugToUrl slug, "")
| otherwise -> Just (slugToUrl slug, | otherwise -> Just (slugToUrl slug,
" data-section=\"" ++ sec ++ "\"") " data-section=\"" ++ escAttr sec ++ "\"")
_ -> Nothing _ -> Nothing
-- | Convert a slug (possibly with leading slash, possibly with path segments) -- | Convert a slug (possibly with leading slash, possibly with path segments)
-- to a root-relative .html URL. -- to a root-relative .html URL. Idempotent for slugs that already end in
-- @.html@ so callers can safely pass either form.
slugToUrl :: String -> String slugToUrl :: String -> String
slugToUrl slug slugToUrl slug
| "/" `isPrefixOf` slug = slug ++ ".html" | ".html" `isSuffixOf` slug, "/" `isPrefixOf` slug = slug
| otherwise = "/" ++ slug ++ ".html" | ".html" `isSuffixOf` slug = "/" ++ slug
| "/" `isPrefixOf` slug = slug ++ ".html"
| otherwise = "/" ++ slug ++ ".html"
-- | Minimal HTML attribute-value escape.
escAttr :: String -> String
escAttr = concatMap esc
where
esc '&' = "&amp;"
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '"' = "&quot;"
esc '\'' = "&#39;"
esc c = [c]
-- | Strip a suffix from a string, returning Nothing if not present. -- | Strip a suffix from a string, returning Nothing if not present.
stripSuffix :: String -> String -> Maybe String stripSuffix :: String -> String -> Maybe String
@ -54,7 +73,3 @@ stripSuffix suf str
| suf `isSuffixOf` str = Just (take (length str - length suf) str) | suf `isSuffixOf` str = Just (take (length str - length suf) str)
| otherwise = Nothing | otherwise = Nothing
-- | Strip leading and trailing spaces.
trim :: String -> String
trim = f . f
where f = reverse . dropWhile (== ' ')

View File

@ -9,9 +9,9 @@
module Filters.Typography (apply) where module Filters.Typography (apply) where
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk) import Text.Pandoc.Walk (walk)
import Utils (escapeHtmlText)
-- | Apply all typographic transformations to the document. -- | Apply all typographic transformations to the document.
apply :: Pandoc -> Pandoc apply :: Pandoc -> Pandoc
@ -38,21 +38,17 @@ abbrevMap =
-- | If the Str token exactly matches a known abbreviation, replace it with -- | If the Str token exactly matches a known abbreviation, replace it with
-- a @RawInline "html"@ @<abbr>@ element; otherwise leave it unchanged. -- a @RawInline "html"@ @<abbr>@ element; otherwise leave it unchanged.
--
-- Both the @title@ attribute and the visible body pass through
-- 'escapeHtmlText' for consistency with every other raw-HTML emitter
-- in the filter pipeline. The abbreviations themselves are ASCII-safe
-- so this is defense-in-depth rather than a live hazard.
expandAbbrev :: Inline -> Inline expandAbbrev :: Inline -> Inline
expandAbbrev (Str t) = expandAbbrev (Str t) =
case lookup t abbrevMap of case lookup t abbrevMap of
Just title -> Just title ->
RawInline "html" $ RawInline "html" $
"<abbr title=\"" <> title <> "\">" <> escHtml t <> "</abbr>" "<abbr title=\"" <> escapeHtmlText title <> "\">"
<> escapeHtmlText t <> "</abbr>"
Nothing -> Str t Nothing -> Str t
expandAbbrev x = x expandAbbrev x = x
-- | Minimal HTML escaping for the abbr content (should be plain text).
escHtml :: Text -> Text
escHtml = T.concatMap esc
where
esc '<' = "&lt;"
esc '>' = "&gt;"
esc '&' = "&amp;"
esc '"' = "&quot;"
esc c = T.singleton c

View File

@ -39,11 +39,14 @@ module Filters.Viz (inlineViz) where
import Control.Exception (IOException, catch) import Control.Exception (IOException, catch)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import qualified Data.Text as T import qualified Data.Text as T
import System.Directory (doesFileExist)
import System.Exit (ExitCode (..)) import System.Exit (ExitCode (..))
import System.FilePath ((</>)) import System.FilePath ((</>))
import System.IO (hPutStrLn, stderr)
import System.Process (readProcessWithExitCode) import System.Process (readProcessWithExitCode)
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM) import Text.Pandoc.Walk (walkM)
import qualified Utils as U
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Public entry point -- Public entry point
@ -87,19 +90,26 @@ transformBlock _ b = return b
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | Run @python3 <script>@. Returns the script\'s stdout on success, or an -- | Run @python3 <script>@. Returns the script\'s stdout on success, or an
-- error message on failure (non-zero exit or missing @script=@ attribute). -- error message on failure (non-zero exit, missing @script=@ attribute, or
-- missing script file).
runScript :: FilePath -> [(T.Text, T.Text)] -> IO (Either String T.Text) runScript :: FilePath -> [(T.Text, T.Text)] -> IO (Either String T.Text)
runScript baseDir attrs = runScript baseDir attrs =
case lookup "script" attrs of case lookup "script" attrs of
Nothing -> return (Left "missing script= attribute") Nothing -> return (Left "missing script= attribute")
Just p -> do Just p -> do
let fullPath = baseDir </> T.unpack p let fullPath = baseDir </> T.unpack p
(ec, out, err) <- exists <- doesFileExist fullPath
readProcessWithExitCode "python3" [fullPath] "" if not exists
`catch` (\e -> return (ExitFailure 1, "", show (e :: IOException))) then return (Left ("script not found: " ++ fullPath))
return $ case ec of else do
ExitSuccess -> Right (T.pack out) (ec, out, err) <-
ExitFailure _ -> Left (if null err then "non-zero exit" else err) readProcessWithExitCode "python3" [fullPath] ""
`catch` (\e -> return (ExitFailure 1, "", show (e :: IOException)))
return $ case ec of
ExitSuccess -> Right (T.pack out)
ExitFailure _ -> Left $
"in " ++ fullPath ++ ": "
++ (if null err then "non-zero exit" else err)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- SVG colour post-processing (mirrors Filters.Score.processColors) -- SVG colour post-processing (mirrors Filters.Score.processColors)
@ -173,10 +183,7 @@ attr :: T.Text -> [(T.Text, T.Text)] -> T.Text
attr key kvs = fromMaybe "" (lookup key kvs) attr key kvs = fromMaybe "" (lookup key kvs)
warn :: String -> String -> IO () warn :: String -> String -> IO ()
warn kind msg = putStrLn $ "[Viz] " ++ kind ++ " error: " ++ msg warn kind msg = hPutStrLn stderr $ "[Viz] " ++ kind ++ " error: " ++ msg
escHtml :: T.Text -> T.Text escHtml :: T.Text -> T.Text
escHtml = T.replace "&" "&amp;" escHtml = U.escapeHtmlText
. T.replace "<" "&lt;"
. T.replace ">" "&gt;"
. T.replace "\"" "&quot;"

View File

@ -15,6 +15,7 @@ module Filters.Wikilinks (preprocess) where
import Data.Char (isAlphaNum, toLower, isSpace) import Data.Char (isAlphaNum, toLower, isSpace)
import Data.List (intercalate) import Data.List (intercalate)
import qualified Utils as U
-- | Scan the raw Markdown source for @[[…]]@ wikilinks and replace them -- | Scan the raw Markdown source for @[[…]]@ wikilinks and replace them
-- with standard Markdown link syntax. -- with standard Markdown link syntax.
@ -29,21 +30,49 @@ preprocess ('[':'[':rest) =
preprocess (c:rest) = c : preprocess rest preprocess (c:rest) = c : preprocess rest
-- | Convert the inner content of @[[…]]@ to a Markdown link. -- | Convert the inner content of @[[…]]@ to a Markdown link.
--
-- Display text is escaped via 'escMdLinkText' so that a literal @]@, @[@,
-- or backslash in the display does not break the surrounding Markdown
-- link syntax. The URL itself is produced by 'slugify' and therefore only
-- ever contains @[a-z0-9-]@, so no URL-side encoding is needed — adding
-- one would be defense against a character set we can't produce.
toMarkdownLink :: String -> String toMarkdownLink :: String -> String
toMarkdownLink inner = toMarkdownLink inner =
let (title, display) = splitOnPipe inner let (title, display) = splitOnPipe inner
url = "/" ++ slugify title url = "/" ++ slugify title
in "[" ++ display ++ "](" ++ url ++ ")" in "[" ++ escMdLinkText display ++ "](" ++ url ++ ")"
-- | Escape the minimum set of characters that would prematurely terminate
-- a Markdown link's display-text segment: backslash (escape char), @[@,
-- and @]@. Backslash MUST be escaped first so the escapes we introduce
-- for @[@ and @]@ are not themselves re-escaped.
--
-- Deliberately NOT escaped: @_@, @*@, @\`@, @<@. Those are inline
-- formatting markers in Markdown and escaping them would strip the
-- author's ability to put emphasis, code, or inline HTML in a wikilink's
-- display text.
escMdLinkText :: String -> String
escMdLinkText = concatMap esc
where
esc '\\' = "\\\\"
esc '[' = "\\["
esc ']' = "\\]"
esc c = [c]
-- | Split on the first @|@; if none, display = title. -- | Split on the first @|@; if none, display = title.
splitOnPipe :: String -> (String, String) splitOnPipe :: String -> (String, String)
splitOnPipe s = splitOnPipe s =
case break (== '|') s of case break (== '|') s of
(title, '|':display) -> (trim title, trim display) (title, '|':display) -> (U.trim title, U.trim display)
_ -> (trim s, trim s) _ -> (U.trim s, U.trim s)
-- | Produce a URL slug: lowercase, words joined by hyphens, -- | Produce a URL slug: lowercase, words joined by hyphens,
-- non-alphanumeric characters removed. -- non-alphanumeric characters removed.
--
-- Trailing punctuation is dropped rather than preserved as a dangling
-- hyphen — @slugify "end." == "end"@, not @"end-"@. This is intentional:
-- author-authored wikilinks tend to end sentences with a period and the
-- desired URL is almost always the terminal-punctuation-free form.
slugify :: String -> String slugify :: String -> String
slugify = intercalate "-" . words . map toLowerAlnum slugify = intercalate "-" . words . map toLowerAlnum
where where
@ -55,5 +84,3 @@ slugify = intercalate "-" . words . map toLowerAlnum
-- split correctly and double-hyphens are -- split correctly and double-hyphens are
-- collapsed by 'words' -- collapsed by 'words'
trim :: String -> String
trim = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ')

View File

@ -1,2 +0,0 @@
-- | Metadata utilities (Phase 2+).
module Metadata where

100
build/Patterns.hs Normal file
View File

@ -0,0 +1,100 @@
{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Canonical content-pattern definitions, shared across modules.
--
-- Several modules need to enumerate "all author-written content" or
-- "all essays". Historically each module hard-coded its own slightly
-- different list, which produced silent omissions (e.g. directory-form
-- essays not appearing on author pages). This module is the single source
-- of truth — every place that needs a content pattern should import from
-- here, not write its own.
module Patterns
( -- * Per-section patterns
essayPattern
, draftEssayPattern
, blogPattern
, poetryPattern
, fictionPattern
, musicPattern
, standalonePagesPattern
-- * Aggregated patterns
, allWritings -- essays + blog + poetry + fiction
, allContent -- everything that backlinks should index
, authorIndexable -- everything that should appear on /authors/{slug}/
, tagIndexable -- everything that should appear on /<tag>/
) where
import Hakyll
-- ---------------------------------------------------------------------------
-- Per-section
-- ---------------------------------------------------------------------------
-- | All published essays, in both supported layouts: flat files and
-- directory-based essays with co-located assets.  Drafts under
-- @content/drafts/essays/**@ are intentionally NOT included here;
-- 'Site.rules' unions them in conditionally when @SITE_ENV=dev@.
essayPattern :: Pattern
essayPattern = flatEssays .||. dirEssays
  where
    flatEssays = "content/essays/*.md"
    dirEssays  = "content/essays/*/index.md"
-- | In-progress essay drafts, in both the flat and directory layouts under
-- @content/drafts/essays/@.  Only 'Site.rules' consumes this pattern, and
-- only when @SITE_ENV=dev@ — every other module that enumerates content
-- (Authors, Tags, Backlinks, Stats, feeds) sees 'essayPattern' alone, so
-- drafts stay invisible to listings, tags, authors, backlinks, and stats.
draftEssayPattern :: Pattern
draftEssayPattern =
  fromGlob "content/drafts/essays/*.md"
    .||. fromGlob "content/drafts/essays/*/index.md"
-- | All blog posts.  Flat files only for now; supporting co-located blog
-- assets would need a directory variant like the one 'essayPattern' has.
blogPattern :: Pattern
blogPattern = fromGlob "content/blog/*.md"
-- | All poetry: flat poems plus poems inside collection subdirectories.
-- Collection @index.md@ files are excluded — they are landing pages, not
-- poems.
poetryPattern :: Pattern
poetryPattern = flatPoems .||. collectionPoems
  where
    flatPoems = "content/poetry/*.md"
    collectionPoems =
      "content/poetry/*/*.md" .&&. complement "content/poetry/*/index.md"
-- | All fiction.  Flat files only for now.
fictionPattern :: Pattern
fictionPattern = fromGlob "content/fiction/*.md"
-- | Music compositions; each landing page lives at
-- @content/music/\<slug\>/index.md@.
musicPattern :: Pattern
musicPattern = fromGlob "content/music/*/index.md"
-- | Top-level standalone pages (about, colophon, current, gpg, …).
standalonePagesPattern :: Pattern
standalonePagesPattern = fromGlob "content/*.md"
-- ---------------------------------------------------------------------------
-- Aggregations
-- ---------------------------------------------------------------------------
-- | All long-form authored writings: essays, blog posts, poetry, and
-- fiction.  Union of the four per-section patterns.
allWritings :: Pattern
allWritings =
  (essayPattern .||. blogPattern)
    .||. (poetryPattern .||. fictionPattern)
-- | Every content file the backlinks pass should index: all long-form
-- writings plus music landing pages and top-level standalone pages.
--
-- Defined in terms of 'allWritings' rather than repeating its four
-- per-section components, so the two aggregates cannot silently drift
-- apart when a new writing section is added.
allContent :: Pattern
allContent = allWritings .||. musicPattern .||. standalonePagesPattern
-- | Content shown on author index pages: essays and blog posts, restricted
-- to the primary (unversioned) compiler output.  Poetry and fiction have
-- their own dedicated indexes and are not aggregated by author.
authorIndexable :: Pattern
authorIndexable = hasNoVersion .&&. (essayPattern .||. blogPattern)
-- | Content shown on tag index pages: essays and blog posts, restricted to
-- the primary (unversioned) compiler output.
tagIndexable :: Pattern
tagIndexable = hasNoVersion .&&. (essayPattern .||. blogPattern)

View File

@ -18,10 +18,12 @@
module SimilarLinks (similarLinksField) where module SimilarLinks (similarLinksField) where
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import qualified Data.ByteString as BS
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Encoding as TE import qualified Data.Text.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
import qualified Data.Aeson as Aeson import qualified Data.Aeson as Aeson
import Hakyll import Hakyll
@ -83,7 +85,29 @@ normaliseUrl url =
t3 = fromMaybe t2 (T.stripSuffix "index.html" t2) t3 = fromMaybe t2 (T.stripSuffix "index.html" t2)
-- strip bare .html extension only for non-index pages -- strip bare .html extension only for non-index pages
t4 = fromMaybe t3 (T.stripSuffix ".html" t3) t4 = fromMaybe t3 (T.stripSuffix ".html" t3)
in T.unpack t4 in percentDecode (T.unpack t4)
-- | Percent-decode @%XX@ escapes (UTF-8) so percent-encoded paths collide
-- with their decoded form on map lookup.  Mirrors 'Backlinks.percentDecode';
-- the two implementations are intentionally duplicated because they apply
-- different normalisations *before* decoding (Backlinks strips @.html@
-- unconditionally; SimilarLinks preserves the trailing-slash form for
-- index pages).
--
-- Characters that are not part of an escape are passed through verbatim —
-- they are NOT squeezed into single bytes, so already-decoded non-ASCII
-- path segments survive intact instead of being mangled by the lenient
-- UTF-8 decode.  Maximal runs of escapes are decoded together so that
-- multi-byte UTF-8 sequences (e.g. @%C3%A9@ → @é@) come out as one
-- character.  A @%@ not followed by two hex digits is kept literally.
percentDecode :: String -> String
percentDecode = go
  where
    go [] = []
    go s@('%' : rest) =
      case takeEscapes s of
        ([], _)     -> '%' : go rest          -- malformed escape: literal '%'
        (bytes, s') -> decodeBytes bytes ++ go s'
    go (c : rest) = c : go rest

    -- Collect the byte values of a maximal run of %XX escapes, returning
    -- the bytes and the remainder of the input.
    takeEscapes ('%' : a : b : rest)
      | Just hi <- hexDigit a
      , Just lo <- hexDigit b
      = let (bs, rest') = takeEscapes rest
        in (fromIntegral (hi * 16 + lo) : bs, rest')
    takeEscapes rest = ([], rest)

    -- Lenient UTF-8 decode of one escape run (invalid bytes become U+FFFD).
    decodeBytes = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack

    hexDigit c
      | c >= '0' && c <= '9' = Just (fromEnum c - fromEnum '0')
      | c >= 'a' && c <= 'f' = Just (fromEnum c - fromEnum 'a' + 10)
      | c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10)
      | otherwise = Nothing
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- HTML rendering -- HTML rendering

View File

@ -2,10 +2,13 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Site (rules) where module Site (rules) where
import Control.Monad (filterM) import Control.Monad (filterM, when)
import Data.List (intercalate, isPrefixOf) import Data.List (isPrefixOf)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import System.Environment (lookupEnv)
import System.FilePath (takeDirectory, takeFileName, replaceExtension) import System.FilePath (takeDirectory, takeFileName, replaceExtension)
import qualified Data.Aeson as Aeson
import qualified Data.ByteString.Lazy.Char8 as LBS
import Hakyll import Hakyll
import Authors (buildAllAuthors, applyAuthorRules) import Authors (buildAllAuthors, applyAuthorRules)
import Backlinks (backlinkRules) import Backlinks (backlinkRules)
@ -15,14 +18,11 @@ import Catalog (musicCatalogCtx)
import Commonplace (commonplaceCtx) import Commonplace (commonplaceCtx)
import Contexts (siteCtx, essayCtx, postCtx, pageCtx, poetryCtx, fictionCtx, compositionCtx, import Contexts (siteCtx, essayCtx, postCtx, pageCtx, poetryCtx, fictionCtx, compositionCtx,
contentKindField) contentKindField)
import qualified Patterns as P
import Tags (buildAllTags, applyTagRules) import Tags (buildAllTags, applyTagRules)
import Pagination (blogPaginateRules) import Pagination (blogPaginateRules)
import Stats (statsRules) import Stats (statsRules)
-- All essays: flat files and directory-based (with co-located assets).
allEssays :: Pattern
allEssays = "content/essays/*.md" .||. "content/essays/*/index.md"
-- Poems inside collection subdirectories, excluding their index pages. -- Poems inside collection subdirectories, excluding their index pages.
collectionPoems :: Pattern collectionPoems :: Pattern
collectionPoems = "content/poetry/*/*.md" .&&. complement "content/poetry/*/index.md" collectionPoems = "content/poetry/*/*.md" .&&. complement "content/poetry/*/index.md"
@ -51,6 +51,16 @@ musicFeedConfig = FeedConfiguration
rules :: Rules () rules :: Rules ()
rules = do rules = do
-- ---------------------------------------------------------------------------
-- Build mode. SITE_ENV=dev (set by `make dev` / `make watch`) includes
-- drafts under content/drafts/**; anything else (unset, "deploy", "build")
-- excludes them entirely from every match, listing, and asset rule below.
-- ---------------------------------------------------------------------------
isDev <- preprocess $ (== Just "dev") <$> lookupEnv "SITE_ENV"
let allEssays = if isDev
then P.essayPattern .||. P.draftEssayPattern
else P.essayPattern
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Backlinks (pass 1: link extraction; pass 2: JSON generation) -- Backlinks (pass 1: link extraction; pass 2: JSON generation)
-- Must run before content rules so dependencies resolve correctly. -- Must run before content rules so dependencies resolve correctly.
@ -70,8 +80,14 @@ rules = do
applyTagRules tags siteCtx applyTagRules tags siteCtx
statsRules tags statsRules tags
-- Per-page JS files — authored alongside content in content/**/*.js -- Per-page JS files — authored alongside content in content/**/*.js.
match "content/**/*.js" $ do -- Draft JS is handled by a separate dev-only rule below.
match ("content/**/*.js" .&&. complement "content/drafts/**") $ do
route $ gsubRoute "content/" (const "")
compile copyFileCompiler
-- Per-page JS co-located with draft essays (dev-only).
when isDev $ match "content/drafts/**/*.js" $ do
route $ gsubRoute "content/" (const "") route $ gsubRoute "content/" (const "")
compile copyFileCompiler compile copyFileCompiler
@ -177,14 +193,25 @@ rules = do
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Essays — flat (content/essays/foo.md → essays/foo.html) and -- Essays — flat (content/essays/foo.md → essays/foo.html) and
-- directory-based (content/essays/slug/index.md → essays/slug/index.html) -- directory-based (content/essays/slug/index.md → essays/slug/index.html).
-- In dev mode, drafts under content/drafts/essays/ route to
-- drafts/essays/foo.html (flat) or drafts/essays/slug/index.html (dir).
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
match allEssays $ do match allEssays $ do
route $ customRoute $ \ident -> route $ customRoute $ \ident ->
let fp = toFilePath ident let fp = toFilePath ident
in if takeFileName fp == "index.md" fname = takeFileName fp
then replaceExtension (drop 8 fp) "html" isIndex = fname == "index.md"
else "essays/" ++ replaceExtension (takeFileName fp) "html" isDraft = "content/drafts/essays/" `isPrefixOf` fp
in case (isDraft, isIndex) of
-- content/drafts/essays/slug/index.md → drafts/essays/slug/index.html
(True, True) -> replaceExtension (drop 8 fp) "html"
-- content/drafts/essays/foo.md → drafts/essays/foo.html
(True, False) -> "drafts/essays/" ++ replaceExtension fname "html"
-- content/essays/slug/index.md → essays/slug/index.html
(False, True) -> replaceExtension (drop 8 fp) "html"
-- content/essays/foo.md → essays/foo.html
(False, False) -> "essays/" ++ replaceExtension fname "html"
compile $ essayCompiler compile $ essayCompiler
>>= saveSnapshot "content" >>= saveSnapshot "content"
>>= loadAndApplyTemplate "templates/essay.html" essayCtx >>= loadAndApplyTemplate "templates/essay.html" essayCtx
@ -198,6 +225,13 @@ rules = do
route $ gsubRoute "content/" (const "") route $ gsubRoute "content/" (const "")
compile copyFileCompiler compile copyFileCompiler
-- Static assets co-located with draft essays (dev-only).
when isDev $ match ("content/drafts/essays/**"
.&&. complement "content/drafts/essays/*.md"
.&&. complement "content/drafts/essays/*/index.md") $ do
route $ gsubRoute "content/" (const "")
compile copyFileCompiler
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Blog posts -- Blog posts
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -400,7 +434,7 @@ rules = do
poetry <- loadAll ("content/poetry/*.md" .&&. hasNoVersion) :: Compiler [Item String] poetry <- loadAll ("content/poetry/*.md" .&&. hasNoVersion) :: Compiler [Item String]
routes <- mapM (getRoute . itemIdentifier) (essays ++ posts ++ fiction ++ poetry) routes <- mapM (getRoute . itemIdentifier) (essays ++ posts ++ fiction ++ poetry)
let urls = [ "/" ++ r | Just r <- routes ] let urls = [ "/" ++ r | Just r <- routes ]
makeItem $ "[" ++ intercalate "," (map show urls) ++ "]" makeItem $ LBS.unpack (Aeson.encode urls)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Atom feed — all content sorted by date -- Atom feed — all content sorted by date

View File

@ -29,7 +29,9 @@ import Data.Maybe (catMaybes, fromMaybe, listToMaybe)
import Data.Time.Calendar (Day, diffDays) import Data.Time.Calendar (Day, diffDays)
import Data.Time.Format (parseTimeM, formatTime, defaultTimeLocale) import Data.Time.Format (parseTimeM, formatTime, defaultTimeLocale)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Exit (ExitCode (..)) import System.Exit (ExitCode (..))
import System.IO (hPutStrLn, stderr)
import System.Process (readProcessWithExitCode) import System.Process (readProcessWithExitCode)
import Hakyll import Hakyll
@ -39,9 +41,12 @@ import Hakyll
-- | Read @IGNORE.txt@ (paths relative to project root, one per line). -- | Read @IGNORE.txt@ (paths relative to project root, one per line).
-- Returns an empty list when the file is absent or empty. -- Returns an empty list when the file is absent or empty.
--
-- Uses strict text IO so the file handle is released immediately rather
-- than left dangling on the lazy spine of 'readFile'.
readIgnore :: IO [FilePath] readIgnore :: IO [FilePath]
readIgnore = readIgnore =
(filter (not . null) . lines <$> readFile "IGNORE.txt") (filter (not . null) . map T.unpack . T.lines <$> TIO.readFile "IGNORE.txt")
`catch` \(_ :: IOException) -> return [] `catch` \(_ :: IOException) -> return []
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -49,13 +54,24 @@ readIgnore =
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | Return commit dates (ISO "YYYY-MM-DD", newest-first) for @fp@. -- | Return commit dates (ISO "YYYY-MM-DD", newest-first) for @fp@.
--
-- Logs git's stderr to the build's stderr when present so the author
-- isn't left in the dark when a file isn't tracked yet (the warning
-- otherwise vanishes silently).
gitDates :: FilePath -> IO [String] gitDates :: FilePath -> IO [String]
gitDates fp = do gitDates fp = do
(ec, out, _) <- readProcessWithExitCode (ec, out, err) <- readProcessWithExitCode
"git" ["log", "--follow", "--format=%ad", "--date=short", "--", fp] "" "git" ["log", "--follow", "--format=%ad", "--date=short", "--", fp] ""
case ec of case ec of
ExitFailure _ -> return [] ExitFailure _ -> do
ExitSuccess -> return $ filter (not . null) (lines out) let msg = if null err then "git log failed" else err
hPutStrLn stderr $ "[Stability] " ++ fp ++ ": " ++ msg
return []
ExitSuccess -> do
case err of
"" -> return ()
_ -> hPutStrLn stderr $ "[Stability] " ++ fp ++ ": " ++ err
return $ filter (not . null) (lines out)
-- | Parse an ISO "YYYY-MM-DD" string to a 'Day'. -- | Parse an ISO "YYYY-MM-DD" string to a 'Day'.
parseIso :: String -> Maybe Day parseIso :: String -> Maybe Day
@ -69,17 +85,38 @@ daySpan oldest newest =
_ -> 0 _ -> 0
-- | Derive stability label from commit dates (newest-first). -- | Derive stability label from commit dates (newest-first).
--
-- Thresholds (commit count + age in days since first commit):
--
-- * @volatile@ — solo commit OR less than two weeks old.
-- * @revising@ — under six commits AND under three months old.
-- * @fairly stable@ — under sixteen commits OR under one year old.
-- * @stable@ — under thirty-one commits OR under two years old.
-- * @established@ — anything beyond.
--
-- These cliffs are deliberately conservative: a fast burst of commits
-- early in a piece's life looks volatile until enough time has passed
-- to demonstrate it has settled.
stabilityFromDates :: [String] -> String stabilityFromDates :: [String] -> String
stabilityFromDates [] = "volatile" stabilityFromDates [] = "volatile"
stabilityFromDates dates = stabilityFromDates dates@(newest : _) =
classify (length dates) (daySpan (last dates) (head dates)) let oldest = case reverse dates of
(x : _) -> x
[] -> newest -- unreachable; matched above
in classify (length dates) (daySpan oldest newest)
where where
classify n age classify n age
| n <= 1 || age < 14 = "volatile" | n <= 1 || age < volatileAge = "volatile"
| n <= 5 && age < 90 = "revising" | n <= 5 && age < revisingAge = "revising"
| n <= 15 || age < 365 = "fairly stable" | n <= 15 || age < fairlyStableAge = "fairly stable"
| n <= 30 || age < 730 = "stable" | n <= 30 || age < stableAge = "stable"
| otherwise = "established" | otherwise = "established"
volatileAge, revisingAge, fairlyStableAge, stableAge :: Int
volatileAge = 14
revisingAge = 90
fairlyStableAge = 365
stableAge = 730
-- | Format an ISO date as "%-d %B %Y" (e.g. "16 March 2026"). -- | Format an ISO date as "%-d %B %Y" (e.g. "16 March 2026").
fmtIso :: String -> String fmtIso :: String -> String

View File

@ -8,15 +8,18 @@ module Stats (statsRules) where
import Control.Exception (IOException, catch) import Control.Exception (IOException, catch)
import Control.Monad (forM) import Control.Monad (forM)
import Data.List (find, isSuffixOf, sort, sortBy) import Data.Char (isSpace, toLower)
import Data.List (find, isPrefixOf, isSuffixOf, sort, sortBy)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe) import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe)
import Data.Ord (comparing, Down (..)) import Data.Ord (comparing, Down (..))
import qualified Data.Set as Set import qualified Data.Set as Set
import Data.String (fromString)
import Data.Time (getCurrentTime, formatTime, defaultTimeLocale, import Data.Time (getCurrentTime, formatTime, defaultTimeLocale,
Day, parseTimeM, utctDay, addDays, diffDays) Day, parseTimeM, utctDay, addDays, diffDays)
import Data.Time.Calendar (toGregorian, dayOfWeek) import Data.Time.Calendar (toGregorian, dayOfWeek)
import System.Directory (doesDirectoryExist, getFileSize, listDirectory) import System.Directory (doesDirectoryExist, getFileSize, listDirectory,
pathIsSymbolicLink)
import System.Exit (ExitCode (..)) import System.Exit (ExitCode (..))
import System.FilePath (takeExtension, (</>)) import System.FilePath (takeExtension, (</>))
import System.Process (readProcessWithExitCode) import System.Process (readProcessWithExitCode)
@ -26,10 +29,15 @@ import qualified Data.Aeson.Key as AK
import qualified Data.Aeson.KeyMap as KM import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import qualified Data.Text.Encoding as TE import qualified Data.Text.Encoding as TE
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Text.Blaze.Html.Renderer.String (renderHtml)
import qualified Text.Blaze.Internal as BI
import Hakyll import Hakyll
import Authors (authorLinksField) import Contexts (siteCtx, authorLinksField)
import Contexts (siteCtx) import qualified Patterns as P
import Utils (readingTime) import Utils (readingTime)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -100,10 +108,49 @@ pctStr _ 0 = "—"
pctStr n total = show (n * 100 `div` total) ++ "%" pctStr n total = show (n * 100 `div` total) ++ "%"
-- | Strip HTML tags for plain-text word counting. -- | Strip HTML tags for plain-text word counting.
--
-- Handles:
-- * Tag bodies, including @>@ inside double-quoted attribute values
-- (so @\<img alt=\"a > b\"\>@ doesn't slice the surrounding text).
-- * HTML comments @\<!-- ... --\>@ as a unit.
-- * @\<![CDATA[ ... ]]\>@ sections.
--
-- This is still a heuristic — it does not validate the HTML — but it
-- closes the most common ways for "tag stripping" to leak content.
stripHtmlTags :: String -> String stripHtmlTags :: String -> String
stripHtmlTags [] = [] stripHtmlTags = go
stripHtmlTags ('<':rest) = stripHtmlTags (drop 1 (dropWhile (/= '>') rest)) where
stripHtmlTags (c:rest) = c : stripHtmlTags rest go [] = []
go ('<':'!':'-':'-':rest) = go (dropComment rest)
go ('<':'!':'[':'C':'D':'A':'T':'A':'[':rest)
= go (dropCdata rest)
go ('<':rest) = go (dropTag rest)
go (c:rest) = c : go rest
-- Drop everything up to and including "-->".
dropComment ('-':'-':'>':rs) = rs
dropComment (_:rs) = dropComment rs
dropComment [] = []
-- Drop everything up to and including "]]>".
dropCdata (']':']':'>':rs) = rs
dropCdata (_:rs) = dropCdata rs
dropCdata [] = []
-- Drop a tag body, respecting double-quoted attribute values.
dropTag ('"':rs) = dropTag (skipQuoted rs)
dropTag ('\'':rs) = dropTag (skipApos rs)
dropTag ('>':rs) = rs
dropTag (_:rs) = dropTag rs
dropTag [] = []
skipQuoted ('"':rs) = rs
skipQuoted (_:rs) = skipQuoted rs
skipQuoted [] = []
skipApos ('\'':rs) = rs
skipApos (_:rs) = skipApos rs
skipApos [] = []
-- | Normalise a page URL for backlink map lookup (strip trailing .html). -- | Normalise a page URL for backlink map lookup (strip trailing .html).
normUrl :: String -> String normUrl :: String -> String
@ -114,10 +161,16 @@ normUrl u
pad2 :: (Show a, Integral a) => a -> String pad2 :: (Show a, Integral a) => a -> String
pad2 n = if n < 10 then "0" ++ show n else show n pad2 n = if n < 10 then "0" ++ show n else show n
-- | Median of a non-empty list; returns 0 for empty. -- | Median of a non-empty list; returns 0 for empty. Uses 'drop' +
-- pattern match instead of @(!!)@ so the function is total in its
-- own implementation, not just by external invariant.
median :: [Int] -> Int median :: [Int] -> Int
median [] = 0 median [] = 0
median xs = let s = sort xs in s !! (length s `div` 2) median xs =
case drop (length xs `div` 2) (sort xs) of
(m : _) -> m
[] -> 0 -- unreachable: length xs >= 1 above
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Date helpers (for /stats/ page) -- Date helpers (for /stats/ page)
@ -145,83 +198,169 @@ shortMonth m = case m of
9 -> "Sep"; 10 -> "Oct"; 11 -> "Nov"; 12 -> "Dec" 9 -> "Sep"; 10 -> "Oct"; 11 -> "Nov"; 12 -> "Dec"
_ -> "" _ -> ""
-- ---------------------------------------------------------------------------
-- URL sanitization and core HTML combinators
-- ---------------------------------------------------------------------------
-- | Defense-in-depth URL allowlist: accept only internal paths, fragments,
-- and an explicit set of safe schemes.  The comparison is case-insensitive
-- and ignores leading whitespace, blocking @JavaScript:@, @\tjavascript:@,
-- @data:@, and friends.  @http://@ is deliberately not allowed, to avoid
-- mixed-content warnings.
--
-- Protocol-relative URLs (@//evil.com@) are rejected explicitly, because
-- their leading slash would otherwise slip through the @\"\/\"@ prefix
-- check.
isSafeUrl :: String -> Bool
isSafeUrl rawUrl = not protocolRelative && hasSafePrefix
  where
    norm             = map toLower (dropWhile isSpace rawUrl)
    protocolRelative = "//" `isPrefixOf` norm
    hasSafePrefix    =
      or [p `isPrefixOf` norm | p <- ["/", "https://", "mailto:", "#"]]
-- | Sanitize a URL for an @href@ attribute: anything rejected by
-- 'isSafeUrl' collapses to @"#"@.
safeHref :: String -> H.AttributeValue
safeHref u = H.stringValue (if isSafeUrl u then u else "#")
-- | Render a 'String' as escaped HTML text ('H.toHtml' specialised).
txt :: String -> H.Html
txt s = H.toHtml s
-- | Anchor element whose label is escaped and whose URL goes through
-- 'safeHref'.  Intended for trusted plain-text labels such as tag slugs.
link :: String -> String -> H.Html
link url label =
  (H.a H.! A.href (safeHref url)) (H.toHtml label)
-- | Anchor for a content page whose title comes from frontmatter and may
-- contain author-authored inline HTML (e.g. @<em>Book Title</em>@).  The
-- title is therefore emitted pre-escaped — per site convention metadata
-- titles are author-controlled trusted HTML — while the URL is still
-- sanitized via 'safeHref'.
pageLink :: String -> String -> H.Html
pageLink url title =
  (H.a H.! A.href (safeHref url)) (H.preEscapedToHtml title)
-- | An @\<h2 id=…\>@ section heading followed by the section's body.
section :: String -> String -> H.Html -> H.Html
section anchor heading body =
  (H.h2 H.! A.id (H.stringValue anchor)) (H.toHtml heading) >> body
-- | Build-telemetry table: header labels, body rows, and an optional
-- total/footer row.  Cell contents are pre-rendered 'H.Html', so callers
-- can embed links or emphasis inside cells without double-escaping.
table :: [String] -> [[H.Html]] -> Maybe [H.Html] -> H.Html
table headers rows mFoot =
  H.table H.! A.class_ "build-table" $ do
    H.thead (H.tr (mapM_ (H.th . H.toHtml) headers))
    H.tbody (mapM_ (H.tr . mapM_ H.td) rows)
    case mFoot of
      Nothing    -> pure ()
      Just cells ->
        H.tfoot (H.tr H.! A.class_ "build-total" $ mapM_ H.td cells)
-- | Two-column metadata block: each pair renders as a @<dt>/<dd>@ entry.
-- Values are 'H.Html' so links and plain text can be mixed.
dl :: [(String, H.Html)] -> H.Html
dl entries = H.dl H.! A.class_ "build-meta" $ mapM_ entry entries
  where
    entry (label, value) = H.dt (H.toHtml label) >> H.dd value
-- ---------------------------------------------------------------------------
-- SVG / custom element helpers (no blaze-svg dependency)
-- ---------------------------------------------------------------------------
-- | SVG and @\<title\>@ elements built with 'BI.customParent', avoiding a
-- blaze-svg dependency; each wraps its child markup in the named tag.
svgTag, rectTag, textTag, titleTag :: H.Html -> H.Html
svgTag = BI.customParent "svg"
rectTag = BI.customParent "rect"
textTag = BI.customParent "text"
titleTag = BI.customParent "title"
-- | Attach an attribute that 'Text.Blaze.Html5.Attributes' doesn't provide
-- (e.g. SVG @viewBox@, @x@, @y@, or @data-target@).
customAttr :: String -> String -> H.Attribute
customAttr name = BI.customAttribute (fromString name) . fromString
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Heatmap SVG -- Heatmap SVG
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | 52-week writing activity heatmap (inline SVG, CSS-variable colors). -- | 52-week writing activity heatmap. Styled via @.heatmap-svg@ rules in
renderHeatmap :: Map.Map Day Int -> Day -> String -- static/css/build.css (no inline @<style>@).
renderHeatmap :: Map.Map Day Int -> Day -> H.Html
renderHeatmap wordsByDay today = renderHeatmap wordsByDay today =
let cellSz = 10 :: Int let cellSz = 10 :: Int
gap = 2 :: Int gap = 2 :: Int
step = cellSz + gap step = cellSz + gap
hdrH = 22 :: Int -- vertical space for month labels hdrH = 22 :: Int -- vertical space for month labels
nWeeks = 52 nWeeks = 52
-- First Monday of the 52-week window -- First Monday of the 52-week window
startDay = addDays (fromIntegral (-(nWeeks - 1)) * 7) (weekStart today) startDay = addDays (fromIntegral (-(nWeeks - 1)) * 7) (weekStart today)
nDays = diffDays today startDay + 1 nDays = diffDays today startDay + 1
allDays = [addDays i startDay | i <- [0 .. nDays - 1]] allDays = [addDays i startDay | i <- [0 .. nDays - 1]]
weekOf d = fromIntegral (diffDays d startDay `div` 7) :: Int weekOf d = fromIntegral (diffDays d startDay `div` 7) :: Int
dowOf d = fromEnum (dayOfWeek d) -- Mon=0..Sun=6 dowOf d = fromEnum (dayOfWeek d) -- Mon=0..Sun=6
svgW = (nWeeks - 1) * step + cellSz svgW = (nWeeks - 1) * step + cellSz
svgH = 6 * step + cellSz + hdrH svgH = 6 * step + cellSz + hdrH
-- Month labels: one per first-of-month day monthLabel d =
monthLbls = concatMap (\d ->
let (_, mo, da) = toGregorian d let (_, mo, da) = toGregorian d
in if da == 1 in if da == 1
then "<text class=\"hm-lbl\" x=\"" ++ show (weekOf d * step) then textTag H.! A.class_ "hm-lbl"
++ "\" y=\"14\">" ++ shortMonth mo ++ "</text>" H.! customAttr "x" (show (weekOf d * step))
else "") allDays H.! customAttr "y" "14"
$ txt (shortMonth mo)
else mempty
-- One rect per day dayCell d =
cells = concatMap (\d -> let wc = fromMaybe 0 (Map.lookup d wordsByDay)
let wc = fromMaybe 0 (Map.lookup d wordsByDay)
(yr, mo, da) = toGregorian d (yr, mo, da) = toGregorian d
x = weekOf d * step x = weekOf d * step
y = dowOf d * step + hdrH y = dowOf d * step + hdrH
tip = show yr ++ "-" ++ pad2 mo ++ "-" ++ pad2 da tip = show yr ++ "-" ++ pad2 mo ++ "-" ++ pad2 da
++ if wc > 0 then ": " ++ commaInt wc ++ " words" else "" ++ if wc > 0 then ": " ++ commaInt wc ++ " words" else ""
in "<rect class=\"" ++ heatClass wc ++ "\"" in rectTag H.! A.class_ (H.stringValue (heatClass wc))
++ " x=\"" ++ show x ++ "\" y=\"" ++ show y ++ "\"" H.! customAttr "x" (show x)
++ " width=\"" ++ show cellSz ++ "\" height=\"" ++ show cellSz ++ "\"" H.! customAttr "y" (show y)
++ " rx=\"2\"><title>" ++ tip ++ "</title></rect>") allDays H.! customAttr "width" (show cellSz)
H.! customAttr "height" (show cellSz)
H.! customAttr "rx" "2"
$ titleTag (txt tip)
-- Inline legend (five sample rects)
legendW = 5 * step - gap legendW = 5 * step - gap
legendSvg = legendCell i =
"<svg width=\"" ++ show legendW ++ "\" height=\"" ++ show cellSz ++ "\"" rectTag H.! A.class_ (H.stringValue ("hm" ++ show i))
++ " viewBox=\"0 0 " ++ show legendW ++ " " ++ show cellSz ++ "\"" H.! customAttr "x" (show (i * step))
++ " style=\"display:inline;vertical-align:middle\">" H.! customAttr "y" "0"
++ concatMap (\i -> H.! customAttr "width" (show cellSz)
"<rect class=\"hm" ++ show i ++ "\"" H.! customAttr "height" (show cellSz)
++ " x=\"" ++ show (i * step) ++ "\" y=\"0\"" H.! customAttr "rx" "2"
++ " width=\"" ++ show cellSz ++ "\" height=\"" ++ show cellSz ++ "\"" $ mempty
++ " rx=\"2\"/>") [0..4]
++ "</svg>"
in "<figure class=\"stats-heatmap\">" legendSvg =
++ "<svg width=\"" ++ show svgW ++ "\" height=\"" ++ show svgH ++ "\"" svgTag H.! customAttr "width" (show legendW)
++ " viewBox=\"0 0 " ++ show svgW ++ " " ++ show svgH ++ "\"" H.! customAttr "height" (show cellSz)
++ " class=\"heatmap-svg\" role=\"img\"" H.! customAttr "viewBox" ("0 0 " ++ show legendW ++ " " ++ show cellSz)
++ " aria-label=\"52-week writing activity heatmap\">" H.! customAttr "style" "display:inline;vertical-align:middle"
++ "<style>" $ mapM_ legendCell [0 .. 4 :: Int]
++ ".hm0{fill:var(--hm-0)}.hm1{fill:var(--hm-1)}.hm2{fill:var(--hm-2)}"
++ ".hm3{fill:var(--hm-3)}.hm4{fill:var(--hm-4)}" in H.figure H.! A.class_ "stats-heatmap" $ do
++ ".hm-lbl{font-size:9px;fill:var(--text-faint);font-family:sans-serif}" svgTag H.! customAttr "width" (show svgW)
++ "</style>" H.! customAttr "height" (show svgH)
++ monthLbls ++ cells H.! customAttr "viewBox" ("0 0 " ++ show svgW ++ " " ++ show svgH)
++ "</svg>" H.! A.class_ "heatmap-svg"
++ "<figcaption class=\"heatmap-legend\">" H.! customAttr "role" "img"
++ "Less\xA0" ++ legendSvg ++ "\xA0More" H.! customAttr "aria-label" "52-week writing activity heatmap"
++ "</figcaption>" $ do
++ "</figure>" mapM_ monthLabel allDays
mapM_ dayCell allDays
H.figcaption H.! A.class_ "heatmap-legend" $ do
"Less\xA0"
legendSvg
"\xA0More"
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Stats page sections -- Stats page sections
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
renderMonthlyVolume :: Map.Map Day Int -> String renderMonthlyVolume :: Map.Map Day Int -> H.Html
renderMonthlyVolume wordsByDay = renderMonthlyVolume wordsByDay =
section "volume" "Monthly volume" $ section "volume" "Monthly volume" $
let byMonth = Map.fromListWith (+) let byMonth = Map.fromListWith (+)
@ -230,71 +369,80 @@ renderMonthlyVolume wordsByDay =
, let (y, m, _) = toGregorian day , let (y, m, _) = toGregorian day
] ]
in if Map.null byMonth in if Map.null byMonth
then "<p><em>No dated content yet.</em></p>" then H.p (H.em "No dated content yet.")
else else
let maxWC = max 1 $ maximum $ Map.elems byMonth let maxWC = max 1 $ maximum $ Map.elems byMonth
bar (y, m) = bar (y, m) =
let wc = fromMaybe 0 (Map.lookup (y, m) byMonth) let wc = fromMaybe 0 (Map.lookup (y, m) byMonth)
pct = if wc == 0 then 0 else max 2 (wc * 100 `div` maxWC) pct = if wc == 0 then 0 else max 2 (wc * 100 `div` maxWC)
lbl = shortMonth m ++ " \x2019" ++ drop 2 (show y) lbl = shortMonth m ++ " \x2019" ++ drop 2 (show y)
in "<div class=\"build-bar-row\">" in H.div H.! A.class_ "build-bar-row" $ do
++ "<span class=\"build-bar-label\">" ++ lbl ++ "</span>" H.span H.! A.class_ "build-bar-label" $ txt lbl
++ "<span class=\"build-bar-wrap\"><span class=\"build-bar\" style=\"width:" H.span H.! A.class_ "build-bar-wrap" $
++ show pct ++ "%\"></span></span>" H.span H.! A.class_ "build-bar"
++ "<span class=\"build-bar-count\">" H.! A.style (H.stringValue ("width:" ++ show pct ++ "%"))
++ (if wc > 0 then commaInt wc else "") ++ "</span>" $ mempty
++ "</div>" H.span H.! A.class_ "build-bar-count" $
in "<div class=\"build-bars\">" ++ concatMap bar (Map.keys byMonth) ++ "</div>" if wc > 0 then txt (commaInt wc) else mempty
in H.div H.! A.class_ "build-bars" $
mapM_ bar (Map.keys byMonth)
renderCorpus :: [TypeRow] -> [PageInfo] -> String renderCorpus :: [TypeRow] -> [PageInfo] -> H.Html
renderCorpus typeRows allPIs = renderCorpus typeRows allPIs =
section "corpus" "Corpus" $ concat section "corpus" "Corpus" $ do
[ dl [ ("Total words", commaInt totalWords) dl [ ("Total words", txt (commaInt totalWords))
, ("Total pages", commaInt (length allPIs)) , ("Total pages", txt (commaInt (length allPIs)))
, ("Total reading time", rtStr totalWords) , ("Total reading time", txt (rtStr totalWords))
, ("Average length", commaInt avgWC ++ " words") , ("Average length", txt (commaInt avgWC ++ " words"))
, ("Median length", commaInt medWC ++ " words") , ("Median length", txt (commaInt medWC ++ " words"))
] ]
, table ["Type", "Pages", "Words", "Reading time"] table ["Type", "Pages", "Words", "Reading time"]
(map row typeRows) (map row typeRows)
(Just ["Total", commaInt (sum (map trCount typeRows)) (Just [ "Total"
, commaInt totalWords, rtStr totalWords]) , txt (commaInt (sum (map trCount typeRows)))
] , txt (commaInt totalWords)
, txt (rtStr totalWords)
])
where where
hasSomeWC = filter (\p -> piWC p > 0) allPIs hasSomeWC = filter (\p -> piWC p > 0) allPIs
totalWords = sum (map trWords typeRows) totalWords = sum (map trWords typeRows)
avgWC = if null hasSomeWC then 0 else totalWords `div` length hasSomeWC avgWC = if null hasSomeWC then 0 else totalWords `div` length hasSomeWC
medWC = median (map piWC hasSomeWC) medWC = median (map piWC hasSomeWC)
row r = [trLabel r, commaInt (trCount r), commaInt (trWords r), rtStr (trWords r)] row r = [ txt (trLabel r)
, txt (commaInt (trCount r))
, txt (commaInt (trWords r))
, txt (rtStr (trWords r))
]
renderNotable :: [PageInfo] -> String renderNotable :: [PageInfo] -> H.Html
renderNotable allPIs = renderNotable allPIs =
section "notable" "Notable" $ concat section "notable" "Notable" $ do
[ "<p><strong>Longest</strong></p>" H.p (H.strong "Longest")
, pageList (take 5 (sortBy (comparing (Down . piWC)) hasSomeWC)) pageList (take 5 (sortBy (comparing (Down . piWC)) hasSomeWC))
, "<p><strong>Shortest</strong></p>" H.p (H.strong "Shortest")
, pageList (take 5 (sortBy (comparing piWC) hasSomeWC)) pageList (take 5 (sortBy (comparing piWC) hasSomeWC))
]
where where
hasSomeWC = filter (\p -> piWC p > 50) allPIs hasSomeWC = filter (\p -> piWC p > 50) allPIs
pageList ps = "<ol class=\"build-page-list\">" pageList ps = H.ol H.! A.class_ "build-page-list" $
++ concatMap (\p -> "<li>" ++ link (piUrl p) (piTitle p) mapM_ (\p -> H.li $ do
++ " \x2014 " ++ commaInt (piWC p) ++ " words</li>") ps pageLink (piUrl p) (piTitle p)
++ "</ol>" txt (" \x2014 " ++ commaInt (piWC p) ++ " words")
) ps
renderStatsTags :: [(String, Int)] -> Int -> String -- | Renamed/aliased to 'renderTagsSection' below — kept as a name for
renderStatsTags topTags uniqueCount = -- legacy call sites until they are migrated. Defining it as the same
section "tags" "Tags" $ concat -- function (instead of an independent copy) prevents the two from
[ dl [("Unique tags", commaInt uniqueCount)] -- drifting silently.
, table ["Tag", "Items"] (map row topTags) Nothing renderStatsTags :: [(String, Int)] -> Int -> H.Html
] renderStatsTags = renderTagsSection
where row (t, n) = [link ("/" ++ t ++ "/") t, show n]
statsTOC :: String statsTOC :: H.Html
statsTOC = "<ol>\n" ++ concatMap item entries ++ "</ol>\n" statsTOC = H.ol $ mapM_ item entries
where where
item (i, t) = "<li><a href=\"#" ++ i ++ "\" data-target=\"" ++ i ++ "\">" item (i, t) =
++ t ++ "</a></li>\n" H.li $ H.a H.! A.href (H.stringValue ("#" ++ i))
H.! customAttr "data-target" i
$ txt t
entries = [ ("activity", "Writing activity") entries = [ ("activity", "Writing activity")
, ("volume", "Monthly volume") , ("volume", "Monthly volume")
, ("corpus", "Corpus") , ("corpus", "Corpus")
@ -306,17 +454,28 @@ statsTOC = "<ol>\n" ++ concatMap item entries ++ "</ol>\n"
-- IO: output directory walk -- IO: output directory walk
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- | Recursively walk a directory, returning @(file, size)@ tuples for every
-- regular file beneath it.
--
-- Symlinks (both files and directories) are skipped, so a stray
-- @_site\/a -> _site@ doesn't trigger an infinite loop.
walkDir :: FilePath -> IO [(FilePath, Integer)] walkDir :: FilePath -> IO [(FilePath, Integer)]
walkDir dir = do walkDir dir = do
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return []) entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
fmap concat $ forM entries $ \e -> do fmap concat $ forM entries $ \e -> do
let path = dir </> e let path = dir </> e
isDir <- doesDirectoryExist path isLink <- pathIsSymbolicLink path
if isDir `catch` (\(_ :: IOException) -> return False)
then walkDir path if isLink
then return []
else do else do
sz <- getFileSize path `catch` (\(_ :: IOException) -> return 0) isDir <- doesDirectoryExist path
return [(path, sz)] if isDir
then walkDir path
else do
sz <- getFileSize path
`catch` (\(_ :: IOException) -> return 0)
return [(path, sz)]
displayExt :: FilePath -> String displayExt :: FilePath -> String
displayExt path = case takeExtension path of displayExt path = case takeExtension path of
@ -354,9 +513,13 @@ countLinesDir :: FilePath -> String -> (FilePath -> Bool) -> IO (Int, Int)
countLinesDir dir ext skipPred = do countLinesDir dir ext skipPred = do
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return []) entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
let files = filter (\e -> takeExtension e == ext && not (skipPred e)) entries let files = filter (\e -> takeExtension e == ext && not (skipPred e)) entries
-- Use strict text IO so the file handle is released as soon as the
-- contents are read; the prior 'readFile' chained lazy IO under
-- 'forM', leaving every handle open until the loop forced 'lines'.
ls <- fmap sum $ forM files $ \e -> do ls <- fmap sum $ forM files $ \e -> do
content <- readFile (dir </> e) `catch` (\(_ :: IOException) -> return "") content <- TIO.readFile (dir </> e)
return (length (lines content)) `catch` (\(_ :: IOException) -> return T.empty)
return (length (T.lines content))
return (length files, ls) return (length files, ls)
getLocStats :: IO (Int, Int, Int, Int, Int, Int) getLocStats :: IO (Int, Int, Int, Int, Int, Int)
@ -385,125 +548,116 @@ getGitStats = do
return (commits, firstDate) return (commits, firstDate)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- HTML rendering: section helpers -- HTML rendering: build page sections
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
section :: String -> String -> String -> String renderContent :: [TypeRow] -> H.Html
section id_ title body = concat
[ "<h2 id=\"", id_, "\">", title, "</h2>\n"
, body
]
table :: [String] -> [[String]] -> Maybe [String] -> String
table headers rows mFoot = concat
[ "<table class=\"build-table\">"
, "<thead><tr>", concatMap (\h -> "<th>" ++ h ++ "</th>") headers, "</tr></thead>"
, "<tbody>", concatMap renderRow rows, "</tbody>"
, maybe "" renderFoot mFoot
, "</table>"
]
where
renderRow cells = "<tr>" ++ concatMap (\c -> "<td>" ++ c ++ "</td>") cells ++ "</tr>"
renderFoot cells = "<tfoot><tr class=\"build-total\">"
++ concatMap (\c -> "<td>" ++ c ++ "</td>") cells
++ "</tr></tfoot>"
dl :: [(String, String)] -> String
dl pairs = "<dl class=\"build-meta\">"
++ concatMap (\(k, v) -> "<dt>" ++ k ++ "</dt><dd>" ++ v ++ "</dd>") pairs
++ "</dl>"
link :: String -> String -> String
link url title = "<a href=\"" ++ url ++ "\">" ++ title ++ "</a>"
-- ---------------------------------------------------------------------------
-- HTML rendering: sections
-- ---------------------------------------------------------------------------
renderContent :: [TypeRow] -> String
renderContent rows = renderContent rows =
section "content" "Content" $ section "content" "Content" $
table table ["Type", "Count", "Words", "Reading time"]
["Type", "Count", "Words", "Reading time"] (map row rows)
(map row rows) (Just [ "Total"
(Just ["Total", commaInt totalCount, commaInt totalWords, rtStr totalWords]) , txt (commaInt totalCount)
, txt (commaInt totalWords)
, txt (rtStr totalWords)
])
where where
totalCount = sum (map trCount rows) totalCount = sum (map trCount rows)
totalWords = sum (map trWords rows) totalWords = sum (map trWords rows)
row r = [ trLabel r row r = [ txt (trLabel r)
, commaInt (trCount r) , txt (commaInt (trCount r))
, commaInt (trWords r) , txt (commaInt (trWords r))
, rtStr (trWords r) , txt (rtStr (trWords r))
] ]
renderPages :: [PageInfo] -> Maybe (String,String,String) -> Maybe (String,String,String) -> String renderPages :: [PageInfo]
-> Maybe (String, String, String)
-> Maybe (String, String, String)
-> H.Html
renderPages allPIs mOldest mNewest = renderPages allPIs mOldest mNewest =
section "pages" "Pages" $ concat section "pages" "Pages" $ do
[ dl $ dl $
[ ("Total pages", commaInt (length allPIs)) [ ("Total pages", txt (commaInt (length allPIs)))
, ("Average length", commaInt avgWC ++ " words") , ("Average length", txt (commaInt avgWC ++ " words"))
] ++ ] ++
maybe [] (\(d,t,u) -> [("Oldest content", d ++ " \x2014 " ++ link u t)]) mOldest ++ maybe [] (\(d,t,u) -> [("Oldest content", datedLink d t u)]) mOldest ++
maybe [] (\(d,t,u) -> [("Newest content", d ++ " \x2014 " ++ link u t)]) mNewest maybe [] (\(d,t,u) -> [("Newest content", datedLink d t u)]) mNewest
, "<p><strong>Longest</strong></p>" H.p (H.strong "Longest")
, pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC)) pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC))
, "<p><strong>Shortest</strong></p>" H.p (H.strong "Shortest")
, pageList (take 3 (sortBy (comparing piWC) hasSomeWC)) pageList (take 3 (sortBy (comparing piWC) hasSomeWC))
]
where where
hasSomeWC = filter (\p -> piWC p > 0) allPIs hasSomeWC = filter (\p -> piWC p > 0) allPIs
avgWC = if null hasSomeWC then 0 avgWC = if null hasSomeWC then 0
else sum (map piWC hasSomeWC) `div` length hasSomeWC else sum (map piWC hasSomeWC) `div` length hasSomeWC
pageList ps = "<ol class=\"build-page-list\">" datedLink d t u = do
++ concatMap (\p -> "<li>" ++ link (piUrl p) (piTitle p) txt (d ++ " \x2014 ")
++ " \x2014 " ++ commaInt (piWC p) ++ " words</li>") ps pageLink u t
++ "</ol>" pageList ps = H.ol H.! A.class_ "build-page-list" $
mapM_ (\p -> H.li $ do
pageLink (piUrl p) (piTitle p)
txt (" \x2014 " ++ commaInt (piWC p) ++ " words")
) ps
renderDistribution :: [Int] -> String renderDistribution :: [Int] -> H.Html
renderDistribution wcs = renderDistribution wcs =
section "distribution" "Word-length distribution" $ section "distribution" "Word-length distribution" $
"<div class=\"build-bars\">" ++ concatMap bar buckets ++ "</div>" H.div H.! A.class_ "build-bars" $ mapM_ bar buckets
where where
bucketOf w bucketOf w
| w < 500 = 0 | w < 1000 = 1 | w < 2000 = 2 | w < 5000 = 3 | otherwise = 4 | w < 500 = 0
labels = ["&lt; 500", "500 \x2013 1k", "1k \x2013 2k", "2k \x2013 5k", "\x2265 5k"] | w < 1000 = 1
| w < 2000 = 2
| w < 5000 = 3
| otherwise = 4
labels :: [H.Html]
labels = [ "< 500"
, "500 \x2013 1k"
, "1k \x2013 2k"
, "2k \x2013 5k"
, "\x2265 5k"
]
counts = foldr (\w acc -> Map.insertWith (+) (bucketOf w) (1 :: Int) acc) counts = foldr (\w acc -> Map.insertWith (+) (bucketOf w) (1 :: Int) acc)
(Map.fromList [(i, 0 :: Int) | i <- [0..4]]) wcs (Map.fromList [(i, 0 :: Int) | i <- [0 .. 4]]) wcs
buckets = [(labels !! i, fromMaybe 0 (Map.lookup i counts)) | i <- [0..4]] buckets = [(labels !! i, fromMaybe 0 (Map.lookup i counts)) | i <- [0 .. 4]]
maxCount = max 1 (maximum (map snd buckets)) maxCount = max 1 (maximum (map snd buckets))
bar (lbl, n) = bar (lbl, n) =
let pct = n * 100 `div` maxCount let pct = n * 100 `div` maxCount
in concat in H.div H.! A.class_ "build-bar-row" $ do
[ "<div class=\"build-bar-row\">" H.span H.! A.class_ "build-bar-label" $ lbl
, "<span class=\"build-bar-label\">", lbl, "</span>" H.span H.! A.class_ "build-bar-wrap" $
, "<span class=\"build-bar-wrap\"><span class=\"build-bar\" style=\"width:" H.span H.! A.class_ "build-bar"
, show pct, "%\"></span></span>" H.! A.style (H.stringValue ("width:" ++ show pct ++ "%"))
, "<span class=\"build-bar-count\">", show n, "</span>" $ mempty
, "</div>" H.span H.! A.class_ "build-bar-count" $ txt (show n)
]
renderTagsSection :: [(String, Int)] -> Int -> String renderTagsSection :: [(String, Int)] -> Int -> H.Html
renderTagsSection topTags uniqueCount = renderTagsSection topTags uniqueCount =
section "tags" "Tags" $ concat section "tags" "Tags" $ do
[ dl [("Unique tags", commaInt uniqueCount)] dl [("Unique tags", txt (commaInt uniqueCount))]
, table ["Tag", "Items"] (map row topTags) Nothing table ["Tag", "Items"] (map row topTags) Nothing
]
where where
row (t, n) = [link ("/" ++ t ++ "/") t, show n] row (t, n) = [link ("/" ++ t ++ "/") t, txt (show n)]
renderLinks :: Maybe (String, Int, String) -> Int -> Int -> String renderLinks :: Maybe (String, Int, String) -> Int -> Int -> H.Html
renderLinks mMostLinked orphanCount total = renderLinks mMostLinked orphanCount total =
section "links" "Links" $ section "links" "Links" $
dl $ dl
(case mMostLinked of [ case mMostLinked of
Nothing -> [("Most-linked page", "\x2014")] Nothing -> ("Most-linked page", "\x2014")
Just (u, n, t) -> [("Most-linked page", Just (u, n, t) ->
link u t ++ " (" ++ show n ++ " inbound links)")]) ++ ( "Most-linked page"
[ ("Orphan pages", commaInt orphanCount , do pageLink u t
++ " of " ++ commaInt total txt (" (" ++ show n ++ " inbound links)")
++ " (" ++ pctStr orphanCount total ++ ")") ] )
, ( "Orphan pages"
, txt (commaInt orphanCount
++ " of " ++ commaInt total
++ " (" ++ pctStr orphanCount total ++ ")")
)
]
renderEpistemic :: Int -> Int -> Int -> Int -> Int -> String renderEpistemic :: Int -> Int -> Int -> Int -> Int -> H.Html
renderEpistemic total ws wc wi we = renderEpistemic total ws wc wi we =
section "epistemic" "Epistemic coverage" $ section "epistemic" "Epistemic coverage" $
table table
@ -515,47 +669,54 @@ renderEpistemic total ws wc wi we =
] ]
Nothing Nothing
where where
row label n = [label, show n ++ " / " ++ show total, pctStr n total] row label n = [ txt label
, txt (show n ++ " / " ++ show total)
, txt (pctStr n total)
]
renderOutput :: Map.Map String (Int, Integer) -> Int -> Integer -> String renderOutput :: Map.Map String (Int, Integer) -> Int -> Integer -> H.Html
renderOutput grouped totalFiles totalSize = renderOutput grouped totalFiles totalSize =
section "output" "Output" $ section "output" "Output" $
table table
["Type", "Files", "Size"] ["Type", "Files", "Size"]
(map row (sortBy (comparing (Down . snd . snd)) (Map.toList grouped))) (map row (sortBy (comparing (Down . snd . snd)) (Map.toList grouped)))
(Just ["Total", commaInt totalFiles, formatBytes totalSize]) (Just [ "Total"
, txt (commaInt totalFiles)
, txt (formatBytes totalSize)
])
where where
row (ext, (n, sz)) = [ext, commaInt n, formatBytes sz] row (ext, (n, sz)) = [txt ext, txt (commaInt n), txt (formatBytes sz)]
renderRepository :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> String -> String renderRepository :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> String -> H.Html
renderRepository hf hl cf cl jf jl commits firstDate = renderRepository hf hl cf cl jf jl commits firstDate =
section "repository" "Repository" $ section "repository" "Repository" $
dl dl
[ ("Haskell", commaInt hl ++ " lines across " ++ show hf ++ " files") [ ("Haskell", txt (commaInt hl ++ " lines across " ++ show hf ++ " files"))
, ("CSS", commaInt cl ++ " lines across " ++ show cf ++ " files") , ("CSS", txt (commaInt cl ++ " lines across " ++ show cf ++ " files"))
, ("JavaScript", commaInt jl ++ " lines across " ++ show jf ++ " files (excl. minified)") , ("JavaScript", txt (commaInt jl ++ " lines across " ++ show jf ++ " files (excl. minified)"))
, ("Total git commits", commaInt commits) , ("Total git commits", txt (commaInt commits))
, ("Repository started", firstDate) , ("Repository started", txt firstDate)
] ]
renderBuild :: String -> String -> String renderBuild :: String -> String -> H.Html
renderBuild ts dur = renderBuild ts dur =
section "build" "Build" $ section "build" "Build" $
dl dl
[ ("Generated", ts) [ ("Generated", txt ts)
, ("Last build duration", dur) , ("Last build duration", txt dur)
] ]
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Static TOC (matches the nine h2 sections above) -- Static TOC (matches the nine h2 sections above)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
pageTOC :: String pageTOC :: H.Html
pageTOC = "<ol>\n" ++ concatMap item sections ++ "</ol>\n" pageTOC = H.ol $ mapM_ item sections
where where
item (id_, title) = item (id_, title) =
"<li><a href=\"#" ++ id_ ++ "\" data-target=\"" ++ id_ ++ "\">" H.li $ H.a H.! A.href (H.stringValue ("#" ++ id_))
++ title ++ "</a></li>\n" H.! customAttr "data-target" id_
$ txt title
sections = sections =
[ ("content", "Content") [ ("content", "Content")
, ("pages", "Pages") , ("pages", "Pages")
@ -584,7 +745,7 @@ statsRules tags = do
-- ---------------------------------------------------------------- -- ----------------------------------------------------------------
-- Load all content items -- Load all content items
-- ---------------------------------------------------------------- -- ----------------------------------------------------------------
essays <- loadAll ("content/essays/*.md" .&&. hasNoVersion) essays <- loadAll (P.essayPattern .&&. hasNoVersion)
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion) posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion) poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion) fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
@ -664,9 +825,9 @@ statsRules tags = do
-- ---------------------------------------------------------------- -- ----------------------------------------------------------------
essayMetas <- mapM (getMetadata . itemIdentifier) essays essayMetas <- mapM (getMetadata . itemIdentifier) essays
postMetas <- mapM (getMetadata . itemIdentifier) posts postMetas <- mapM (getMetadata . itemIdentifier) posts
let epMetas = essayMetas ++ postMetas let epMetas = essayMetas ++ postMetas
epTotal = length epMetas epTotal = length epMetas
ep f = length (filter (isJust . f) epMetas) ep f = length (filter (isJust . f) epMetas)
withStatus = ep (lookupString "status") withStatus = ep (lookupString "status")
withConf = ep (lookupString "confidence") withConf = ep (lookupString "confidence")
withImp = ep (lookupString "importance") withImp = ep (lookupString "importance")
@ -698,32 +859,33 @@ statsRules tags = do
-- ---------------------------------------------------------------- -- ----------------------------------------------------------------
-- Assemble page -- Assemble page
-- ---------------------------------------------------------------- -- ----------------------------------------------------------------
let content = concat let htmlContent :: H.Html
[ renderContent rows htmlContent = do
, renderPages allPIs oldestDate newestDate renderContent rows
, renderDistribution allWCs renderPages allPIs oldestDate newestDate
, renderTagsSection topTags uniqueTags renderDistribution allWCs
, renderLinks mostLinkedInfo orphanCount (length allPIs) renderTagsSection topTags uniqueTags
, renderEpistemic epTotal withStatus withConf withImp withEv renderLinks mostLinkedInfo orphanCount (length allPIs)
, renderOutput outputGrouped totalFiles totalSize renderEpistemic epTotal withStatus withConf withImp withEv
, renderRepository hf hl cf cl jf jl commits firstDate renderOutput outputGrouped totalFiles totalSize
, renderBuild buildTimestamp lastBuildDur renderRepository hf hl cf cl jf jl commits firstDate
] renderBuild buildTimestamp lastBuildDur
plainText = stripHtmlTags content contentString = renderHtml htmlContent
wc = length (words plainText) plainText = stripHtmlTags contentString
rt = readingTime plainText wc = length (words plainText)
ctx = constField "toc" pageTOC rt = readingTime plainText
<> constField "word-count" (show wc) ctx = constField "toc" (renderHtml pageTOC)
<> constField "reading-time" (show rt) <> constField "word-count" (show wc)
<> constField "title" "Build Telemetry" <> constField "reading-time" (show rt)
<> constField "abstract" "Per-build corpus statistics, tag distribution, \ <> constField "title" "Build Telemetry"
\link analysis, epistemic coverage, output metrics, \ <> constField "abstract" "Per-build corpus statistics, tag distribution, \
\repository overview, and build timing." \link analysis, epistemic coverage, output metrics, \
<> constField "build" "true" \repository overview, and build timing."
<> authorLinksField <> constField "build" "true"
<> siteCtx <> authorLinksField
<> siteCtx
makeItem content makeItem contentString
>>= loadAndApplyTemplate "templates/essay.html" ctx >>= loadAndApplyTemplate "templates/essay.html" ctx
>>= loadAndApplyTemplate "templates/default.html" ctx >>= loadAndApplyTemplate "templates/default.html" ctx
>>= relativizeUrls >>= relativizeUrls
@ -734,7 +896,7 @@ statsRules tags = do
create ["stats/index.html"] $ do create ["stats/index.html"] $ do
route idRoute route idRoute
compile $ do compile $ do
essays <- loadAll ("content/essays/*.md" .&&. hasNoVersion) essays <- loadAll (P.essayPattern .&&. hasNoVersion)
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion) posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion) poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion) fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
@ -773,27 +935,28 @@ statsRules tags = do
today <- unsafeCompiler (utctDay <$> getCurrentTime) today <- unsafeCompiler (utctDay <$> getCurrentTime)
let content = concat let htmlContent :: H.Html
[ section "activity" "Writing activity" (renderHeatmap wordsByDay today) htmlContent = do
, renderMonthlyVolume wordsByDay section "activity" "Writing activity" (renderHeatmap wordsByDay today)
, renderCorpus typeRows allPIs renderMonthlyVolume wordsByDay
, renderNotable allPIs renderCorpus typeRows allPIs
, renderStatsTags topTags uniqueTags renderNotable allPIs
] renderStatsTags topTags uniqueTags
plainText = stripHtmlTags content contentString = renderHtml htmlContent
wc = length (words plainText) plainText = stripHtmlTags contentString
rt = readingTime plainText wc = length (words plainText)
ctx = constField "toc" statsTOC rt = readingTime plainText
<> constField "word-count" (show wc) ctx = constField "toc" (renderHtml statsTOC)
<> constField "reading-time" (show rt) <> constField "word-count" (show wc)
<> constField "title" "Writing Statistics" <> constField "reading-time" (show rt)
<> constField "abstract" "Writing activity, corpus breakdown, \ <> constField "title" "Writing Statistics"
\and tag distribution computed at build time." <> constField "abstract" "Writing activity, corpus breakdown, \
<> constField "build" "true" \and tag distribution computed at build time."
<> authorLinksField <> constField "build" "true"
<> siteCtx <> authorLinksField
<> siteCtx
makeItem content makeItem contentString
>>= loadAndApplyTemplate "templates/essay.html" ctx >>= loadAndApplyTemplate "templates/essay.html" ctx
>>= loadAndApplyTemplate "templates/default.html" ctx >>= loadAndApplyTemplate "templates/default.html" ctx
>>= relativizeUrls >>= relativizeUrls

View File

@ -15,12 +15,13 @@
module Tags module Tags
( buildAllTags ( buildAllTags
, applyTagRules , applyTagRules
, tagLinksField
) where ) where
import Data.List (intercalate, nub) import Data.List (intercalate, nub)
import Hakyll import Hakyll
import Pagination (pageSize, sortAndGroup) import Pagination (sortAndGroup)
import Patterns (tagIndexable)
import Contexts (abstractField, tagLinksField)
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -64,9 +65,7 @@ tagIdentifier = fromFilePath . tagFilePath
-- | Scan all essays and blog posts and build the Tags index. -- | Scan all essays and blog posts and build the Tags index.
buildAllTags :: Rules Tags buildAllTags :: Rules Tags
buildAllTags = buildAllTags =
buildTagsWith getExpandedTags allContent tagIdentifier buildTagsWith getExpandedTags tagIndexable tagIdentifier
where
allContent = ("content/essays/*.md" .||. "content/essays/*/index.md" .||. "content/blog/*.md") .&&. hasNoVersion
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
@ -77,6 +76,7 @@ tagItemCtx :: Context String
tagItemCtx = tagItemCtx =
dateField "date" "%-d %B %Y" dateField "date" "%-d %B %Y"
<> tagLinksField "item-tags" <> tagLinksField "item-tags"
<> abstractField
<> defaultContext <> defaultContext
-- | Page identifier for a tag index page. -- | Page identifier for a tag index page.
@ -106,18 +106,3 @@ applyTagRules tags baseCtx = tagsRules tags $ \tag pat -> do
>>= relativizeUrls >>= relativizeUrls
-- ---------------------------------------------------------------------------
-- Tag links context field
-- ---------------------------------------------------------------------------
-- | List context field exposing an item's own (non-expanded) tags as
-- @tag-name@ / @tag-url@ objects.
--
-- $for(essay-tags)$<a href="$tag-url$">$tag-name$</a>$endfor$
tagLinksField :: String -> Context a
tagLinksField fieldName = listFieldWith fieldName itemCtx $ \item ->
map toItem <$> getTags (itemIdentifier item)
where
toItem t = Item (tagIdentifier t) t
itemCtx = field "tag-name" (return . itemBody)
<> field "tag-url" (\i -> return $ "/" ++ itemBody i ++ "/")

View File

@ -1,10 +1,26 @@
{-# LANGUAGE GHC2021 #-} {-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Shared utilities used across the build system.
--
-- The HTML escapers (one for 'String', one for 'Text') live here so that
-- every filter, context, and renderer goes through the same definition.
-- The expansion order matters: @&@ MUST be replaced first, otherwise the
-- @&amp;@ injected by other rules gets re-escaped to @&amp;amp;@. The
-- pure-character-by-character implementation used here avoids that hazard
-- entirely (each character is mapped exactly once).
module Utils module Utils
( wordCount ( wordCount
, readingTime , readingTime
, escapeHtml , escapeHtml
, escapeHtmlText
, trim
, authorSlugify
, authorNameOf
) where ) where
import Data.Char (isAlphaNum, isSpace, toLower)
import qualified Data.Text as T
-- | Count the number of words in a string (split on whitespace). -- | Count the number of words in a string (split on whitespace).
wordCount :: String -> Int wordCount :: String -> Int
wordCount = length . words wordCount = length . words
@ -14,13 +30,49 @@ wordCount = length . words
readingTime :: String -> Int readingTime :: String -> Int
readingTime s = max 1 (wordCount s `div` 200) readingTime s = max 1 (wordCount s `div` 200)
-- | Escape HTML special characters: <, >, &, ", '. -- | Escape HTML special characters: @&@, @<@, @>@, @\"@, @\'@.
--
-- Safe for use in attribute values and text content. The order of the
-- @case@ branches is irrelevant — each input character maps to exactly
-- one output sequence.
escapeHtml :: String -> String escapeHtml :: String -> String
escapeHtml = concatMap escChar escapeHtml = concatMap escChar
where where
escChar '&' = "&amp;"
escChar '<' = "&lt;" escChar '<' = "&lt;"
escChar '>' = "&gt;" escChar '>' = "&gt;"
escChar '&' = "&amp;"
escChar '"' = "&quot;" escChar '"' = "&quot;"
escChar '\'' = "&#39;" escChar '\'' = "&#39;"
escChar c = [c] escChar c = [c]
-- | 'Text' counterpart of 'escapeHtml'.
escapeHtmlText :: T.Text -> T.Text
escapeHtmlText = T.concatMap escChar
where
escChar '&' = "&amp;"
escChar '<' = "&lt;"
escChar '>' = "&gt;"
escChar '"' = "&quot;"
escChar '\'' = "&#39;"
escChar c = T.singleton c
-- | Strip leading and trailing whitespace.
trim :: String -> String
trim = dropWhile isSpace . reverse . dropWhile isSpace . reverse
-- | Lowercase a string, drop everything that isn't alphanumeric or
-- space, then replace runs of spaces with single hyphens.
--
-- Used for author URL slugs (e.g. @"Levi Neuwirth" → "levi-neuwirth"@).
-- Centralised here so 'Authors' and 'Contexts' cannot drift on Unicode
-- edge cases.
authorSlugify :: String -> String
authorSlugify = map (\c -> if c == ' ' then '-' else c)
. filter (\c -> isAlphaNum c || c == ' ')
. map toLower
-- | Extract the author name from a "Name | url" frontmatter entry.
-- The URL portion is dropped (it's no longer used by the author system,
-- which routes everything through @/authors/{slug}/@).
authorNameOf :: String -> String
authorNameOf s = trim (takeWhile (/= '|') s)

View File

@ -2,7 +2,7 @@ active-repositories: hackage.haskell.org:merge
constraints: any.Glob ==0.10.2, constraints: any.Glob ==0.10.2,
any.HUnit ==1.6.2.0, any.HUnit ==1.6.2.0,
any.JuicyPixels ==3.3.9, any.JuicyPixels ==3.3.9,
any.OneTuple ==0.4.2, any.OneTuple ==0.4.2.1,
any.Only ==0.1, any.Only ==0.1,
any.QuickCheck ==2.15.0.1, any.QuickCheck ==2.15.0.1,
any.StateVar ==1.2.2, any.StateVar ==1.2.2,
@ -182,7 +182,7 @@ constraints: any.Glob ==0.10.2,
any.text-conversions ==0.3.1.1, any.text-conversions ==0.3.1.1,
any.text-icu ==0.8.0.5, any.text-icu ==0.8.0.5,
any.text-iso8601 ==0.1.1, any.text-iso8601 ==0.1.1,
any.text-short ==0.1.6, any.text-short ==0.1.6.1,
any.th-abstraction ==0.6.0.0, any.th-abstraction ==0.6.0.0,
any.th-compat ==0.1.7, any.th-compat ==0.1.7,
any.th-expand-syns ==0.4.12.0, any.th-expand-syns ==0.4.12.0,

View File

@ -20,9 +20,9 @@ executable site
SimilarLinks SimilarLinks
Compilers Compilers
Contexts Contexts
Patterns
Stats Stats
Stability Stability
Metadata
Tags Tags
Pagination Pagination
Citations Citations
@ -33,6 +33,7 @@ executable site
Filters.Smallcaps Filters.Smallcaps
Filters.Wikilinks Filters.Wikilinks
Filters.Transclusion Filters.Transclusion
Filters.EmbedPdf
Filters.Links Filters.Links
Filters.Math Filters.Math
Filters.Code Filters.Code
@ -56,9 +57,10 @@ executable site
bytestring >= 0.11 && < 0.13, bytestring >= 0.11 && < 0.13,
process >= 1.6 && < 1.7, process >= 1.6 && < 1.7,
data-default >= 0.7 && < 0.8, data-default >= 0.7 && < 0.8,
mtl >= 2.3 && < 2.4 mtl >= 2.3 && < 2.4,
blaze-html >= 0.9 && < 0.10,
blaze-markup >= 0.8 && < 0.9
default-language: GHC2021 default-language: GHC2021
ghc-options: ghc-options:
-threaded -threaded
-Wall -Wall
-Wno-unused-imports