ozymandias/build/Compilers.hs

211 lines
8.8 KiB
Haskell

{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
module Compilers
( essayCompiler
, postCompiler
, pageCompiler
, poetryCompiler
, fictionCompiler
, compositionCompiler
, photographyCompiler
, readerOpts
, writerOpts
) where
import Hakyll
import Text.Pandoc.Definition (Pandoc (..), Block (..))
import Text.Pandoc.Options (ReaderOptions (..), WriterOptions (..),
HTMLMathMethod (..))
import Text.Pandoc.Extensions (enableExtension, Extension (..))
import qualified Data.Text as T
import Data.Maybe (fromMaybe)
import System.FilePath (takeDirectory)
import Inlines (stringify)
import Utils (wordCount, readingTime, escapeHtml)
import Filters (applyAll, preprocessSource)
import qualified Citations
import qualified Filters.Score as Score
import qualified Filters.Viz as Viz
-- ---------------------------------------------------------------------------
-- Reader / writer options
-- ---------------------------------------------------------------------------
readerOpts :: ReaderOptions
readerOpts = defaultHakyllReaderOptions
-- | Reader options with hard_line_breaks enabled — every source newline within
-- a paragraph becomes a <br>. Used for poetry so stanza lines render as-is.
poetryReaderOpts :: ReaderOptions
poetryReaderOpts = readerOpts
{ readerExtensions = enableExtension Ext_hard_line_breaks
(readerExtensions readerOpts) }
writerOpts :: WriterOptions
writerOpts = defaultHakyllWriterOptions
{ writerHTMLMathMethod = KaTeX ""
, writerHighlightStyle = Nothing
, writerNumberSections = False
, writerTableOfContents = False
}
-- ---------------------------------------------------------------------------
-- TOC extraction
-- ---------------------------------------------------------------------------
-- | Collect (level, identifier, title-text) for h2/h3 headings.
collectHeadings :: Pandoc -> [(Int, T.Text, String)]
collectHeadings (Pandoc _ blocks) = concatMap go blocks
where
go (Header lvl (ident, _, _) inlines)
| lvl == 2 || lvl == 3
= [(lvl, ident, T.unpack (stringify inlines))]
go _ = []
-- ---------------------------------------------------------------------------
-- TOC tree
-- ---------------------------------------------------------------------------
data TOCNode = TOCNode T.Text String [TOCNode]
buildTree :: [(Int, T.Text, String)] -> [TOCNode]
buildTree = go 2
where
go _ [] = []
go lvl ((l, i, t) : rest)
| l == lvl =
let (childItems, remaining) = span (\(l', _, _) -> l' > lvl) rest
children = go (lvl + 1) childItems
in TOCNode i t children : go lvl remaining
| l < lvl = []
| otherwise = go lvl rest -- skip unexpected deeper items at this level
renderTOC :: [TOCNode] -> String
renderTOC [] = ""
renderTOC nodes = "<ol>\n" ++ concatMap renderNode nodes ++ "</ol>\n"
where
renderNode (TOCNode i t children) =
"<li><a href=\"#" ++ T.unpack i ++ "\" data-target=\"" ++ T.unpack i ++ "\">"
++ Utils.escapeHtml t ++ "</a>" ++ renderTOC children ++ "</li>\n"
-- | Build a TOC HTML string from a Pandoc document.
buildTOC :: Pandoc -> String
buildTOC doc = renderTOC (buildTree (collectHeadings doc))
-- ---------------------------------------------------------------------------
-- Compilers
-- ---------------------------------------------------------------------------
-- | Shared compiler pipeline parameterised on reader options.
-- Saves toc/word-count/reading-time/bibliography snapshots.
essayCompilerWith :: ReaderOptions -> Compiler (Item String)
essayCompilerWith rOpts = do
-- Raw Markdown source (used for word count / reading time).
body <- getResourceBody
let src = itemBody body
-- Apply source-level preprocessors (wikilinks, etc.) before parsing.
let body' = itemSetBody (preprocessSource src) body
-- Parse to Pandoc AST.
pandocItem <- readPandocWith rOpts body'
-- Get further-reading keys from Hakyll metadata (YAML frontmatter is stripped
-- before being passed to readPandocWith, so we read it from Hakyll instead).
ident <- getUnderlying
meta <- getMetadata ident
let frKeys = map T.pack $ fromMaybe [] (lookupStringList "further-reading" meta)
let bibPath = T.pack $ fromMaybe "data/bibliography.bib" (lookupString "bibliography" meta)
-- Run citeproc, transform citation spans → superscripts, extract bibliography.
(pandocWithCites, bibHtml, furtherHtml) <- unsafeCompiler $
Citations.applyCitations frKeys bibPath (itemBody pandocItem)
-- Inline SVG score fragments and data visualizations (both read files
-- relative to the source file's directory).
filePath <- getResourceFilePath
let srcDir = takeDirectory filePath
pandocWithScores <- unsafeCompiler $
Score.inlineScores srcDir pandocWithCites
pandocWithViz <- unsafeCompiler $
Viz.inlineViz srcDir pandocWithScores
-- Apply remaining AST-level filters (sidenotes, smallcaps, links, etc.).
-- applyAll touches the filesystem via Images.apply (webp existence
-- check), so it runs through unsafeCompiler.
pandocFiltered <- unsafeCompiler $ applyAll srcDir pandocWithViz
let pandocItem' = itemSetBody pandocFiltered pandocItem
-- Build TOC from the filtered AST.
let toc = buildTOC pandocFiltered
-- Write HTML.
let htmlItem = writePandocWith writerOpts pandocItem'
-- Save snapshots keyed to this item's identifier.
_ <- saveSnapshot "toc" (itemSetBody toc htmlItem)
_ <- saveSnapshot "word-count" (itemSetBody (show (wordCount src)) htmlItem)
_ <- saveSnapshot "reading-time" (itemSetBody (show (readingTime src)) htmlItem)
_ <- saveSnapshot "bibliography" (itemSetBody (T.unpack bibHtml) htmlItem)
_ <- saveSnapshot "further-reading-refs" (itemSetBody (T.unpack furtherHtml) htmlItem)
return htmlItem
-- | Compiler for essays.
essayCompiler :: Compiler (Item String)
essayCompiler = essayCompilerWith readerOpts
-- | Compiler for blog posts: same pipeline as essays.
postCompiler :: Compiler (Item String)
postCompiler = essayCompiler
-- | Compiler for poetry: enables hard_line_breaks so each source line becomes
-- a <br>, preserving verse line endings without manual trailing-space markup.
poetryCompiler :: Compiler (Item String)
poetryCompiler = essayCompilerWith poetryReaderOpts
-- | Compiler for fiction: same pipeline as essays; visual differences are
-- handled entirely by the reading template and reading.css.
fictionCompiler :: Compiler (Item String)
fictionCompiler = essayCompiler
-- | Compiler for music composition landing pages: full essay pipeline
-- (TOC, sidenotes, score fragments, citations, smallcaps, etc.).
compositionCompiler :: Compiler (Item String)
compositionCompiler = essayCompiler
-- | Compiler for photography pages: body prose runs through the same
-- source preprocessors and AST filters as other content (so wikilinks,
-- smallcaps, sidenotes, image @<picture>@ wrapping, etc. all work in
-- caption / process-note prose), but skips TOC, word-count,
-- reading-time, citations, and further-reading. Visual content has no
-- meaningful word count, and the epistemic / bibliography surfaces in
-- 'essayCtx' don't apply here.
photographyCompiler :: Compiler (Item String)
photographyCompiler = do
body <- getResourceBody
let src = itemBody body
body' = itemSetBody (preprocessSource src) body
filePath <- getResourceFilePath
let srcDir = takeDirectory filePath
pandocItem <- readPandocWith readerOpts body'
pandocFiltered <- unsafeCompiler $ applyAll srcDir (itemBody pandocItem)
let pandocItem' = itemSetBody pandocFiltered pandocItem
return (writePandocWith writerOpts pandocItem')
-- | Compiler for simple pages: filters applied, no TOC snapshot.
pageCompiler :: Compiler (Item String)
pageCompiler = do
body <- getResourceBody
let src = itemBody body
body' = itemSetBody (preprocessSource src) body
filePath <- getResourceFilePath
let srcDir = takeDirectory filePath
pandocItem <- readPandocWith readerOpts body'
pandocFiltered <- unsafeCompiler $ applyAll srcDir (itemBody pandocItem)
let pandocItem' = itemSetBody pandocFiltered pandocItem
let htmlItem = writePandocWith writerOpts pandocItem'
_ <- saveSnapshot "word-count" (itemSetBody (show (wordCount src)) htmlItem)
_ <- saveSnapshot "reading-time" (itemSetBody (show (readingTime src)) htmlItem)
return htmlItem