{-# LANGUAGE GHC2021 #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} -- | Citation processing pipeline. -- -- Steps: -- 1. Skip if the document contains no Cite nodes and frKeys is empty. -- 2. Inject default bibliography / CSL metadata if absent. -- 3. Inject nocite entries for further-reading keys. -- 4. Run Pandoc's citeproc to resolve references and generate bibliography. -- 5. Walk the AST and replace Cite nodes with numbered superscripts. -- 6. Extract the citeproc bibliography div from the body, reorder by -- first-appearance, split into cited / further-reading sections, -- and render to an HTML string for the template's $bibliography$ field. -- -- Returns (Pandoc without refs div, bibliography HTML). -- The bibliography HTML is empty when there are no citations. -- -- NOTE: processCitations with in-text CSL leaves Cite nodes as Cite nodes -- in the AST — it only populates their inline content and creates the refs -- div. The HTML writer later wraps them in . We must -- therefore match Cite nodes (not Span nodes) in our transform pass. -- -- NOTE: Hakyll strips YAML frontmatter before passing to readPandocWith, so -- the Pandoc Meta is empty. further-reading keys are passed explicitly by the -- caller (read from Hakyll's own metadata via lookupStringList). -- -- NOTE: Does not import Contexts to avoid cycles. module Citations ( applyCitations -- * For synthetic bibliography pages (Phase 6b) , renderBibliographyHtml ) where import Data.List (intercalate, intersperse, nub, partition, sortBy) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe (fromMaybe, mapMaybe) import Data.Ord (comparing) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc import Text.Pandoc.Citeproc (processCitations) import Text.Pandoc.Walk import BibExtras (BibExtra (..), emptyBibExtra, parseBibExtras) -- --------------------------------------------------------------------------- -- Public API -- --------------------------------------------------------------------------- -- | Process citations in a Pandoc document. -- @frKeys@: further-reading citation keys (read from Hakyll metadata by -- the caller, since Hakyll strips YAML frontmatter before parsing). -- Returns @(body, citedHtml, furtherHtml)@ where @body@ has Cite nodes -- replaced with numbered superscripts and no bibliography div, -- @citedHtml@ is the inline-cited references HTML, and @furtherHtml@ is -- the further-reading-only references HTML (each empty when absent). applyCitations :: [Text] -> Text -> Pandoc -> IO (Pandoc, Text, Text) applyCitations frKeys bibPath doc | not (hasCitations frKeys doc) = return (doc, "", "") | otherwise = do -- Read custom fields (@file:@, @keywords:@) from the .bib file -- in parallel with citeproc. These don't affect citation -- resolution — they enhance the rendered bibliography entries. extras <- parseBibExtras (T.unpack bibPath) let doc1 = injectMeta frKeys bibPath doc processed <- runIOorExplode $ processCitations doc1 let (body, citedHtml, furtherHtml) = transformAndExtract extras frKeys processed return (body, citedHtml, furtherHtml) -- | Render a standalone bibliography section from a list of citekeys and -- a set of @.bib@ file paths. Used by the synthetic @\/bibliography\/@ -- pages (Phase 6b) to produce CSL-formatted entries outside of any -- essay's citation context. -- -- Given citekeys are passed to citeproc via a synthesized @nocite@ -- metadata entry on an otherwise empty document; citeproc emits a -- @refs@ Div whose children are the rendered entries. We then reorder -- the children to match the caller-supplied @keys@ list (citeproc's -- own ordering is overridden so callers control sort), enhance each -- entry with the Phase 6a PDF-link and keyword-strip hooks, and -- render to HTML wrapped in @\
@. -- -- @extras@ is the combined 'BibExtra' map for the same @.bib@ files; -- passed in so that 'enhanceEntry' can consult @file:@ and -- @keywords:@ without each entry re-parsing the files. renderBibliographyHtml :: [FilePath] -- ^ .bib paths -> Map String BibExtra -- ^ enhancement map -> [String] -- ^ citekeys, in desired order -> IO Text renderBibliographyHtml _ _ [] = return "" renderBibliographyHtml bibPaths extras keys = do let doc = synthesizeNociteDoc bibPaths keys processed <- runIOorExplode $ processCitations doc let refsDivs = concatMap unwrapRefs (pandocBlocks processed) ordered = reorderByKeys keys refsDivs enhanced = map (enhanceEntry extras) ordered return (renderEntries "csl-bib-body" enhanced) where pandocBlocks (Pandoc _ bs) = bs unwrapRefs (Div ("refs", _, _) children) = children unwrapRefs _ = [] -- | Build a Pandoc doc whose only citation-relevant content is a -- @nocite@ metadata entry listing every supplied citekey. Runs -- through 'processCitations' to emit a fully-formatted @refs@ Div -- containing every entry. synthesizeNociteDoc :: [FilePath] -> [String] -> Pandoc synthesizeNociteDoc bibPaths keys = let meta = Meta $ Map.fromList [ ("bibliography", bibPathMeta bibPaths) , ("csl", MetaString "data/chicago-notes.csl") , ("nocite", nociteVal (map T.pack keys)) ] in Pandoc meta [] where bibPathMeta [p] = MetaString (T.pack p) bibPathMeta ps = MetaList (map (MetaString . T.pack) ps) nociteVal ks = MetaInlines (intercalate [Space] (map mkCite ks)) mkCite k = [Cite [Citation k [] [] AuthorInText 1 0] [Str ("@" <> k)]] -- | Reorder a list of @csl-entry@ Divs to match a requested key order. -- Divs not in the key list (shouldn't happen in practice, but safe -- by construction) drop to the end in their original order. reorderByKeys :: [String] -> [Block] -> [Block] reorderByKeys keys divs = let divMap = Map.fromList [ (T.unpack (stripRefPrefix d), blk) | blk@(Div (d, _, _) _) <- divs ] found = mapMaybe (`Map.lookup` divMap) keys leftovers = filter (\blk -> case blk of Div (d, _, _) _ -> T.unpack (stripRefPrefix d) `notElem` keys _ -> True) divs in found ++ leftovers -- --------------------------------------------------------------------------- -- Detection -- --------------------------------------------------------------------------- -- | True if the document has inline [@key] cites or a further-reading list. hasCitations :: [Text] -> Pandoc -> Bool hasCitations frKeys doc = not (null (query collectCites doc)) || not (null frKeys) where collectCites (Cite {}) = [()] collectCites _ = [] -- --------------------------------------------------------------------------- -- Metadata injection -- --------------------------------------------------------------------------- -- | Inject default bibliography / CSL paths and nocite for further-reading. injectMeta :: [Text] -> Text -> Pandoc -> Pandoc injectMeta frKeys bibPath (Pandoc meta blocks) = let meta1 = if null frKeys then meta else insertMeta "nocite" (nociteVal frKeys) meta meta2 = case lookupMeta "bibliography" meta1 of Nothing -> insertMeta "bibliography" (MetaString bibPath) meta1 Just _ -> meta1 meta3 = case lookupMeta "csl" meta2 of Nothing -> insertMeta "csl" (MetaString "data/chicago-notes.csl") meta2 Just _ -> meta2 in Pandoc meta3 blocks where -- Each key becomes its own Cite node (matching what pandoc parses from -- nocite: "@key1 @key2" in YAML frontmatter). nociteVal keys = MetaInlines (intercalate [Space] (map mkCiteNode keys)) mkCiteNode k = [Cite [Citation k [] [] AuthorInText 1 0] [Str ("@" <> k)]] -- | Insert a key/value pair into Pandoc Meta. insertMeta :: Text -> MetaValue -> Meta -> Meta insertMeta k v (Meta m) = Meta (Map.insert k v m) -- --------------------------------------------------------------------------- -- Transform pass -- --------------------------------------------------------------------------- -- | Number citation Cite nodes and extract the bibliography div. transformAndExtract :: Map String BibExtra -> [Text] -> Pandoc -> (Pandoc, Text, Text) transformAndExtract extras frKeys doc@(Pandoc meta _) = let citeOrder = collectCiteOrder doc -- keys, first-appearance order keyNums = Map.fromList (zip citeOrder [1 :: Int ..]) -- Replace Cite nodes with numbered superscript markers doc' = walk (transformInline keyNums) doc -- Pull bibliography div out of body and render to HTML (bodyBlocks, citedHtml, furtherHtml) = extractBibliography extras citeOrder frKeys (pandocBlocks doc') in (Pandoc meta bodyBlocks, citedHtml, furtherHtml) where pandocBlocks (Pandoc _ bs) = bs -- | Collect citation keys in order of first appearance (body only). -- NOTE: after processCitations, Cite nodes remain as Cite in the AST; -- they are not converted to Span nodes with in-text CSL. -- We query only blocks (not metadata) so that nocite Cite nodes injected -- into the 'nocite' meta field are not mistakenly treated as inline citations. collectCiteOrder :: Pandoc -> [Text] collectCiteOrder (Pandoc _ blocks) = nub (query extractKeys blocks) where extractKeys (Cite citations _) = map citationId citations extractKeys _ = [] -- | Replace a Cite node with a numbered superscript marker. transformInline :: Map Text Int -> Inline -> Inline transformInline keyNums (Cite citations _) = let keys = map citationId citations nums = mapMaybe (`Map.lookup` keyNums) keys in case (keys, nums) of -- Both lists are guaranteed non-empty by the @null nums@ check -- below, but pattern-match to keep this total instead of -- relying on @head@. (firstKey : _, firstNum : _) -> RawInline "html" (markerHtml keys firstKey firstNum nums) _ -> Str "" transformInline _ x = x markerHtml :: [Text] -> Text -> Int -> [Int] -> Text markerHtml keys firstKey firstNum nums = let label = "[" <> T.intercalate "," (map tshow nums) <> "]" allIds = T.intercalate " " (map ("ref-" <>) keys) in " tshow firstNum <> "\">" <> " firstKey <> "\" class=\"cite-link\"" <> " data-cite-keys=\"" <> allIds <> "\">" <> label <> "" where tshow = T.pack . show -- --------------------------------------------------------------------------- -- Bibliography extraction + rendering -- --------------------------------------------------------------------------- -- | Separate the @refs@ div from body blocks and render it to HTML. -- Returns @(bodyBlocks, citedHtml, furtherHtml)@. extractBibliography :: Map String BibExtra -> [Text] -> [Text] -> [Block] -> ([Block], Text, Text) extractBibliography extras citeOrder frKeys blocks = let (bodyBlocks, refDivs) = partition (not . isRefsDiv) blocks (citedHtml, furtherHtml) = case refDivs of [] -> ("", "") (d:_) -> renderBibDiv extras citeOrder frKeys d in (bodyBlocks, citedHtml, furtherHtml) where isRefsDiv (Div ("refs", _, _) _) = True isRefsDiv _ = False -- | Render the citeproc @refs@ Div into two HTML strings: -- @(citedHtml, furtherHtml)@ — each is empty when there are no entries -- in that section. Headings are rendered in the template, not here. -- -- Entry bodies are enhanced before numbering: title-wrapped as a -- @.pdf-link[data-pdf-src]@ when the .bib @file:@ field is set (so -- popups.js's PDF hover preview fires), and a trailing -- @\
@ appended when @keywords:@ is set. renderBibDiv :: Map String BibExtra -> [Text] -> [Text] -> Block -> (Text, Text) renderBibDiv extras citeOrder _frKeys (Div _ children) = let enhanced = map (enhanceEntry extras) children keyIndex = Map.fromList (zip citeOrder [0 :: Int ..]) (citedEntries, furtherEntries) = partition (isCited keyIndex) enhanced sorted = sortBy (comparing (entryOrder keyIndex)) citedEntries numbered = zipWith addNumber [1..] sorted citedHtml = renderEntries "csl-bib-body cite-refs" numbered furtherHtml | null furtherEntries = "" | otherwise = renderEntries "csl-bib-body further-reading-refs" furtherEntries in (citedHtml, furtherHtml) renderBibDiv _ _ _ _ = ("", "") -- --------------------------------------------------------------------------- -- Bib entry enhancement (Phase 6a) -- --------------------------------------------------------------------------- -- | Augment a single @csl-entry@ Div with the custom fields we parsed -- from the .bib file. Other Blocks pass through unchanged. enhanceEntry :: Map String BibExtra -> Block -> Block enhanceEntry extras b@(Div attrs@(divId, _, _) blocks) = let key = T.unpack (stripRefPrefix divId) extra = fromMaybe emptyBibExtra (Map.lookup key extras) withLink = case bibFile extra of Nothing -> blocks Just fp -> map (wrapFirstTitleBlock (T.pack fp)) blocks withKw = withLink ++ keywordsBlocks (bibKeywords extra) in case (bibFile extra, bibKeywords extra) of (Nothing, []) -> b _ -> Div attrs withKw enhanceEntry _ b = b -- | In one block of an entry, wrap the first title-bearing inline -- with a @.pdf-link@ anchor. Pandoc's CSL-formatted references -- render the title as either a @Quoted@ (article titles in -- Chicago-notes: "Paper Title") or an @Emph@ (book titles: -- /Book Title/), and those are the first such inline in each -- entry. We wrap at the block level and fall back to passing the -- block through if no matching inline appears. wrapFirstTitleBlock :: Text -> Block -> Block wrapFirstTitleBlock href = \case Para ils -> Para (wrapFirstTitle href ils) Plain ils -> Plain (wrapFirstTitle href ils) other -> other -- | Left-to-right scan: wrap the first title-bearing inline in a link -- pointing at the PDF. Pandoc's CSL renderer emits article titles as -- @Span@ nodes (whose rendered HTML wraps quotation marks around the -- title text) and book titles as @Emph@; @Quoted@ appears in some -- other CSL styles. First match of any of these is treated as the -- title; subsequent ones pass through — journal names are also -- @Emph@ on @\@article@ entries but come after the @Span@ title, so -- the article case picks the right target. wrapFirstTitle :: Text -> [Inline] -> [Inline] wrapFirstTitle href inls = reverse . fst $ foldl step ([], False) inls where step (acc, True) inl = (inl:acc, True) step (acc, False) inl = case inl of Span _ _ -> (asPdfLink href [inl] : acc, True) Quoted _ _ -> (asPdfLink href [inl] : acc, True) Emph _ -> (asPdfLink href [inl] : acc, True) _ -> (inl:acc, False) -- | Build the @.pdf-link[data-pdf-src]@ anchor that popups.js binds to. -- See @static/js/popups.js:112@ for the matching selector. asPdfLink :: Text -> [Inline] -> Inline asPdfLink href content = Link ("", ["pdf-link"], [("data-pdf-src", href)]) content (href, "") -- | Trailing keyword strip, linking each keyword to the future -- @/bibliography/\/@ page. Returns @[]@ when the keyword -- list is empty so the entry gets no extra block at all. keywordsBlocks :: [String] -> [Block] keywordsBlocks [] = [] keywordsBlocks ks = [ Div ("", ["bib-keywords"], []) [Plain (intersperse (Str ", ") (map keywordLink ks))] ] where keywordLink k = Link ("", ["bib-keyword"], []) [Str (T.pack k)] (T.pack ("/bibliography/" ++ k ++ "/"), "") isCited :: Map Text Int -> Block -> Bool isCited keyIndex (Div (rid, _, _) _) = Map.member (stripRefPrefix rid) keyIndex isCited _ _ = False entryOrder :: Map Text Int -> Block -> Int entryOrder keyIndex (Div (rid, _, _) _) = fromMaybe maxBound $ Map.lookup (stripRefPrefix rid) keyIndex entryOrder _ _ = maxBound -- | Prepend [N] marker to a bibliography entry block. addNumber :: Int -> Block -> Block addNumber n (Div attrs@(divId, _, _) content) = Div attrs ( Plain [ RawInline "html" (" divId <> "\">[" <> T.pack (show n) <> "]") ] : content ) addNumber _ b = b -- | Strip the @ref-@ prefix that citeproc adds to div IDs. stripRefPrefix :: Text -> Text stripRefPrefix t = fromMaybe t (T.stripPrefix "ref-" t) -- | Render a list of blocks as an HTML string (used for bibliography sections). renderEntries :: Text -> [Block] -> Text renderEntries cls entries = case runPure (writeHtml5String wOpts (Pandoc nullMeta entries)) of Left _ -> "" Right html -> "
cls <> "\">\n" <> html <> "
\n" where wOpts = def { writerWrapText = WrapNone }