ozymandias/build/Filters/Smallcaps.hs

67 lines
2.5 KiB
Haskell

{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Automatic small-caps wrapping for abbreviations in body text.
--
-- Any @Str@ token that consists entirely of uppercase letters (and
-- hyphens) and is at least three characters long is wrapped in
-- @<abbr class="smallcaps">@. This catches CSS, HTML, API, NASA, etc.
-- while avoiding single-character tokens (\"I\", \"A\") and mixed-case
-- words.
--
-- Authors can also use Pandoc span syntax for explicit control:
-- @[TEXT]{.smallcaps}@ — Pandoc already emits the @smallcaps@ class on
-- those spans, and typography.css styles @.smallcaps@ directly, so no
-- extra filter logic is needed for that case.
--
-- The filter is /not/ applied inside headings (where Fira Sans uppercase
-- text looks intentional) or inside @Code@/@RawInline@ inlines.
module Filters.Smallcaps (apply) where
import Data.Char (isUpper, isAlpha)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk)
import qualified Utils as U
-- | Apply smallcaps detection to paragraph-level content.
-- Skips heading blocks to avoid false positives.
apply :: Pandoc -> Pandoc
apply (Pandoc meta blocks) = Pandoc meta (map applyBlock blocks)
applyBlock :: Block -> Block
applyBlock b@(Header {}) = b -- leave headings untouched
applyBlock b = walk wrapCaps b
-- | Wrap an all-caps Str token in an abbr element, preserving any trailing
-- punctuation (comma, period, colon, semicolon, closing paren/bracket)
-- outside the abbr element.
wrapCaps :: Inline -> Inline
wrapCaps (Str t) =
let (core, trail) = stripTrailingPunct t
in if isAbbreviation core
then RawInline "html" $
"<abbr class=\"smallcaps\">" <> escHtml core <> "</abbr>"
<> trail
else Str t
wrapCaps x = x
-- | Split trailing punctuation from the token body.
stripTrailingPunct :: Text -> (Text, Text)
stripTrailingPunct t =
let isPunct c = c `elem` (",.:;!?)]\'" :: String)
trail = T.takeWhileEnd isPunct t
core = T.dropEnd (T.length trail) t
in (core, trail)
-- | True if the token looks like an abbreviation: all uppercase (plus
-- hyphens), at least 3 characters, contains at least one alpha character.
isAbbreviation :: Text -> Bool
isAbbreviation t =
T.length t >= 3
&& T.all (\c -> isUpper c || c == '-') t
&& T.any isAlpha t
escHtml :: Text -> Text
escHtml = U.escapeHtmlText