diff --git a/build/Authors.hs b/build/Authors.hs index 393fb5b..ea30c6b 100644 --- a/build/Authors.hs +++ b/build/Authors.hs @@ -12,32 +12,29 @@ module Authors ( buildAllAuthors , applyAuthorRules - , authorLinksField ) where -import Data.Char (isAlphaNum, toLower) import Data.Maybe (fromMaybe) import Hakyll -import Hakyll.Core.Metadata (lookupStringList) import Pagination (sortAndGroup) -import Tags (tagLinksField) +import Patterns (authorIndexable) +import Contexts (abstractField, tagLinksField) +import Utils (authorSlugify, authorNameOf) -- --------------------------------------------------------------------------- -- Slug helpers +-- +-- The slugify and nameOf helpers used to live here in their own +-- definitions; they now defer to 'Utils' so that they cannot drift from +-- the 'Contexts' versions on Unicode edge cases. -- --------------------------------------------------------------------------- --- | Lowercase, replace spaces with hyphens, strip anything else. slugify :: String -> String -slugify = map (\c -> if c == ' ' then '-' else c) - . filter (\c -> isAlphaNum c || c == ' ') - . map toLower +slugify = authorSlugify --- | Extract the author name from a "Name | url" entry, trimming whitespace. nameOf :: String -> String -nameOf s = strip $ case break (== '|') s of { (n, _) -> n } - where - strip = reverse . dropWhile (== ' ') . reverse . dropWhile (== ' ') +nameOf = authorNameOf -- --------------------------------------------------------------------------- @@ -47,8 +44,10 @@ nameOf s = strip $ case break (== '|') s of { (n, _) -> n } defaultAuthor :: String defaultAuthor = "Levi Neuwirth" +-- | Content patterns indexed by author. Sourced from 'Patterns.authorIndexable' +-- so this stays in lockstep with Tags.hs and Backlinks.hs. allContent :: Pattern -allContent = ("content/essays/*.md" .||. "content/blog/*.md") .&&. 
hasNoVersion +allContent = authorIndexable -- --------------------------------------------------------------------------- @@ -103,25 +102,7 @@ applyAuthorRules authors baseCtx = tagsRules authors $ \name pat -> do where itemCtx = dateField "date" "%-d %B %Y" <> tagLinksField "item-tags" + <> abstractField <> defaultContext --- --------------------------------------------------------------------------- --- Context field --- --------------------------------------------------------------------------- - --- | Exposes each item's authors as @author-name@ / @author-url@ pairs. --- All links point to /authors/{slug}/, regardless of any URL in frontmatter. --- Defaults to Levi Neuwirth when no "authors" frontmatter key is present. --- --- Usage in templates: --- $for(author-links)$$author-name$$sep$, $endfor$ -authorLinksField :: Context a -authorLinksField = listFieldWith "author-links" ctx $ \item -> do - meta <- getMetadata (itemIdentifier item) - let entries = fromMaybe [] (lookupStringList "authors" meta) - names = if null entries then [defaultAuthor] else map nameOf entries - return $ map (\n -> Item (fromFilePath "") (n, "/authors/" ++ slugify n ++ "/")) names - where - ctx = field "author-name" (return . fst . itemBody) - <> field "author-url" (return . snd . 
itemBody) diff --git a/build/Backlinks.hs b/build/Backlinks.hs index e364b81..0f37ef5 100644 --- a/build/Backlinks.hs +++ b/build/Backlinks.hs @@ -32,10 +32,12 @@ import Data.Ord (comparing) import Data.Maybe (fromMaybe) import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) +import qualified Data.ByteString as BS import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE import qualified Data.Text.Encoding as TE +import qualified Data.Text.Encoding.Error as TE import qualified Data.Aeson as Aeson import Data.Aeson ((.=)) import Text.Pandoc.Class (runPure) @@ -47,6 +49,7 @@ import Text.Pandoc.Walk (query) import Hakyll import Compilers (readerOpts, writerOpts) import Filters (preprocessSource) +import qualified Patterns as P -- --------------------------------------------------------------------------- -- Link-with-context entry (intermediate, saved by the "links" pass) @@ -184,28 +187,47 @@ linksCompiler = do -- --------------------------------------------------------------------------- -- | Normalise an internal URL as a map key: strip query string, fragment, --- and trailing @.html@; ensure a leading slash. +-- and trailing @.html@; ensure a leading slash; percent-decode the path +-- so that @\/essays\/caf%C3%A9@ and @\/essays\/café@ collide on the same +-- key. normaliseUrl :: String -> String normaliseUrl url = let t = T.pack url t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t))) t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1 t3 = fromMaybe t2 (T.stripSuffix ".html" t2) - in T.unpack t3 + in percentDecode (T.unpack t3) + +-- | Decode percent-escapes (@%XX@) into raw bytes, then re-interpret the +-- resulting bytestring as UTF-8. Invalid escapes are passed through +-- verbatim so this is safe to call on already-decoded input. +percentDecode :: String -> String +percentDecode = T.unpack . TE.decodeUtf8With lenientDecode . pack . 
go + where + go [] = [] + go ('%':a:b:rest) + | Just hi <- hexDigit a + , Just lo <- hexDigit b + = fromIntegral (hi * 16 + lo) : go rest + go (c:rest) = fromIntegral (fromEnum c) : go rest + + hexDigit c + | c >= '0' && c <= '9' = Just (fromEnum c - fromEnum '0') + | c >= 'a' && c <= 'f' = Just (fromEnum c - fromEnum 'a' + 10) + | c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10) + | otherwise = Nothing + + pack = BS.pack + lenientDecode = TE.lenientDecode -- --------------------------------------------------------------------------- --- Content patterns (must match the rules in Site.hs) +-- Content patterns (must match the rules in Site.hs — sourced from +-- Patterns.allContent so additions to the canonical list automatically +-- propagate to backlinks). -- --------------------------------------------------------------------------- allContent :: Pattern -allContent = - "content/essays/*.md" - .||. "content/essays/*/index.md" - .||. "content/blog/*.md" - .||. "content/poetry/*.md" - .||. "content/fiction/*.md" - .||. "content/music/*/index.md" - .||. 
"content/*.md" +allContent = P.allContent -- --------------------------------------------------------------------------- -- Hakyll rules diff --git a/build/Catalog.hs b/build/Catalog.hs index 5912f3f..85039a8 100644 --- a/build/Catalog.hs +++ b/build/Catalog.hs @@ -7,7 +7,8 @@ module Catalog ( musicCatalogCtx ) where -import Data.List (groupBy, sortBy) +import Data.Char (isSpace, toLower) +import Data.List (groupBy, isPrefixOf, sortBy) import Data.Maybe (fromMaybe) import Data.Ord (comparing) import Data.Aeson (Value (..)) @@ -15,7 +16,6 @@ import qualified Data.Aeson.KeyMap as KM import qualified Data.Vector as V import qualified Data.Text as T import Hakyll -import Hakyll.Core.Metadata (lookupStringList) import Contexts (siteCtx) -- --------------------------------------------------------------------------- @@ -115,6 +115,40 @@ parseCatalogEntry item = do -- --------------------------------------------------------------------------- -- HTML rendering -- --------------------------------------------------------------------------- +-- +-- Trust model: per the site convention (see also Stats.hs:pageLink), +-- frontmatter @title@ values are author-controlled trusted HTML and may +-- contain inline markup such as @...@. They are emitted +-- pre-escaped — but we still escape every other interpolated frontmatter +-- value (year, duration, instrumentation) and sanitize hrefs through +-- 'safeHref', so a stray @<@ in those fields cannot break the markup. + +-- | Defense-in-depth href sanitiser. Mirrors 'Stats.isSafeUrl'. 
+safeHref :: String -> String +safeHref u = + let norm = map toLower (dropWhile isSpace u) + in if not ("//" `isPrefixOf` norm) + && any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"] + then escAttr u + else "#" + +escAttr :: String -> String +escAttr = concatMap esc + where + esc '&' = "&amp;" + esc '<' = "&lt;" + esc '>' = "&gt;" + esc '"' = "&quot;" + esc '\'' = "&#39;" + esc c = [c] + +escText :: String -> String +escText = concatMap esc + where + esc '&' = "&amp;" + esc '<' = "&lt;" + esc '>' = "&gt;" + esc c = [c] renderIndicators :: CatalogEntry -> String renderIndicators e = concatMap render @@ -129,19 +163,21 @@ renderEntry :: CatalogEntry -> String renderEntry e = concat [ "
No entries yet.
" - else concatMap renderEntry sorted + ++ (if null sorted + then "No entries yet.
" + else concatMap renderEntry sorted) ++ "tag if the abstract is a single paragraph. +-- Strips the outer @
@ wrapping. A single-paragraph abstract becomes a +-- bare @Plain@ so the rendered HTML is unwrapped inlines. A multi-paragraph +-- abstract (author used a blank line in the YAML literal block) is flattened +-- to a single @Plain@ with @LineBreak@ separators between what were +-- originally paragraph boundaries — the visual break is preserved without +-- emitting stray @
@ tags inside the metadata block. Mixed block content
+-- (e.g. an abstract containing a blockquote) falls through unchanged.
abstractField :: Context String
abstractField = field "abstract" $ \item -> do
meta <- getMetadata (itemIdentifier item)
@@ -138,12 +196,20 @@ abstractField = field "abstract" $ \item -> do
doc <- readMarkdown defaultHakyllReaderOptions (T.pack src)
let doc' = case doc of
Pandoc m [Para ils] -> Pandoc m [Plain ils]
- _ -> doc
+ Pandoc m blocks
+ | all isPara blocks && not (null blocks) ->
+ let joined = intercalate [LineBreak]
+ [ils | Para ils <- blocks]
+ in Pandoc m [Plain joined]
+ _ -> doc
let wOpts = defaultHakyllWriterOptions { writerHTMLMathMethod = MathML }
writeHtml5String wOpts doc'
case pandocResult of
Left err -> fail $ "Pandoc error rendering abstract: " ++ show err
Right html -> return (T.unpack html)
+ where
+ isPara (Para _) = True
+ isPara _ = False
siteCtx :: Context String
siteCtx =
@@ -208,21 +274,37 @@ dotsField ctxKey metaKey = field ctxKey $ \item -> do
-- | @$confidence-trend$@: ↑, ↓, or → derived from the last two entries
-- in the @confidence-history@ frontmatter list. Returns @noResult@ when
-- there is no history or only a single entry.
+--
+-- The arrow flips when the absolute change crosses 'trendThreshold'
+-- (currently 5 percentage points). Smaller swings count as flat.
confidenceTrendField :: Context String
confidenceTrendField = field "confidence-trend" $ \item -> do
meta <- getMetadata (itemIdentifier item)
case lookupStringList "confidence-history" meta of
- Nothing -> fail "no confidence history"
- Just xs | length xs < 2 -> fail "no confidence history"
- Just xs ->
- let prev = readMaybe (xs !! (length xs - 2)) :: Maybe Int
- cur = readMaybe (last xs) :: Maybe Int
- in case (prev, cur) of
- (Just p, Just c)
- | c - p > 5 -> return "\x2191" -- ↑
- | p - c > 5 -> return "\x2193" -- ↓
- | otherwise -> return "\x2192" -- →
- _ -> return "\x2192"
+ Nothing -> fail "no confidence history"
+ Just xs -> case lastTwo xs of
+ Nothing -> fail "no confidence history"
+ Just (prevS, curS) ->
+ let prev = readMaybe prevS :: Maybe Int
+ cur = readMaybe curS :: Maybe Int
+ in case (prev, cur) of
+ (Just p, Just c)
+ | c - p > trendThreshold -> return "\x2191" -- ↑
+ | p - c > trendThreshold -> return "\x2193" -- ↓
+ | otherwise -> return "\x2192" -- →
+ _ -> return "\x2192"
+ where
+ trendThreshold :: Int
+ trendThreshold = 5
+
+ -- Total replacement for @(xs !! (length xs - 2), last xs)@: returns
+ -- the last two elements of a list, in order, or 'Nothing' when the
+ -- list has fewer than two entries.
+ lastTwo :: [a] -> Maybe (a, a)
+ lastTwo [] = Nothing
+ lastTwo [_] = Nothing
+ lastTwo [a, b] = Just (a, b)
+ lastTwo (_ : rest) = lastTwo rest
-- | @$overall-score$@: weighted composite of confidence (50 %),
-- evidence quality (30 %), and importance (20 %), expressed as an
@@ -332,12 +414,27 @@ data Movement = Movement
, movAudio :: Maybe String
}
-parseMovements :: Metadata -> [Movement]
-parseMovements meta =
+-- | Parse the @movements@ frontmatter key. Returns parsed movements and a
+-- list of human-readable warnings for any entries that failed to parse.
+-- Callers can surface the warnings via 'unsafeCompiler' so silent typos
+-- don't strip movements without diagnostic.
+parseMovementsWithWarnings :: Metadata -> ([Movement], [String])
+parseMovementsWithWarnings meta =
case KM.lookup "movements" meta of
- Just (Array v) -> catMaybes $ map parseOne (V.toList v)
- _ -> []
+ Just (Array v) ->
+ let results = zipWith parseIndexed [1 :: Int ..] (V.toList v)
+ in ( [m | Right m <- results]
+ , [w | Left w <- results]
+ )
+ _ -> ([], [])
where
+ parseIndexed i value =
+ case parseOne value of
+ Just m -> Right m
+ Nothing -> Left $
+ "movement #" ++ show i ++ " is missing a required field "
+ ++ "(name, page, or duration) — entry skipped"
+
parseOne (Object o) = Movement
<$> (getString =<< KM.lookup "name" o)
<*> (getInt =<< KM.lookup "page" o)
@@ -351,6 +448,9 @@ parseMovements meta =
getInt (Number n) = Just (floor (fromRational (toRational n) :: Double))
getInt _ = Nothing
+parseMovements :: Metadata -> [Movement]
+parseMovements = fst . parseMovementsWithWarnings
+
-- | Extract the composition slug from an item's identifier.
-- "content/music/symphonic-dances/index.md" → "symphonic-dances"
compSlug :: Item a -> String
@@ -410,7 +510,11 @@ compositionCtx =
movementsListField = listFieldWith "movements" movCtx $ \item -> do
meta <- getMetadata (itemIdentifier item)
- let mvs = parseMovements meta
+ let (mvs, warnings) = parseMovementsWithWarnings meta
+ ident = toFilePath (itemIdentifier item)
+ unsafeCompiler $ mapM_
+ (\w -> putStrLn $ "[Movements] " ++ ident ++ ": " ++ w)
+ warnings
return $ zipWith
(\idx mv -> Item (fromFilePath ("mv" ++ show (idx :: Int))) mv)
[1..] mvs
diff --git a/build/Filters.hs b/build/Filters.hs
index d2c13e7..9f00073 100644
--- a/build/Filters.hs
+++ b/build/Filters.hs
@@ -22,16 +22,25 @@ import qualified Filters.Images as Images
-- | Apply all AST-level filters in pipeline order.
-- Run on the Pandoc document after reading, before writing.
-applyAll :: Pandoc -> Pandoc
-applyAll
- = Sidenotes.apply
- . Typography.apply
- . Links.apply
- . Smallcaps.apply
- . Dropcaps.apply
- . Math.apply
- . Code.apply
- . Images.apply
+--
+-- 'Filters.Images.apply' is the only IO-performing filter (it probes the
+-- filesystem for @.webp@ companions before deciding whether to emit
+-- @ / inside
--- a is invalid HTML and causes browsers to implicitly close the span.
-replacePTags :: Text -> Text
-replacePTags =
- T.replace " " ""
- . T.replace " @ (which would be
+-- invalid inside a @@); other block types are rendered with the
+-- regular Pandoc HTML writer.
+--
+-- Operating on the AST is preferred over post-rendered string
+-- substitution because the latter mangles content that legitimately
+-- contains the literal text @ @ (e.g. code samples discussing HTML).
+blocksToInlineHtml :: [Block] -> Text
+blocksToInlineHtml = T.concat . map renderOne
+ where
+ renderOne :: Block -> Text
+ renderOne (Para inlines) =
+ ""
+ <> inlinesToHtml inlines
+ <> ""
+ renderOne (Plain inlines) =
+ inlinesToHtml inlines
+ renderOne b =
+ blocksToHtml [b]
+
+-- | Render a list of inlines to HTML (no surrounding @ @).
+inlinesToHtml :: [Inline] -> Text
+inlinesToHtml inlines =
+ case runPure (writeHtml5String (def :: WriterOptions) (Pandoc mempty [Plain inlines])) of
+ Left _ -> T.empty
+ Right t -> t
-- | Render a list of Pandoc blocks to an HTML fragment via a pure writer run.
blocksToHtml :: [Block] -> Text
diff --git a/build/Filters/Smallcaps.hs b/build/Filters/Smallcaps.hs
index a81521e..3c7b5ad 100644
--- a/build/Filters/Smallcaps.hs
+++ b/build/Filters/Smallcaps.hs
@@ -22,6 +22,7 @@ import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk)
+import qualified Utils as U
-- | Apply smallcaps detection to paragraph-level content.
-- Skips heading blocks to avoid false positives.
@@ -62,10 +63,4 @@ isAbbreviation t =
&& T.any isAlpha t
escHtml :: Text -> Text
-escHtml = T.concatMap esc
- where
- esc '<' = "&lt;"
- esc '>' = "&gt;"
- esc '&' = "&amp;"
- esc '"' = "&quot;"
- esc c = T.singleton c
+escHtml = U.escapeHtmlText
diff --git a/build/Filters/Transclusion.hs b/build/Filters/Transclusion.hs
index ddc0822..4a18a93 100644
--- a/build/Filters/Transclusion.hs
+++ b/build/Filters/Transclusion.hs
@@ -14,6 +14,7 @@
module Filters.Transclusion (preprocess) where
import Data.List (isSuffixOf, isPrefixOf, stripPrefix)
+import qualified Utils as U
-- | Apply transclusion substitution to the raw Markdown source string.
preprocess :: String -> String
@@ -21,14 +22,18 @@ preprocess = unlines . map processLine . lines
processLine :: String -> String
processLine line =
- case parseDirective (trim line) of
+ case parseDirective (U.trim line) of
Nothing -> line
Just (url, secAttr) ->
- "@ fallback.
--- tools/convert-images.sh produces the companion .webp files at build time.
+-- For local raster images (JPG, JPEG, PNG, GIF) whose @.webp@ companion
+-- exists on disk at build time, emits a @<picture>@ element with an
+-- @<img>@ fallback. When the
+-- webp companion is absent (cwebp not installed, @convert-images.sh@ not
+-- yet run, or a single file missed), the filter emits a plain @<img>@ so
+-- the image still renders. This matters because browsers do NOT fall back
+-- from a 404'd @<source>@ — the
+-- source is selected up front and a broken one leaves the area blank.
+--
+-- @tools/convert-images.sh@ produces the companion .webp files at build
+-- time. When cwebp is not installed the script is a no-op, and this
+-- filter degrades gracefully to plain @<img>@.
--
-- SVG files and external URLs are passed through with only lazy loading
-- (and lightbox markers for standalone images).
module Filters.Images (apply) where
import Data.Char (toLower)
+import Data.List (isPrefixOf)
import Data.Text (Text)
import qualified Data.Text as T
-import System.FilePath (replaceExtension)
+import System.Directory (doesFileExist)
+import System.FilePath (replaceExtension, takeExtension, (</>))
import Text.Pandoc.Definition
-import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Walk (walkM)
+import qualified Utils as U
-- | Apply image attribute injection and WebP wrapping to the entire document.
-apply :: Pandoc -> Pandoc
-apply = walk transformInline
+--
+-- @srcDir@ is the directory of the source Markdown file, used to resolve
+-- relative image paths when probing for the corresponding @.webp@
+-- companion file. Absolute paths (leading @/@) are resolved against
+-- @static/@ instead, matching the layout @convert-images.sh@ writes to.
+apply :: FilePath -> Pandoc -> IO Pandoc
+apply srcDir = walkM (transformInline srcDir)
-- ---------------------------------------------------------------------------
-- Core transformation
-- ---------------------------------------------------------------------------
-transformInline :: Inline -> Inline
-transformInline (Link lAttr ils lTarget) =
+transformInline :: FilePath -> Inline -> IO Inline
+transformInline srcDir (Link lAttr ils lTarget) = do
-- Recurse into link contents; images inside a link get no lightbox marker.
- Link lAttr (map wrapLinkedImg ils) lTarget
- where
- wrapLinkedImg (Image iAttr alt iTarget) = renderImg iAttr alt iTarget False
- wrapLinkedImg x = x
-transformInline (Image attr alt target) =
- renderImg attr alt target True
-transformInline x = x
+ ils' <- mapM (wrapLinkedImg srcDir) ils
+ pure (Link lAttr ils' lTarget)
+transformInline srcDir (Image attr alt target) =
+ renderImg srcDir attr alt target True
+transformInline _ x = pure x
+
+wrapLinkedImg :: FilePath -> Inline -> IO Inline
+wrapLinkedImg srcDir (Image iAttr alt iTarget) =
+ renderImg srcDir iAttr alt iTarget False
+wrapLinkedImg _ x = pure x
-- | Dispatch on image type:
--- * Local raster → @<picture>@
--- * Everything else → plain @<img>@ with loading/lightbox attrs
-renderImg :: Attr -> [Inline] -> Target -> Bool -> Inline
-renderImg attr alt target@(src, _) lightbox
- | isLocalRaster (T.unpack src) =
- RawInline (Format "html") (renderPicture attr alt target lightbox)
+-- * Local raster with webp companion on disk → @<picture>@
+-- * Local raster without one → plain @<img>@ (graceful degradation)
+-- * Everything else (SVG, URL) → plain @<img>@ with loading/lightbox attrs
+renderImg :: FilePath -> Attr -> [Inline] -> Target -> Bool -> IO Inline
+renderImg srcDir attr alt target@(src, _) lightbox
+ | isLocalRaster (T.unpack src) = do
+ hasWebp <- doesFileExist (webpPhysicalPath srcDir src)
+ if hasWebp
+ then pure $ RawInline (Format "html")
+ (renderPicture attr alt target lightbox)
+ else pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr))
+ alt target
| otherwise =
- Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target
+ pure $ Image (addLightbox lightbox (addAttr "loading" "lazy" attr)) alt target
where
addLightbox True a = addAttr "data-lightbox" "true" a
addLightbox False a = a
+-- | Physical on-disk path of the @.webp@ companion for a Markdown image src.
+--
+-- Absolute paths (@/images/foo.jpg@) resolve under @static/@ because that
+-- is where Hakyll's static-asset rule writes them from. Relative paths
+-- resolve against the source file's directory, where Pandoc already
+-- expects co-located assets to live.
+webpPhysicalPath :: FilePath -> Text -> FilePath
+webpPhysicalPath srcDir src =
+ let s = T.unpack src
+ physical = if "/" `isPrefixOf` s
+ then "static" ++ s
+ else srcDir </> s
+ in replaceExtension physical ".webp"
+
-- ---------------------------------------------------------------------------
--
.
+ passedKvs = filter
+ (\(k, _) -> k `notElem`
+ ["loading", "data-lightbox", "id", "class", "alt", "title", "src"])
+ kvs
attrId :: Text -> Text
attrId t = if T.null t then "" else " id=\"" <> esc t <> "\""
@@ -103,11 +148,11 @@ isLocalRaster src = not (isUrl src) && lowerExt src `elem` [".jpg", ".jpeg", ".p
isUrl :: String -> Bool
isUrl s = any (`isPrefixOf` s) ["http://", "https://", "//", "data:"]
- where isPrefixOf pfx str = take (length pfx) str == pfx
-- | Extension of a path, lowercased (e.g. ".JPG" → ".jpg").
+-- Returns the empty string for paths with no extension.
lowerExt :: FilePath -> String
-lowerExt = map toLower . reverse . ('.' :) . takeWhile (/= '.') . tail . dropWhile (/= '.') . reverse
+lowerExt = map toLower . takeExtension
-- | Prepend a key=value pair if not already present.
addAttr :: Text -> Text -> Attr -> Attr
@@ -119,24 +164,28 @@ addAttr k v (i, cs, kvs)
stringify :: [Inline] -> Text
stringify = T.concat . map go
where
- go (Str t) = t
- go Space = " "
- go SoftBreak = " "
- go LineBreak = " "
- go (Emph ils) = stringify ils
- go (Strong ils) = stringify ils
- go (Code _ t) = t
- go (Link _ ils _) = stringify ils
- go (Image _ ils _) = stringify ils
- go (Span _ ils) = stringify ils
- go _ = ""
+ go (Str t) = t
+ go Space = " "
+ go SoftBreak = " "
+ go LineBreak = " "
+ go (Emph ils) = stringify ils
+ go (Strong ils) = stringify ils
+ go (Strikeout ils) = stringify ils
+ go (Superscript ils) = stringify ils
+ go (Subscript ils) = stringify ils
+ go (SmallCaps ils) = stringify ils
+ go (Underline ils) = stringify ils
+ go (Quoted _ ils) = stringify ils
+ go (Cite _ ils) = stringify ils
+ go (Code _ t) = t
+ go (Math _ t) = t
+ go (RawInline _ _) = ""
+ go (Link _ ils _) = stringify ils
+ go (Image _ ils _) = stringify ils
+ go (Span _ ils) = stringify ils
+ go (Note _) = ""
-- | HTML-escape a text value for use in attribute values.
+-- Defers to the canonical 'Utils.escapeHtmlText'.
esc :: Text -> Text
-esc = T.concatMap escChar
- where
- escChar '&' = "&amp;"
- escChar '<' = "&lt;"
- escChar '>' = "&gt;"
- escChar '"' = "&quot;"
- escChar c = T.singleton c
+esc = U.escapeHtmlText
diff --git a/build/Filters/Links.hs b/build/Filters/Links.hs
index 7a0c3dd..146aef2 100644
--- a/build/Filters/Links.hs
+++ b/build/Filters/Links.hs
@@ -25,14 +25,20 @@ apply = walk classifyLink . walk classifyPdfLink
-- Preserves the original path in @data-pdf-src@ so the popup thumbnail
-- provider can locate the corresponding @.thumb.png@ file.
-- Skips links that are already pointing at the viewer (idempotent).
+--
+-- Handles fragment identifiers (e.g. @/papers/foo.pdf#page=5@): the
+-- fragment is stripped before the @.pdf@ suffix check and re-attached
+-- after the viewer URL so PDF.js's anchor handling works.
classifyPdfLink :: Inline -> Inline
classifyPdfLink (Link (ident, classes, kvs) ils (url, title))
| "/" `T.isPrefixOf` url
- , ".pdf" `T.isSuffixOf` T.toLower url
+ , let (path, fragment) = T.break (== '#') url
+ , ".pdf" `T.isSuffixOf` T.toLower path
, "pdf-link" `notElem` classes =
- let viewerUrl = "/pdfjs/web/viewer.html?file=" <> encodeQueryValue url
+ let viewerUrl = "/pdfjs/web/viewer.html?file="
+ <> encodeQueryValue path <> fragment
classes' = classes ++ ["pdf-link"]
- kvs' = kvs ++ [("data-pdf-src", url)]
+ kvs' = kvs ++ [("data-pdf-src", path)]
in Link (ident, classes', kvs') ils (viewerUrl, title)
classifyPdfLink x = x
@@ -53,10 +59,33 @@ classifyLink x = x
-- Helpers
-- ---------------------------------------------------------------------------
+-- | True if the URL points outside the site's domain.
+--
+-- Uses a strict hostname comparison rather than substring matching, so
+-- that a hostile lookalike like @evil-levineuwirth.org.attacker.com@ is
+-- correctly classified as external (and gets @rel=noopener noreferrer@
+-- plus @target=_blank@ applied).
isExternal :: Text -> Bool
isExternal url =
- ("http://" `T.isPrefixOf` url || "https://" `T.isPrefixOf` url)
- && not ("levineuwirth.org" `T.isInfixOf` url)
+ case extractHost url of
+ Nothing -> False
+ Just host ->
+ not (host == siteHost || ("." <> siteHost) `T.isSuffixOf` host)
+ where
+ siteHost = "levineuwirth.org"
+
+-- | Extract the lowercased hostname from an absolute http(s) URL.
+-- Returns 'Nothing' for non-http(s) URLs (relative paths, mailto:, etc.).
+extractHost :: Text -> Maybe Text
+extractHost url
+ | Just rest <- T.stripPrefix "https://" url = Just (hostOf rest)
+ | Just rest <- T.stripPrefix "http://" url = Just (hostOf rest)
+ | otherwise = Nothing
+ where
+ hostOf rest =
+ let withPort = T.takeWhile (\c -> c /= '/' && c /= '?' && c /= '#') rest
+ host = T.takeWhile (/= ':') withPort
+ in T.toLower host
-- | Icon name for the link, matching a file in /images/link-icons/