{-# LANGUAGE GHC2021 #-} {-# LANGUAGE OverloadedStrings #-} -- | Image filter: lazy loading, lightbox markers, WebP -- wrappers, and CLS-preventing width/height attrs. -- -- For local raster images (JPG, JPEG, PNG, GIF) whose @.webp@ companion -- exists on disk at build time, emits a @@ element with a WebP -- @@ and the original format as the @@ fallback. When the -- webp companion is absent (cwebp not installed, @convert-images.sh@ not -- yet run, or a single file missed), the filter emits a plain @@ so -- the image still renders. This matters because browsers do NOT fall back -- from a 404'd @@ inside @@ to the nested @@ — the -- source is selected up front and a broken one leaves the area blank. -- -- @tools/convert-images.sh@ produces the companion .webp files at build -- time. When cwebp is not installed the script is a no-op, and this -- filter degrades gracefully to plain @@. -- -- SVG files and external URLs are passed through with only lazy loading -- (and lightbox markers for standalone images). -- -- Width / height attrs are looked up from @{image}.dims.yaml@ sidecars -- produced by @tools/extract-dimensions.py@ at build time, on the same -- path-resolution rules as the WebP companion check (absolute paths -- under @static/@, relative under the source-file directory). When a -- sidecar is missing the filter emits an attr-free rather than -- guessing — partial dimensions are worse than no dimensions, since -- the browser would then size the image wrong on first paint. module Filters.Images (apply) where import Data.Char (toLower) import Data.Default (def) import Data.List (isPrefixOf) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Aeson.KeyMap as KM import qualified Data.Scientific as Sci import qualified Data.Yaml as Y import Text.Pandoc.Definition import qualified Text.Pandoc as Pandoc import Text.Pandoc.Walk (walkM) import System.Directory (doesFileExist) import System.FilePath (replaceExtension, takeExtension, ()) import qualified Utils as U -- | Apply image attribute injection and WebP wrapping to the entire document. -- -- @srcDir@ is the directory of the source Markdown file, used to resolve -- relative image paths when probing for the corresponding @.webp@ -- companion file. Absolute paths (leading @/@) are resolved against -- @static/@ instead, matching the layout @convert-images.sh@ writes to. -- -- Two-pass walk: -- -- 1. Block-level pass (@transformBlock@) intercepts standalone -- figures so we can synthesize the entire @
@ ourselves -- when WebP wrapping kicks in. Without this pass, replacing the -- inner @Image@ with a @RawInline@ would break Pandoc's -- alt-vs-caption comparison and we'd lose the -- @aria-hidden="true"@ hint on identical-text figcaptions. -- 2. Inline-level pass (@transformInline@) handles every remaining -- @Image@ — inline-in-prose, inside @Link@s, etc. Pandoc's writer -- still applies its accessibility heuristics for figures we -- didn't synthesize (notably the no-WebP case). apply :: FilePath -> Pandoc -> IO Pandoc apply srcDir doc = do doc' <- walkM (transformBlock srcDir) doc walkM (transformInline srcDir) doc' -- --------------------------------------------------------------------------- -- Core transformations -- --------------------------------------------------------------------------- -- | Block-level pass. Currently only acts on the simple-figure shape -- that Pandoc's Markdown reader produces for @![alt](src)@ standalone: -- -- @Figure attr caption [Plain [Image imgAttr alt target]]@ -- -- When the image has a WebP companion on disk, we replace the whole -- Figure with a @RawBlock@ containing the equivalent HTML — but with -- the @@ wrapper inside and a manually-emitted -- @aria-hidden="true"@ on the figcaption when alt text equals the -- caption text. Anything more exotic (multi-image figures, mixed -- block content inside the figure, no-WebP images) is left to -- Pandoc's default emission, which is already correct for those -- cases. transformBlock :: FilePath -> Block -> IO Block transformBlock srcDir b@(Figure figAttr caption [Plain [Image imgAttr alt target]]) = do let src = T.unpack (fst target) if not (isLocalRaster src) then pure b else do hasWebp <- doesFileExist (webpPhysicalPath srcDir (fst target)) if not hasWebp then pure b -- Pandoc handles aria-hidden naturally on the no-WebP path. else synthesizeFigure srcDir figAttr caption imgAttr alt target transformBlock _ b = pure b -- | Build a @
@ block from an Image and its surrounding -- metadata. Used only on the WebP branch; the no-WebP branch leaves -- Pandoc to emit the figure naturally. -- -- Aria-hiding rule: when the caption's plain-text content equals the -- alt text and both are non-empty, mark the @
@ with -- @aria-hidden="true"@. Screen readers then announce the alt -- (via the @@) and skip the figcaption (which would just -- duplicate it). Non-matching captions render as visible content. -- -- Caption inline rendering goes through Pandoc's HTML writer, so -- formatting (italic, links, code, etc.) is preserved. synthesizeFigure :: FilePath -> Attr -> Caption -> Attr -> [Inline] -> Target -> IO Block synthesizeFigure srcDir figAttr caption imgAttr alt target = do dims <- readDims srcDir (fst target) let pictureHtml = renderPicture imgAttr alt target True dims capInlines = captionInlines caption capText = stringify capInlines altText = stringify alt useAriaHide = capText == altText && not (T.null altText) pure $ RawBlock (Format "html") $ renderFigure figAttr pictureHtml (renderFigcaption capInlines useAriaHide) transformInline :: FilePath -> Inline -> IO Inline transformInline srcDir (Link lAttr ils lTarget) = do -- Recurse into link contents; images inside a link get no lightbox marker. ils' <- mapM (wrapLinkedImg srcDir) ils pure (Link lAttr ils' lTarget) transformInline srcDir (Image attr alt target) = renderImg srcDir attr alt target True transformInline _ x = pure x wrapLinkedImg :: FilePath -> Inline -> IO Inline wrapLinkedImg srcDir (Image iAttr alt iTarget) = renderImg srcDir iAttr alt iTarget False wrapLinkedImg _ x = pure x -- | Dispatch on image type: -- * Local raster with webp companion on disk → @@ with WebP @@ -- * Local raster without companion → plain @@ (graceful degradation) -- * Everything else (SVG, URL) → plain @@ with loading/lightbox attrs -- -- In all three branches, when a @{image}.dims.yaml@ sidecar is -- present, @width@ and @height@ attrs are emitted on the rendered -- @@. The sidecar lookup is skipped for non-local sources -- (HTTP URLs, data URIs) since there's no local file to measure. renderImg :: FilePath -> Attr -> [Inline] -> Target -> Bool -> IO Inline renderImg srcDir attr alt target@(src, _) lightbox = do let s = T.unpack src isRaster = isLocalRaster s local = not (isUrl s) dims <- if local then readDims srcDir src else pure Nothing if isRaster then do hasWebp <- doesFileExist (webpPhysicalPath srcDir src) if hasWebp then pure $ RawInline (Format "html") (renderPicture attr alt target lightbox dims) else pure $ Image (commonAttrs dims) alt target else pure $ Image (commonAttrs dims) alt target where commonAttrs dims = withDims dims $ addAttr "decoding" "async" $ addLightbox lightbox $ addAttr "loading" "lazy" attr addLightbox True a = addAttr "data-lightbox" "true" a addLightbox False a = a withDims Nothing a = a withDims (Just (w, h)) a = addAttr "width" (T.pack (show w)) (addAttr "height" (T.pack (show h)) a) -- | Physical on-disk path of the @.webp@ companion for a Markdown image src. -- -- Absolute paths (@/images/foo.jpg@) resolve under @static/@ because that -- is where Hakyll's static-asset rule writes them from. Relative paths -- resolve against the source file's directory, where Pandoc already -- expects co-located assets to live. webpPhysicalPath :: FilePath -> Text -> FilePath webpPhysicalPath srcDir src = let s = T.unpack src physical = if "/" `isPrefixOf` s then "static" ++ s else srcDir s in replaceExtension physical ".webp" -- | Physical on-disk path of the @.dims.yaml@ sidecar for a Markdown -- image src. Same path-resolution rules as 'webpPhysicalPath'; the -- sidecar lives next to the original image with the literal -- extension @.dims.yaml@ appended. dimsPhysicalPath :: FilePath -> Text -> FilePath dimsPhysicalPath srcDir src = let s = T.unpack src physical = if "/" `isPrefixOf` s then "static" ++ s else srcDir s in physical ++ ".dims.yaml" -- | Read the @{image}.dims.yaml@ sidecar and return @(width, height)@ -- when present and parseable. Returns 'Nothing' on absent file, -- parse error, missing keys, or non-integer values — all of which -- cause the filter to emit no width/height attrs (rather than a -- guess that would size the image wrong on first paint). readDims :: FilePath -> Text -> IO (Maybe (Int, Int)) readDims srcDir src = do let path = dimsPhysicalPath srcDir src exists <- doesFileExist path if not exists then pure Nothing else do decoded <- Y.decodeFileEither path pure $ case decoded of Right (Y.Object obj) -> do w <- intValue =<< KM.lookup "width" obj h <- intValue =<< KM.lookup "height" obj Just (w, h) _ -> Nothing where intValue :: Y.Value -> Maybe Int intValue (Y.Number n) = Sci.toBoundedInteger n intValue _ = Nothing -- --------------------------------------------------------------------------- -- rendering -- --------------------------------------------------------------------------- -- | Emit a @@ element with a WebP @@ and an @@ fallback. renderPicture :: Attr -> [Inline] -> Target -> Bool -> Maybe (Int, Int) -> Text renderPicture (ident, classes, kvs) alt (src, title) lightbox dims = T.concat [ "" , "" , "" , "" ] where webpSrc = replaceExtension (T.unpack src) ".webp" -- Strip attrs we handle explicitly above (id/class/alt/title) and the -- attrs we always emit ourselves (loading, decoding, data-lightbox, -- width, height), so they don't appear twice on the . passedKvs = filter (\(k, _) -> k `notElem` [ "loading", "decoding", "data-lightbox" , "id", "class", "alt", "title", "src" , "width", "height" ]) kvs dimsAttrs Nothing = "" dimsAttrs (Just (w, h)) = " width=\"" <> T.pack (show w) <> "\" height=\"" <> T.pack (show h) <> "\"" -- --------------------------------------------------------------------------- --
synthesis (Block walk, WebP path only) -- --------------------------------------------------------------------------- -- | Build a @
@ HTML element wrapping pre-rendered inner -- content (typically a @@) and a pre-rendered figcaption. -- Preserves any id / classes / kvs from the surrounding Pandoc -- 'Figure' attr. renderFigure :: Attr -> Text -> Text -> Text renderFigure (figId, figClasses, figKvs) inner figcaption = T.concat [ "\n" , inner , "\n" , figcaption , "\n
" ] -- | Build a @
@ element. When @ariaHidden@ is true, emits -- @aria-hidden="true"@ — used when the caption text exactly -- duplicates the image alt (so screen readers don't announce the -- same content twice). Caption inlines render through Pandoc's HTML -- writer to preserve formatting. renderFigcaption :: [Inline] -> Bool -> Text renderFigcaption ils ariaHidden = let body = renderInlinesToHtml ils attrs = if ariaHidden then " aria-hidden=\"true\"" else "" in " attrs <> ">" <> body <> "
" -- | Pandoc 'Caption' has a long form (@[Block]@) and an optional short -- form (@Maybe ShortCaption@). We use the long form, flattening any -- @Plain@ / @Para@ blocks into a single inline list. Multi-block -- captions (rare) collapse to the inlines of their text-bearing -- blocks; non-text blocks (like nested lists) are dropped, since -- they don't make sense in a figcaption anyway. captionInlines :: Caption -> [Inline] captionInlines (Caption _ blocks) = concatMap go blocks where go (Plain ils) = ils go (Para ils) = ils go _ = [] -- | Render Pandoc 'Inline' nodes to HTML using Pandoc's own writer. -- Wrapping the inlines in a @Plain@ block (rather than @Para@) -- avoids the surrounding @

@ tag the writer would otherwise emit. -- On writer failure (extremely unlikely for inline-only input), -- falls back to the plain-text 'stringify' rendering — a worse but -- still safe figcaption. renderInlinesToHtml :: [Inline] -> Text renderInlinesToHtml ils = case Pandoc.runPure (Pandoc.writeHtml5String def doc) of Right t -> T.strip t Left _ -> stringify ils where doc = Pandoc mempty [Plain ils] attrId :: Text -> Text attrId t = if T.null t then "" else " id=\"" <> esc t <> "\"" attrClasses :: [Text] -> Text attrClasses [] = "" attrClasses cs = " class=\"" <> T.intercalate " " (map esc cs) <> "\"" attrAlt :: [Inline] -> Text attrAlt ils = let t = stringify ils in if T.null t then "" else " alt=\"" <> esc t <> "\"" attrTitle :: Text -> Text attrTitle t = if T.null t then "" else " title=\"" <> esc t <> "\"" renderKvs :: [(Text, Text)] -> Text renderKvs = T.concat . map (\(k, v) -> " " <> k <> "=\"" <> esc v <> "\"") -- --------------------------------------------------------------------------- -- Helpers -- --------------------------------------------------------------------------- -- | True for local (non-URL) images with a raster format we can convert. isLocalRaster :: FilePath -> Bool isLocalRaster src = not (isUrl src) && lowerExt src `elem` [".jpg", ".jpeg", ".png", ".gif"] isUrl :: String -> Bool isUrl s = any (`isPrefixOf` s) ["http://", "https://", "//", "data:"] -- | Extension of a path, lowercased (e.g. ".JPG" → ".jpg"). -- Returns the empty string for paths with no extension. lowerExt :: FilePath -> String lowerExt = map toLower . takeExtension -- | Prepend a key=value pair if not already present. addAttr :: Text -> Text -> Attr -> Attr addAttr k v (i, cs, kvs) | any ((== k) . fst) kvs = (i, cs, kvs) | otherwise = (i, cs, (k, v) : kvs) -- | Plain-text content of a list of inlines (for alt text). stringify :: [Inline] -> Text stringify = T.concat . map go where go (Str t) = t go Space = " " go SoftBreak = " " go LineBreak = " " go (Emph ils) = stringify ils go (Strong ils) = stringify ils go (Strikeout ils) = stringify ils go (Superscript ils) = stringify ils go (Subscript ils) = stringify ils go (SmallCaps ils) = stringify ils go (Underline ils) = stringify ils go (Quoted _ ils) = stringify ils go (Cite _ ils) = stringify ils go (Code _ t) = t go (Math _ t) = t go (RawInline _ _) = "" go (Link _ ils _) = stringify ils go (Image _ ils _) = stringify ils go (Span _ ils) = stringify ils go (Note _) = "" -- | HTML-escape a text value for use in attribute values. -- Defers to the canonical 'Utils.escapeHtmlText'. esc :: Text -> Text esc = U.escapeHtmlText