Filters: fence/code-span awareness, host matching, nested-header skip
- SourceRefs trigger whitelist aligned to the /source/ serving whitelist (drops content/, yaml-source/, broad static//tools//data prefixes; adds .bib); existsCached no longer memoizes non-existence, so files created under make watch are picked up (§2.5, §2.16) - fill/stroke hex replacement is boundary-aware: #000080 and 8-digit RGBA forms can no longer be corrupted into currentColor80 (§2.12) - Wikilinks/Transclusion/EmbedPdf skip fenced code blocks (shared CommonMark fence tracker), and wikilinks additionally skip inline code spans — the syntax-documentation essay now renders its own examples literally while live wikilinks still convert (verified both ways in output) (§2.13) - domainIcon matches the extracted host by label suffix instead of substring-of-URL; extractHost also strips userinfo (§2.14) - webpSrc escaped in srcset; internal PDF links no longer double- classified; Smallcaps/Archive header-skip now holds at every nesting depth via protect/restore walks (§2.17) Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
c8eeaaa9bc
commit
f254ce866e
|
|
@ -30,22 +30,45 @@ import Text.Pandoc.Walk (walk)
|
|||
import ArchiveIndex (ArchiveStatus (..), archiveIndexIsEmpty,
|
||||
archiveSlugFor, archiveStatusForSlug)
|
||||
|
||||
-- | Annotate body links. Headings are left alone — an affordance there
|
||||
-- would be noise. Identity when the index is empty.
|
||||
-- | Annotate body links. Links inside headings are left alone at
|
||||
-- /every/ nesting depth — an affordance there would be noise, and a
|
||||
-- top-level pattern match would miss a @Header@ inside a @Div@ or
|
||||
-- @BlockQuote@. Header links are tagged with a sentinel class before
|
||||
-- the annotation walk and stripped of it afterwards, so the sentinel
|
||||
-- can never leak into the writer. Identity when the index is empty.
|
||||
apply :: Pandoc -> Pandoc
|
||||
apply doc@(Pandoc meta blocks)
|
||||
apply doc
|
||||
| archiveIndexIsEmpty = doc
|
||||
| otherwise = Pandoc meta (map annotateBlock blocks)
|
||||
| otherwise =
|
||||
walk unprotectLink . walk annotateInlines . walk protectHeader $ doc
|
||||
|
||||
annotateBlock :: Block -> Block
|
||||
annotateBlock h@Header{} = h
|
||||
annotateBlock b = walk annotateInlines b
|
||||
-- | Sentinel class marking a link the annotation walk must skip. It
|
||||
-- only exists between the protect and unprotect walks inside 'apply'.
|
||||
skipClass :: T.Text
|
||||
skipClass = "archive-header-skip"
|
||||
|
||||
protectHeader :: Block -> Block
|
||||
protectHeader (Header lvl attr ils) = Header lvl attr (walk protect ils)
|
||||
where
|
||||
protect (Link (ident, cls, kvs) text target) =
|
||||
Link (ident, skipClass : cls, kvs) text target
|
||||
protect x = x
|
||||
protectHeader b = b
|
||||
|
||||
unprotectLink :: Inline -> Inline
|
||||
unprotectLink (Link (ident, cls, kvs) text target)
|
||||
| skipClass `elem` cls =
|
||||
Link (ident, filter (/= skipClass) cls, kvs) text target
|
||||
unprotectLink x = x
|
||||
|
||||
-- | For each archived @Link@: flip it if the target is 'Rotted', else
|
||||
-- append the affordance. Non-archived links pass through untouched.
|
||||
-- append the affordance. Non-archived links — and links protected by
|
||||
-- 'protectHeader' — pass through untouched.
|
||||
annotateInlines :: [Inline] -> [Inline]
|
||||
annotateInlines = concatMap expand
|
||||
where
|
||||
expand l@(Link (_, cls, _) _ _)
|
||||
| skipClass `elem` cls = [l]
|
||||
expand l@(Link attr text (url, _)) =
|
||||
case archiveSlugFor url of
|
||||
Nothing -> [l]
|
||||
|
|
|
|||
|
|
@ -12,15 +12,23 @@
|
|||
--
|
||||
-- The file path must be root-relative (begins with @/@).
|
||||
-- PDF.js is expected to be vendored at @/pdfjs/web/viewer.html@.
|
||||
--
|
||||
-- Code protection (honest scope): lines inside /fenced/ code blocks
|
||||
-- are passed through untouched ('Filters.Wikilinks.mapOutsideFences'),
|
||||
-- so fenced examples can show @{{pdf:…}}@ literally. Indented code
|
||||
-- blocks and inline code spans are NOT recognised — a full-line
|
||||
-- directive inside either is still rewritten.
|
||||
module Filters.EmbedPdf (preprocess) where
|
||||
|
||||
import Data.Char (isDigit)
|
||||
import Data.List (isPrefixOf, isSuffixOf)
|
||||
import Filters.Wikilinks (mapOutsideFences)
|
||||
import qualified Utils as U
|
||||
|
||||
-- | Apply PDF-embed substitution to the raw Markdown source string.
|
||||
-- | Apply PDF-embed substitution to the raw Markdown source string,
|
||||
-- skipping lines inside fenced code blocks.
|
||||
preprocess :: String -> String
|
||||
preprocess = unlines . map processLine . lines
|
||||
preprocess = mapOutsideFences processLine
|
||||
|
||||
processLine :: String -> String
|
||||
processLine line =
|
||||
|
|
|
|||
|
|
@ -231,7 +231,7 @@ renderPicture :: Attr -> [Inline] -> Target -> Bool -> Maybe (Int, Int) -> Text
|
|||
renderPicture (ident, classes, kvs) alt (src, title) lightbox dims =
|
||||
T.concat
|
||||
[ "<picture>"
|
||||
, "<source srcset=\"", T.pack webpSrc, "\" type=\"image/webp\">"
|
||||
, "<source srcset=\"", esc (T.pack webpSrc), "\" type=\"image/webp\">"
|
||||
, "<img"
|
||||
, attrId ident
|
||||
, attrClasses classes
|
||||
|
|
|
|||
|
|
@ -16,8 +16,11 @@ import Text.Pandoc.Definition
|
|||
import Text.Pandoc.Walk (walk)
|
||||
|
||||
-- | Apply link classification to the entire document.
|
||||
-- Two passes: PDF links first (rewrites href to viewer URL), then external
|
||||
-- link classification (operates on http/https, so no overlap).
|
||||
-- Two passes: PDF links first (rewrites href to the viewer URL and tags
|
||||
-- the anchor @pdf-link@), then general classification. The second pass
|
||||
-- explicitly skips anchors the PDF pass already claimed — the viewer URL
|
||||
-- is root-relative, so without that guard it would also be classified as
|
||||
-- an internal page link and get double chrome.
|
||||
apply :: Pandoc -> Pandoc
|
||||
apply = walk classifyLink . walk classifyPdfLink
|
||||
|
||||
|
|
@ -49,6 +52,11 @@ classifyLink l@(Link (_, classes, _) _ _)
|
|||
-- brand icon stamp, and have their own popup provider. Leave them
|
||||
-- entirely alone.
|
||||
| "source-ref" `elem` classes = l
|
||||
-- PDF links were already rewritten to the (root-relative) viewer URL
|
||||
-- and given their own chrome by 'classifyPdfLink' in the preceding
|
||||
-- pass; without this guard they would be double-classified as
|
||||
-- internal page links.
|
||||
| "pdf-link" `elem` classes = l
|
||||
classifyLink (Link (ident, classes, kvs) ils (url, title))
|
||||
| isExternal url =
|
||||
let icon = domainIcon url
|
||||
|
|
@ -100,8 +108,9 @@ isExternal url =
|
|||
where
|
||||
siteHost = "levineuwirth.org"
|
||||
|
||||
-- | Extract the lowercased hostname from an absolute http(s) URL.
|
||||
-- Returns 'Nothing' for non-http(s) URLs (relative paths, mailto:, etc.).
|
||||
-- | Extract the lowercased hostname from an absolute http(s) URL,
|
||||
-- stripping any userinfo (@user:pass\@@) and port. Returns 'Nothing'
|
||||
-- for non-http(s) URLs (relative paths, mailto:, etc.).
|
||||
extractHost :: Text -> Maybe Text
|
||||
extractHost url
|
||||
| Just rest <- T.stripPrefix "https://" url = Just (hostOf rest)
|
||||
|
|
@ -109,45 +118,60 @@ extractHost url
|
|||
| otherwise = Nothing
|
||||
where
|
||||
hostOf rest =
|
||||
let withPort = T.takeWhile (\c -> c /= '/' && c /= '?' && c /= '#') rest
|
||||
host = T.takeWhile (/= ':') withPort
|
||||
let authority = T.takeWhile (\c -> c /= '/' && c /= '?' && c /= '#') rest
|
||||
-- 'T.breakOnEnd' yields the segment after the last @\@@, or
|
||||
-- the whole authority when there is no userinfo.
|
||||
(_, hostPort) = T.breakOnEnd "@" authority
|
||||
host = T.takeWhile (/= ':') hostPort
|
||||
in T.toLower host
|
||||
|
||||
-- | Icon name for the link, matching a file in /images/link-icons/<name>.svg.
|
||||
--
|
||||
-- Matches on the URL's host only, never on the full URL — a path like
|
||||
-- @https://example.org/why-x.com-failed@ must not get the Twitter
|
||||
-- icon. URLs with no extractable host get the generic icon.
|
||||
domainIcon :: Text -> Text
|
||||
domainIcon url
|
||||
domainIcon url = maybe "external" iconForHost (extractHost url)
|
||||
|
||||
iconForHost :: Text -> Text
|
||||
iconForHost host
|
||||
-- Scholarly / reference
|
||||
| "wikipedia.org" `T.isInfixOf` url = "wikipedia"
|
||||
| "arxiv.org" `T.isInfixOf` url = "arxiv"
|
||||
| "doi.org" `T.isInfixOf` url = "doi"
|
||||
| "worldcat.org" `T.isInfixOf` url = "worldcat"
|
||||
| "orcid.org" `T.isInfixOf` url = "orcid"
|
||||
| "archive.org" `T.isInfixOf` url = "internet-archive"
|
||||
| m "wikipedia.org" = "wikipedia"
|
||||
| m "arxiv.org" = "arxiv"
|
||||
| m "doi.org" = "doi"
|
||||
| m "worldcat.org" = "worldcat"
|
||||
| m "orcid.org" = "orcid"
|
||||
| m "archive.org" = "internet-archive"
|
||||
-- Code / software
|
||||
| "github.com" `T.isInfixOf` url = "github"
|
||||
| "git.levineuwirth.org" `T.isInfixOf` url = "forgejo"
|
||||
| "tensorflow.org" `T.isInfixOf` url = "tensorflow"
|
||||
| m "github.com" = "github"
|
||||
| m "git.levineuwirth.org" = "forgejo"
|
||||
| m "tensorflow.org" = "tensorflow"
|
||||
-- AI companies (consumer products share a brand icon with the lab)
|
||||
| "anthropic.com" `T.isInfixOf` url = "anthropic"
|
||||
| "claude.ai" `T.isInfixOf` url = "anthropic"
|
||||
| "openai.com" `T.isInfixOf` url = "openai"
|
||||
| "chatgpt.com" `T.isInfixOf` url = "openai"
|
||||
| m "anthropic.com" = "anthropic"
|
||||
| m "claude.ai" = "anthropic"
|
||||
| m "openai.com" = "openai"
|
||||
| m "chatgpt.com" = "openai"
|
||||
-- Social / media
|
||||
| "twitter.com" `T.isInfixOf` url = "twitter"
|
||||
| "x.com" `T.isInfixOf` url = "twitter"
|
||||
| "reddit.com" `T.isInfixOf` url = "reddit"
|
||||
| "youtube.com" `T.isInfixOf` url = "youtube"
|
||||
| "youtu.be" `T.isInfixOf` url = "youtube"
|
||||
| "tiktok.com" `T.isInfixOf` url = "tiktok"
|
||||
| "substack.com" `T.isInfixOf` url = "substack"
|
||||
| "news.ycombinator.com" `T.isInfixOf` url = "hacker-news"
|
||||
| "lesswrong.com" `T.isInfixOf` url = "lesswrong"
|
||||
| m "twitter.com" = "twitter"
|
||||
| m "x.com" = "twitter"
|
||||
| m "reddit.com" = "reddit"
|
||||
| m "youtube.com" = "youtube"
|
||||
| m "youtu.be" = "youtube"
|
||||
| m "tiktok.com" = "tiktok"
|
||||
| m "substack.com" = "substack"
|
||||
| m "news.ycombinator.com" = "hacker-news"
|
||||
| m "lesswrong.com" = "lesswrong"
|
||||
-- News
|
||||
| "nytimes.com" `T.isInfixOf` url = "new-york-times"
|
||||
| m "nytimes.com" = "new-york-times"
|
||||
-- Institutions
|
||||
| "nasa.gov" `T.isInfixOf` url = "nasa"
|
||||
| "apple.com" `T.isInfixOf` url = "apple"
|
||||
| m "nasa.gov" = "nasa"
|
||||
| m "apple.com" = "apple"
|
||||
| otherwise = "external"
|
||||
where
|
||||
-- Label-suffix match: the host is the domain itself or a subdomain
|
||||
-- of it. Never fires on a lookalike label (@notx.com@) or on text
|
||||
-- in the path or query.
|
||||
m d = host == d || ("." <> d) `T.isSuffixOf` host
|
||||
|
||||
-- | Percent-encode characters that would break a @?file=@ query-string value.
|
||||
-- Slashes are intentionally left unencoded so root-relative paths remain
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
module Filters.Score (inlineScores) where
|
||||
|
||||
import Control.Exception (IOException, try)
|
||||
import Data.Char (isHexDigit)
|
||||
import Data.Maybe (listToMaybe)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.IO as TIO
|
||||
|
|
@ -86,25 +87,48 @@ findImagePath blocks = listToMaybe
|
|||
-- | Replace hardcoded black fill/stroke values with @currentColor@ so the
|
||||
-- SVG inherits the CSS @color@ property in both light and dark modes.
|
||||
--
|
||||
-- 6-digit hex patterns are at the bottom of the composition chain
|
||||
-- (applied first) so they are replaced before the 3-digit shorthand,
|
||||
-- preventing partial matches (e.g. @#000@ matching the prefix of @#000000@).
|
||||
-- Quoted attribute forms (@fill="#000"@) are self-delimiting — the
|
||||
-- closing quote bounds the match — so plain 'T.replace' is safe for
|
||||
-- them. Unquoted style-property forms (@fill:#000@) are not: naive
|
||||
-- replacement would also fire on the prefix of a longer hex colour
|
||||
-- (@fill:#000080@ → @fill:currentColor80@, invalid CSS). Those go
|
||||
-- through 'replaceHexColor', which rewrites a match only when it is
|
||||
-- not followed by another hex digit; the boundary check also makes
|
||||
-- the 3-digit/6-digit application order irrelevant.
|
||||
processColors :: T.Text -> T.Text
|
||||
processColors
|
||||
-- 3-digit hex and keyword patterns (applied after 6-digit replacements)
|
||||
-- 3-digit hex and keyword patterns
|
||||
= T.replace "fill=\"#000\"" "fill=\"currentColor\""
|
||||
. T.replace "fill=\"black\"" "fill=\"currentColor\""
|
||||
. T.replace "stroke=\"#000\"" "stroke=\"currentColor\""
|
||||
. T.replace "stroke=\"black\"" "stroke=\"currentColor\""
|
||||
. T.replace "fill:#000" "fill:currentColor"
|
||||
. replaceHexColor "fill:#000" "fill:currentColor"
|
||||
. T.replace "fill:black" "fill:currentColor"
|
||||
. T.replace "stroke:#000" "stroke:currentColor"
|
||||
. replaceHexColor "stroke:#000" "stroke:currentColor"
|
||||
. T.replace "stroke:black" "stroke:currentColor"
|
||||
-- 6-digit hex patterns (applied first — bottom of the chain)
|
||||
. T.replace "fill=\"#000000\"" "fill=\"currentColor\""
|
||||
. T.replace "stroke=\"#000000\"" "stroke=\"currentColor\""
|
||||
. T.replace "fill:#000000" "fill:currentColor"
|
||||
. T.replace "stroke:#000000" "stroke:currentColor"
|
||||
. replaceHexColor "fill:#000000" "fill:currentColor"
|
||||
. replaceHexColor "stroke:#000000" "stroke:currentColor"
|
||||
|
||||
-- | 'T.replace' restricted to hex-boundary-terminated matches: an
|
||||
-- occurrence of @needle@ is rewritten only when the character after
|
||||
-- it is not another hex digit, so @fill:#000@ never fires inside the
|
||||
-- longer colours @fill:#0008@, @fill:#000080@, or @fill:#00000080@.
|
||||
replaceHexColor :: T.Text -> T.Text -> T.Text -> T.Text
|
||||
replaceHexColor needle replacement = go
|
||||
where
|
||||
go t =
|
||||
let (pre, rest) = T.breakOn needle t
|
||||
in if T.null rest
|
||||
then pre
|
||||
else
|
||||
let after = T.drop (T.length needle) rest
|
||||
in case T.uncons after of
|
||||
Just (c, _) | isHexDigit c ->
|
||||
pre <> needle <> go after
|
||||
_ -> pre <> replacement <> go after
|
||||
|
||||
buildHtml :: Maybe T.Text -> Maybe T.Text -> T.Text -> T.Text
|
||||
buildHtml mName mCaption svgContent = T.concat
|
||||
|
|
|
|||
|
|
@ -14,7 +14,8 @@
|
|||
-- extra filter logic is needed for that case.
|
||||
--
|
||||
-- The filter is /not/ applied inside headings (where Fira Sans uppercase
|
||||
-- text looks intentional) or inside @Code@/@RawInline@ inlines.
|
||||
-- text looks intentional, at any nesting depth — including headings
|
||||
-- inside divs and block quotes) or inside @Code@/@RawInline@ inlines.
|
||||
module Filters.Smallcaps (apply) where
|
||||
|
||||
import Data.Char (isUpper, isAlpha)
|
||||
|
|
@ -25,13 +26,31 @@ import Text.Pandoc.Walk (walk)
|
|||
import qualified Utils as U
|
||||
|
||||
-- | Apply smallcaps detection to paragraph-level content.
|
||||
-- Skips heading blocks to avoid false positives.
|
||||
-- Heading blocks are skipped at /every/ nesting level (a top-level
|
||||
-- pattern match would miss a @Header@ inside a @Div@ or
|
||||
-- @BlockQuote@): each header's @Str@ content is swapped for a
|
||||
-- sentinel 'RawInline' before the wrapping walk and restored
|
||||
-- afterwards, so 'wrapCaps' can never see it, wherever the header
|
||||
-- sits in the block tree.
|
||||
apply :: Pandoc -> Pandoc
|
||||
apply (Pandoc meta blocks) = Pandoc meta (map applyBlock blocks)
|
||||
apply = walk restoreStr . walk wrapCaps . walk protectHeader
|
||||
|
||||
applyBlock :: Block -> Block
|
||||
applyBlock b@(Header {}) = b -- leave headings untouched
|
||||
applyBlock b = walk wrapCaps b
|
||||
-- | Sentinel format marking a @Str@ that must not be wrapped. It only
|
||||
-- exists between the protect and restore walks inside 'apply' and
|
||||
-- can never leak into the writer.
|
||||
skipFmt :: Format
|
||||
skipFmt = Format "smallcaps-skip"
|
||||
|
||||
protectHeader :: Block -> Block
|
||||
protectHeader (Header lvl attr ils) = Header lvl attr (walk protectStr ils)
|
||||
where
|
||||
protectStr (Str t) = RawInline skipFmt t
|
||||
protectStr x = x
|
||||
protectHeader b = b
|
||||
|
||||
restoreStr :: Inline -> Inline
|
||||
restoreStr (RawInline fmt t) | fmt == skipFmt = Str t
|
||||
restoreStr x = x
|
||||
|
||||
-- | Wrap an all-caps Str token in an abbr element, preserving any trailing
|
||||
-- punctuation (comma, period, colon, semicolon, closing paren/bracket)
|
||||
|
|
|
|||
|
|
@ -19,12 +19,15 @@
|
|||
-- source-preview rule in 'Site.rules') and renders a
|
||||
-- syntax-highlighted snippet via Prism.
|
||||
--
|
||||
-- Conservative-by-design: the trigger only fires on paths under a
|
||||
-- short whitelist of top-level directories, or a small set of named
|
||||
-- root files. This keeps the parser cheap and avoids false positives
|
||||
-- on words that happen to contain a slash and a dot.
|
||||
-- Conservative-by-design: the trigger only fires on paths the
|
||||
-- @/source/@ serving rule actually publishes ('isServedPath', a
|
||||
-- mirror of @sourcePreviewable@ in 'Site.rules'), or a small set of
|
||||
-- named root files. This keeps the parser cheap, avoids false
|
||||
-- positives on words that happen to contain a slash and a dot, and
|
||||
-- guarantees every wrapped path has a fetchable @/source/…@ copy.
|
||||
module Filters.SourceRefs (apply, isSourcePath, forgejoSourceUrl) where
|
||||
|
||||
import Control.Monad (when)
|
||||
import Data.IORef (IORef, atomicModifyIORef', newIORef, readIORef)
|
||||
import qualified Data.Map.Strict as Map
|
||||
import Data.Text (Text)
|
||||
|
|
@ -94,16 +97,17 @@ classifyExistingLink x = pure x
|
|||
-- Heuristic
|
||||
-- ---------------------------------------------------------------------------
|
||||
|
||||
-- | True when the text looks like a repo-relative path under one of
|
||||
-- the whitelisted directories (or is a whitelisted root file), ends
|
||||
-- in a known source extension, and contains only safe path
|
||||
-- characters. Conservative by design — the goal is no false
|
||||
-- positives on prose that incidentally contains a slash and a dot.
|
||||
-- | True when the text looks like a repo-relative path that the
|
||||
-- @/source/@ serving rule actually publishes (or is a whitelisted
|
||||
-- root file), ends in a known source extension, and contains only
|
||||
-- safe path characters. Conservative by design — the goal is no
|
||||
-- false positives on prose that incidentally contains a slash and a
|
||||
-- dot, and no wrapped path whose popup fetch would 404.
|
||||
isSourcePath :: Text -> Bool
|
||||
isSourcePath t = and
|
||||
[ not (T.null t)
|
||||
, T.all safeChar t
|
||||
, (hasKnownPrefix t && hasKnownExt t) || isKnownRootFile t
|
||||
, (isServedPath t && hasKnownExt t) || isKnownRootFile t
|
||||
]
|
||||
where
|
||||
safeChar c =
|
||||
|
|
@ -112,11 +116,26 @@ isSourcePath t = and
|
|||
|| ('0' <= c && c <= '9')
|
||||
|| c == '/' || c == '.' || c == '_' || c == '-' || c == '+'
|
||||
|
||||
hasKnownPrefix :: Text -> Bool
|
||||
hasKnownPrefix t = any (`T.isPrefixOf` t)
|
||||
[ "build/", "static/", "templates/", "tools/"
|
||||
, "nginx/", "data/", "content/", "yaml-source/"
|
||||
-- | Mirror of the @sourcePreviewable@ whitelist in 'Site.rules' (the
|
||||
-- rule that copies files to @/source/<path>@) — the two must stay
|
||||
-- aligned so every link this filter emits has a corresponding
|
||||
-- @/source/…@ target for the popup to fetch. Directories Site.hs
|
||||
-- does not serve (e.g. @content/@) are deliberately absent here:
|
||||
-- wrapping them would emit popups that are guaranteed to 404.
|
||||
isServedPath :: Text -> Bool
|
||||
isServedPath t = or
|
||||
[ "build/" `T.isPrefixOf` t && hasExt ".hs"
|
||||
, "static/js/" `T.isPrefixOf` t
|
||||
, "static/css/" `T.isPrefixOf` t
|
||||
, "templates/" `T.isPrefixOf` t
|
||||
, "tools/" `T.isPrefixOf` t && (hasExt ".sh" || hasExt ".py")
|
||||
, "nginx/" `T.isPrefixOf` t && hasExt ".conf"
|
||||
, "data/" `T.isPrefixOf` t
|
||||
&& not ("/" `T.isInfixOf` T.drop 5 t) -- top-level data files only
|
||||
&& (hasExt ".json" || hasExt ".yaml" || hasExt ".md" || hasExt ".bib")
|
||||
]
|
||||
where
|
||||
hasExt e = e `T.isSuffixOf` T.toLower t
|
||||
|
||||
hasKnownExt :: Text -> Bool
|
||||
hasKnownExt t =
|
||||
|
|
@ -125,7 +144,7 @@ hasKnownExt t =
|
|||
[ ".hs", ".js", ".mjs", ".css", ".html"
|
||||
, ".py", ".cabal", ".md", ".yaml", ".yml"
|
||||
, ".toml", ".sh", ".bash", ".svg", ".conf"
|
||||
, ".json", ".ini", ".tex"
|
||||
, ".json", ".ini", ".tex", ".bib"
|
||||
]
|
||||
|
||||
isKnownRootFile :: Text -> Bool
|
||||
|
|
@ -142,14 +161,19 @@ isKnownRootFile t = t `elem`
|
|||
-- File existence cache
|
||||
-- ---------------------------------------------------------------------------
|
||||
|
||||
-- | Process-wide memo of @doesFileExist@ results, keyed by the same
|
||||
-- path the popup will fetch. Hakyll runs this filter once per
|
||||
-- compiled page and the same source-file references recur across
|
||||
-- | Process-wide memo of /positive/ @doesFileExist@ results, keyed by
|
||||
-- the same path the popup will fetch. Hakyll runs this filter once
|
||||
-- per compiled page and the same source-file references recur across
|
||||
-- many pages (e.g. @build\/Filters\/Links.hs@ in the Links page,
|
||||
-- the Colophon, several essays); the cache turns N stats into one
|
||||
-- per distinct path. The build process's working directory is the
|
||||
-- project root, so the path can be passed straight to
|
||||
-- 'doesFileExist' without prefixing.
|
||||
-- per distinct path. Only existence is memoized: a missing file is
|
||||
-- re-stat'ed on every miss, so a source file created during a
|
||||
-- long-lived @make watch@ session is picked up on the next rebuild
|
||||
-- instead of staying "absent" for the process lifetime. (A file
|
||||
-- /deleted/ mid-watch stays cached as present until restart — the
|
||||
-- benign direction: the popup fetch 404s and simply never appears.)
|
||||
-- The build process's working directory is the project root, so the
|
||||
-- path can be passed straight to 'doesFileExist' without prefixing.
|
||||
{-# NOINLINE existsCacheRef #-}
|
||||
existsCacheRef :: IORef (Map.Map Text Bool)
|
||||
existsCacheRef = unsafePerformIO (newIORef Map.empty)
|
||||
|
|
@ -161,6 +185,7 @@ existsCached path = do
|
|||
Just b -> pure b
|
||||
Nothing -> do
|
||||
b <- doesFileExist (T.unpack path)
|
||||
when b $
|
||||
atomicModifyIORef' existsCacheRef (\m -> (Map.insert path b m, ()))
|
||||
pure b
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,13 @@
|
|||
-- HTML placeholders that transclude.js resolves at runtime.
|
||||
--
|
||||
-- A directive must be the sole content of a line (after trimming) to be
|
||||
-- replaced — this prevents accidental substitution inside prose or code.
|
||||
-- replaced — this prevents accidental substitution inside prose.
|
||||
--
|
||||
-- Code protection (honest scope): lines inside /fenced/ code blocks
|
||||
-- are passed through untouched ('Filters.Wikilinks.mapOutsideFences'),
|
||||
-- so fenced examples can show @{{slug}}@ literally. Indented code
|
||||
-- blocks and inline code spans are NOT recognised — a full-line
|
||||
-- directive inside either is still rewritten.
|
||||
--
|
||||
-- Examples:
|
||||
-- {{my-essay}} → full-page transclusion of /my-essay.html
|
||||
|
|
@ -14,11 +20,13 @@
|
|||
module Filters.Transclusion (preprocess) where
|
||||
|
||||
import Data.List (isSuffixOf, isPrefixOf, stripPrefix)
|
||||
import Filters.Wikilinks (mapOutsideFences)
|
||||
import qualified Utils as U
|
||||
|
||||
-- | Apply transclusion substitution to the raw Markdown source string.
|
||||
-- | Apply transclusion substitution to the raw Markdown source string,
|
||||
-- skipping lines inside fenced code blocks.
|
||||
preprocess :: String -> String
|
||||
preprocess = unlines . map processLine . lines
|
||||
preprocess = mapOutsideFences processLine
|
||||
|
||||
processLine :: String -> String
|
||||
processLine line =
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@
|
|||
module Filters.Viz (inlineViz) where
|
||||
|
||||
import Control.Exception (IOException, catch)
|
||||
import Data.Char (isHexDigit)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import qualified Data.Text as T
|
||||
import System.Directory (doesFileExist)
|
||||
|
|
@ -117,20 +118,47 @@ runScript baseDir attrs =
|
|||
|
||||
-- | Replace hardcoded black fill/stroke values with @currentColor@ so the
|
||||
-- embedded SVG inherits the CSS text colour in both light and dark modes.
|
||||
--
|
||||
-- Quoted attribute forms (@fill="#000"@) are self-delimiting — the
|
||||
-- closing quote bounds the match — so plain 'T.replace' is safe for
|
||||
-- them. Unquoted style-property forms (@fill:#000@) are not: naive
|
||||
-- replacement would also fire on the prefix of a longer hex colour
|
||||
-- (@fill:#000080@ → @fill:currentColor80@, invalid CSS). Those go
|
||||
-- through 'replaceHexColor', which rewrites a match only when it is
|
||||
-- not followed by another hex digit.
|
||||
processColors :: T.Text -> T.Text
|
||||
processColors
|
||||
= T.replace "fill=\"#000\"" "fill=\"currentColor\""
|
||||
. T.replace "fill=\"black\"" "fill=\"currentColor\""
|
||||
. T.replace "stroke=\"#000\"" "stroke=\"currentColor\""
|
||||
. T.replace "stroke=\"black\"" "stroke=\"currentColor\""
|
||||
. T.replace "fill:#000" "fill:currentColor"
|
||||
. replaceHexColor "fill:#000" "fill:currentColor"
|
||||
. T.replace "fill:black" "fill:currentColor"
|
||||
. T.replace "stroke:#000" "stroke:currentColor"
|
||||
. replaceHexColor "stroke:#000" "stroke:currentColor"
|
||||
. T.replace "stroke:black" "stroke:currentColor"
|
||||
. T.replace "fill=\"#000000\"" "fill=\"currentColor\""
|
||||
. T.replace "stroke=\"#000000\"" "stroke=\"currentColor\""
|
||||
. T.replace "fill:#000000" "fill:currentColor"
|
||||
. T.replace "stroke:#000000" "stroke:currentColor"
|
||||
. replaceHexColor "fill:#000000" "fill:currentColor"
|
||||
. replaceHexColor "stroke:#000000" "stroke:currentColor"
|
||||
|
||||
-- | 'T.replace' restricted to hex-boundary-terminated matches: an
|
||||
-- occurrence of @needle@ is rewritten only when the character after
|
||||
-- it is not another hex digit, so @fill:#000@ never fires inside the
|
||||
-- longer colours @fill:#0008@, @fill:#000080@, or @fill:#00000080@.
|
||||
-- (Mirrors 'Filters.Score.replaceHexColor'.)
|
||||
replaceHexColor :: T.Text -> T.Text -> T.Text -> T.Text
|
||||
replaceHexColor needle replacement = go
|
||||
where
|
||||
go t =
|
||||
let (pre, rest) = T.breakOn needle t
|
||||
in if T.null rest
|
||||
then pre
|
||||
else
|
||||
let after = T.drop (T.length needle) rest
|
||||
in case T.uncons after of
|
||||
Just (c, _) | isHexDigit c ->
|
||||
pre <> needle <> go after
|
||||
_ -> pre <> replacement <> go after
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- JSON safety for <script> embedding
|
||||
|
|
|
|||
|
|
@ -12,23 +12,129 @@
|
|||
-- replaced with hyphens, non-alphanumeric characters stripped, and
|
||||
-- a @.html@ suffix appended so the link resolves identically under
|
||||
-- the dev server, file:// previews, and nginx in production.
|
||||
module Filters.Wikilinks (preprocess) where
|
||||
--
|
||||
-- Code protection (honest scope): lines inside /fenced/ code blocks
|
||||
-- are passed through untouched (see 'mapOutsideFences'), and within a
|
||||
-- line, inline code spans (backtick runs, CommonMark equal-length
|
||||
-- matching) are skipped — so both fenced and @`inline`@ examples can
|
||||
-- show @[[…]]@ literally. Indented code blocks and code spans that
|
||||
-- cross a line break are NOT recognised; a wikilink inside those is
|
||||
-- still rewritten.
|
||||
module Filters.Wikilinks (preprocess, mapOutsideFences) where
|
||||
|
||||
import Data.Char (isAlphaNum, toLower, isSpace)
|
||||
import Data.List (intercalate)
|
||||
import qualified Utils as U
|
||||
|
||||
-- | Scan the raw Markdown source for @[[…]]@ wikilinks and replace them
|
||||
-- with standard Markdown link syntax.
|
||||
-- with standard Markdown link syntax. Processing is line-by-line and
|
||||
-- skips fenced code blocks; a wikilink therefore cannot span a line
|
||||
-- break (which was never a sensible authoring form).
|
||||
preprocess :: String -> String
|
||||
preprocess [] = []
|
||||
preprocess ('[':'[':rest) =
|
||||
preprocess = mapOutsideFences replaceWikilinks
|
||||
|
||||
replaceWikilinks :: String -> String
|
||||
replaceWikilinks = go
|
||||
where
|
||||
go [] = []
|
||||
-- Inline code span: a backtick run opens a span closed by a run of
|
||||
-- exactly the same length (CommonMark). Its body passes through
|
||||
-- verbatim so documentation can quote @`[[…]]`@ literally. An
|
||||
-- unclosed run is literal text — and then a following @[[…]]@ is
|
||||
-- genuinely a wikilink, matching how Pandoc will read the line.
|
||||
go s@('`':_) =
|
||||
let (run, afterRun) = span (== '`') s
|
||||
in case codeSpan (length run) afterRun of
|
||||
Just (body, after) -> run ++ body ++ run ++ go after
|
||||
Nothing -> run ++ go afterRun
|
||||
go ('[':'[':rest) =
|
||||
case break (== ']') rest of
|
||||
(inner, ']':']':after)
|
||||
| not (null inner) ->
|
||||
toMarkdownLink inner ++ preprocess after
|
||||
_ -> '[' : '[' : preprocess rest
|
||||
preprocess (c:rest) = c : preprocess rest
|
||||
toMarkdownLink inner ++ go after
|
||||
_ -> '[' : '[' : go rest
|
||||
go (c:rest) = c : go rest
|
||||
|
||||
-- @codeSpan n s@: the span body and the remainder after a closing
|
||||
-- run of exactly @n@ backticks; 'Nothing' when no closer exists on
|
||||
-- this line.
|
||||
codeSpan :: Int -> String -> Maybe (String, String)
|
||||
codeSpan n = loop
|
||||
where
|
||||
loop [] = Nothing
|
||||
loop s@('`':_) =
|
||||
let (run, rest) = span (== '`') s
|
||||
in if length run == n
|
||||
then Just ("", rest)
|
||||
else prepend run <$> loop rest
|
||||
loop (c:cs) = prepend [c] <$> loop cs
|
||||
prepend pre (body, after) = (pre ++ body, after)
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- Fence-aware line mapping (shared by all source-level preprocessors)
|
||||
-- ---------------------------------------------------------------------------
|
||||
|
||||
-- | Apply a line transformation to every line that is not part of a
|
||||
-- fenced code block. Shared by the three source-level preprocessors
|
||||
-- (wikilinks here, 'Filters.Transclusion', 'Filters.EmbedPdf') so
|
||||
-- their directive syntax can be quoted literally inside fenced code.
|
||||
--
|
||||
-- Fence tracking follows CommonMark: an opener is at most three
|
||||
-- spaces of indentation followed by a run of at least three backticks
|
||||
-- or tildes (longer runs allowed); for backtick fences the info
|
||||
-- string may not contain a backtick. The closer uses the same fence
|
||||
-- character, a run at least as long as the opener, and nothing but
|
||||
-- whitespace after it. An unclosed fence extends to the end of the
|
||||
-- document. Fence delimiter lines themselves pass through untouched.
|
||||
--
|
||||
-- Honest scope: only /fenced/ code blocks are protected. Indented
|
||||
-- code blocks and inline code spans are not recognised here — a
|
||||
-- directive inside either is still rewritten.
|
||||
mapOutsideFences :: (String -> String) -> String -> String
|
||||
mapOutsideFences f = unlines . go Nothing . lines
|
||||
where
|
||||
go _ [] = []
|
||||
go Nothing (l:ls) =
|
||||
case openingFence l of
|
||||
Just fence -> l : go (Just fence) ls
|
||||
Nothing -> f l : go Nothing ls
|
||||
go st@(Just fence) (l:ls)
|
||||
| closesFence fence l = l : go Nothing ls
|
||||
| otherwise = l : go st ls
|
||||
|
||||
-- | The fence character and run length of a CommonMark fence opener,
|
||||
-- or 'Nothing' when the line does not open a fence.
|
||||
openingFence :: String -> Maybe (Char, Int)
|
||||
openingFence l = do
|
||||
rest <- stripFenceIndent l
|
||||
case rest of
|
||||
(c:_) | c == '`' || c == '~' ->
|
||||
let run = takeWhile (== c) rest
|
||||
n = length run
|
||||
info = drop n rest
|
||||
in if n >= 3 && (c == '~' || '`' `notElem` info)
|
||||
then Just (c, n)
|
||||
else Nothing
|
||||
_ -> Nothing
|
||||
|
||||
-- | True when the line closes the fence opened by @(c, n)@: the same
|
||||
-- fence character, a run at least as long as the opener, and only
|
||||
-- whitespace after it.
|
||||
closesFence :: (Char, Int) -> String -> Bool
|
||||
closesFence (c, n) l =
|
||||
case stripFenceIndent l of
|
||||
Nothing -> False
|
||||
Just rest ->
|
||||
let run = takeWhile (== c) rest
|
||||
in length run >= n && all isSpace (drop (length run) rest)
|
||||
|
||||
-- | Strip up to three leading spaces (the indentation CommonMark allows
|
||||
-- on a fence line); 'Nothing' for four or more, which would be an
|
||||
-- indented code block rather than a fence.
|
||||
stripFenceIndent :: String -> Maybe String
|
||||
stripFenceIndent l =
|
||||
let (indent, rest) = span (== ' ') l
|
||||
in if length indent <= 3 then Just rest else Nothing
|
||||
|
||||
-- | Convert the inner content of @[[…]]@ to a Markdown link.
|
||||
--
|
||||
|
|
|
|||
Loading…
Reference in New Issue