88 lines
3.5 KiB
Haskell
88 lines
3.5 KiB
Haskell
{-# LANGUAGE GHC2021 #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
-- | Wikilink syntax preprocessor.
--
-- Applied to the raw Markdown source string /before/ Pandoc parsing.
-- Transforms:
--
--   * @[[Page Title]]@          → @[Page Title](/page-title.html)@
--   * @[[Page Title|Display]]@  → @[Display](/page-title.html)@
--
-- The URL slug is derived from the page title: lowercased, spaces
-- replaced with hyphens, non-alphanumeric characters stripped, and
-- a @.html@ suffix appended so the link resolves identically under
-- the dev server, file:// previews, and nginx in production.
|
|
module Filters.Wikilinks (preprocess) where
|
|
|
|
import Data.Char (isAlphaNum, toLower, isSpace)
|
|
import Data.List (intercalate)
|
|
import qualified Utils as U
|
|
|
|
-- | Scan the raw Markdown source for @[[…]]@ wikilinks and replace them
-- with standard Markdown link syntax.
--
-- A wikilink is recognised when @[[@ is followed by text containing no
-- @]@ and then immediately by @]]@. The text between the brackets is
-- handed to 'toMarkdownLink'. Anything else — an unterminated @[[@, a
-- single @]@ before the closing pair, or a link whose title part (the
-- text before the first @|@) contains no alphanumeric character, such as
-- @[[]]@, @[[ ]]@ or @[[|text]]@ — is passed through unchanged, because it
-- cannot yield a meaningful page slug.
--
-- The scan is purely textual: it keeps no Markdown context, so @[[…]]@
-- inside code spans or fenced code blocks is rewritten as well.
preprocess :: String -> String
preprocess [] = []
preprocess ('[':'[':rest) =
  case break (== ']') rest of
    (inner, ']':']':after)
      | hasTitle inner -> toMarkdownLink inner ++ preprocess after
    -- Not a well-formed wikilink: emit the two brackets literally and keep
    -- scanning right after them.
    _ -> '[' : '[' : preprocess rest
  where
    -- True when the title part holds at least one character that can
    -- appear in a slug. This subsumes the plain emptiness check.
    hasTitle = any isAlphaNum . takeWhile (/= '|')
preprocess (c:rest) = c : preprocess rest
|
|
|
|
-- | Convert the inner content of @[[…]]@ to a Markdown link.
--
-- The content is split by 'splitOnPipe' into a page title and a display
-- text. If the display text comes back empty (as in @[[Title|]]@), the
-- title is shown instead, so the link never renders with no visible text.
--
-- The display text is escaped via 'escMdLinkText' so that a literal @]@,
-- @[@, or backslash in it does not break the surrounding Markdown link
-- syntax. The URL is @/@ + 'slugify' of the title + @.html@. 'slugify'
-- only emits hyphens and characters accepted by 'isAlphaNum' (which may
-- be non-ASCII letters or digits), none of which can terminate the
-- @(…)@ URL part, so no URL-side escaping is applied here.
toMarkdownLink :: String -> String
toMarkdownLink inner =
  let (title, display) = splitOnPipe inner
      -- Fall back to the title when the author left the display part blank.
      label = if null display then title else display
      url   = "/" ++ slugify title ++ ".html"
  in  "[" ++ escMdLinkText label ++ "](" ++ url ++ ")"
|
|
|
|
-- | Backslash-escape the characters that could prematurely end the
-- display-text segment of a Markdown link: the backslash itself, @[@,
-- and @]@. Each input character is inspected exactly once, so the
-- backslashes inserted here are never escaped a second time.
--
-- Left alone on purpose: @_@, @*@, @\`@, @<@. These are Markdown inline
-- formatting markers; escaping them would stop authors from using
-- emphasis, code, or inline HTML inside a wikilink's display text.
escMdLinkText :: String -> String
escMdLinkText = foldr protect []
  where
    -- Prefix a special character with a backslash; copy anything else.
    protect ch acc
      | ch `elem` ['\\', '[', ']'] = '\\' : ch : acc
      | otherwise                  = ch : acc
|
|
|
|
-- | Break a wikilink body at its first @|@ into @(title, display)@.
--
-- With no @|@ present the whole body serves as both title and display.
-- Any further @|@ characters stay part of the display text. Both
-- components are passed through 'U.trim' (presumably whitespace trimming;
-- the helper is defined in "Utils").
splitOnPipe :: String -> (String, String)
splitOnPipe raw
  | null fromPipe = (whole, whole)
  | otherwise     = (U.trim beforePipe, U.trim (drop 1 fromPipe))
  where
    -- 'fromPipe' is either empty or starts with the separating '|'.
    (beforePipe, fromPipe) = span (/= '|') raw
    whole = U.trim raw
|
|
|
|
-- | Produce a URL slug: lowercase, words joined by single hyphens,
-- non-alphanumeric characters removed.
--
-- Every character that is not alphanumeric (per 'isAlphaNum') acts as a
-- word separator, and any run of separators collapses into one hyphen:
--
-- > slugify "Page Title" == "page-title"
-- > slugify "well-known" == "well-known"
-- > slugify "A - B"      == "a-b"
-- > slugify "foo--bar"   == "foo-bar"
--
-- Trailing punctuation is dropped rather than preserved as a dangling
-- hyphen — @slugify "end." == "end"@, not @"end-"@. This is intentional:
-- wikilinks tend to end sentences with a period and the desired URL is
-- almost always the terminal-punctuation-free form.
slugify :: String -> String
slugify = intercalate "-" . words . map normalise
  where
    normalise c
      | isAlphaNum c          = toLower c
      -- Whitespace and hyphens are word separators. Mapping the hyphen to
      -- a space (instead of keeping it) lets 'words' collapse runs such as
      -- " - " or "--" and drop leading/trailing hyphens.
      | isSpace c || c == '-' = ' '
      -- Any other punctuation is discarded; replacing it with a space
      -- still splits the words on either side of it.
      | otherwise             = ' '
|
|
|