170 lines
7.4 KiB
Haskell
170 lines
7.4 KiB
Haskell
{-# LANGUAGE GHC2021 #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
-- | Similar-links field: injects a "Related" list into essay/page contexts.
|
|
--
|
|
-- @data/similar-links.json@ is produced by @tools/embed.py@ at build time
|
|
-- (called from the Makefile after pagefind, before sign). It is a plain
|
|
-- JSON object mapping root-relative URL paths to lists of similar pages:
|
|
--
|
|
-- { "/essays/my-essay/": [{"url": "...", "title": "...", "score": 0.87}] }
|
|
--
|
|
-- This module loads that file with dependency tracking (so pages recompile
|
|
-- when embeddings change) and provides @similarLinksField@, which resolves
|
|
-- to an HTML list for the current page's URL.
|
|
--
|
|
-- If the file is absent (e.g. @.venv@ not set up, or first build) the field
|
|
-- returns @noResult@ — the @$if(similar-links)$@ guard in the template is
|
|
-- false and no "Related" section is rendered.
|
|
module SimilarLinks (similarLinksField) where
|
|
|
|
import Data.Maybe (fromMaybe)
|
|
import qualified Data.ByteString as BS
|
|
import qualified Data.Map.Strict as Map
|
|
import Data.Map.Strict (Map)
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.Encoding as TE
|
|
import qualified Data.Text.Encoding.Error as TE
|
|
import qualified Data.Aeson as Aeson
|
|
import Hakyll
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- JSON schema
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
data SimilarEntry = SimilarEntry
|
|
{ seUrl :: String
|
|
, seTitle :: String
|
|
, seScore :: Double
|
|
} deriving (Show)
|
|
|
|
instance Aeson.FromJSON SimilarEntry where
|
|
parseJSON = Aeson.withObject "SimilarEntry" $ \o ->
|
|
SimilarEntry
|
|
<$> o Aeson..: "url"
|
|
<*> o Aeson..: "title"
|
|
<*> o Aeson..: "score"
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Context field
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Maximum entries rendered in the "Related" block. The on-disk JSON may
|
|
-- contain more (embed.py's TOP_N); the template caps the display.
|
|
maxSimilar :: Int
|
|
maxSimilar = 3
|
|
|
|
-- | Provides @$similar-links$@ (HTML list) and @$has-similar-links$@
|
|
-- (boolean flag for template guards).
|
|
-- Returns @noResult@ when the JSON file is absent, unparseable, or the
|
|
-- current page has no similar entries.
|
|
--
|
|
-- Note on normalisation: 'tools/embed.py' emits map keys using the live
|
|
-- site URL (e.g. @/essays/foo.html@ for a flat page, @/essays/foo/@ for a
|
|
-- directory-index page), while Hakyll's route gives @essays/foo.html@.
|
|
-- 'normaliseUrl' collapses both forms to a canonical stem, and we apply
|
|
-- it to every JSON key on load so the lookup cannot miss.
|
|
similarLinksField :: Context String
|
|
similarLinksField = field "similar-links" $ \item -> do
|
|
-- Load with dependency tracking — pages recompile when the JSON changes.
|
|
slItem <- load (fromFilePath "data/similar-links.json") :: Compiler (Item String)
|
|
case Aeson.decodeStrict (TE.encodeUtf8 (T.pack (itemBody slItem)))
|
|
:: Maybe (Map T.Text [SimilarEntry]) of
|
|
Nothing -> fail "similar-links: could not parse data/similar-links.json"
|
|
Just rawMap -> do
|
|
mRoute <- getRoute (itemIdentifier item)
|
|
case mRoute of
|
|
Nothing -> fail "similar-links: item has no route"
|
|
Just r ->
|
|
let normMap = Map.mapKeys (T.pack . normaliseUrl . T.unpack) rawMap
|
|
key = T.pack (normaliseUrl ("/" ++ r))
|
|
entries = take maxSimilar (fromMaybe [] (Map.lookup key normMap))
|
|
in if null entries
|
|
then fail "no similar links"
|
|
else return (renderSimilarLinks entries)
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- URL normalisation (mirrors embed.py's URL derivation)
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
normaliseUrl :: String -> String
|
|
normaliseUrl url =
|
|
let t = T.pack url
|
|
-- strip query + fragment
|
|
t1 = fst (T.breakOn "?" (fst (T.breakOn "#" t)))
|
|
-- ensure leading slash
|
|
t2 = if T.isPrefixOf "/" t1 then t1 else "/" `T.append` t1
|
|
-- strip trailing index.html → keep the directory slash
|
|
t3 = fromMaybe t2 (T.stripSuffix "index.html" t2)
|
|
-- strip bare .html extension only for non-index pages
|
|
t4 = fromMaybe t3 (T.stripSuffix ".html" t3)
|
|
in percentDecode (T.unpack t4)
|
|
|
|
-- | Percent-decode @%XX@ escapes (UTF-8) so percent-encoded paths
|
|
-- collide with their decoded form on map lookup. Mirrors
|
|
-- 'Backlinks.percentDecode'; the two implementations are intentionally
|
|
-- duplicated because they apply different normalisations *before*
|
|
-- decoding (Backlinks strips @.html@ unconditionally; SimilarLinks
|
|
-- preserves the trailing-slash form for index pages).
|
|
percentDecode :: String -> String
|
|
percentDecode = T.unpack . TE.decodeUtf8With TE.lenientDecode . BS.pack . go
|
|
where
|
|
go [] = []
|
|
go ('%':a:b:rest)
|
|
| Just hi <- hexDigit a
|
|
, Just lo <- hexDigit b
|
|
= fromIntegral (hi * 16 + lo) : go rest
|
|
go (c:rest) = fromIntegral (fromEnum c) : go rest
|
|
|
|
hexDigit c
|
|
| c >= '0' && c <= '9' = Just (fromEnum c - fromEnum '0')
|
|
| c >= 'a' && c <= 'f' = Just (fromEnum c - fromEnum 'a' + 10)
|
|
| c >= 'A' && c <= 'F' = Just (fromEnum c - fromEnum 'A' + 10)
|
|
| otherwise = Nothing
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- HTML rendering
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Render the Related block. Each anchor gets:
|
|
-- * @class="similar-link"@ — whitelist for popups.js so the default
|
|
-- footer-exclusion does not fire (content preview on hover).
|
|
-- * @data-link-icon@ / @data-link-icon-type@ — page or document icon,
|
|
-- rendered via the existing a[data-link-icon] mask-image system in
|
|
-- typography.css.
|
|
-- * For PDFs: @class="pdf-link"@ + @data-pdf-src@ + href rewritten to
|
|
-- the PDF.js viewer, matching the rest of the site. The pdfContent
|
|
-- provider in popups.js binds on @.pdf-link[data-pdf-src]@.
|
|
renderSimilarLinks :: [SimilarEntry] -> String
|
|
renderSimilarLinks entries =
|
|
"<ul class=\"similar-links-list\">\n"
|
|
++ concatMap renderOne entries
|
|
++ "</ul>"
|
|
where
|
|
renderOne se
|
|
| isPdfUrl (seUrl se) = renderPdf se
|
|
| otherwise = renderPage se
|
|
|
|
renderPage se =
|
|
"<li class=\"similar-links-item\">"
|
|
++ "<a class=\"similar-link\""
|
|
++ " href=\"" ++ escapeHtml (seUrl se) ++ "\""
|
|
++ " data-link-icon=\"internal\" data-link-icon-type=\"svg\">"
|
|
++ escapeHtml (seTitle se)
|
|
++ "</a></li>\n"
|
|
|
|
renderPdf se =
|
|
let raw = seUrl se
|
|
viewerUrl = "/pdfjs/web/viewer.html?file=" ++ escapeHtml raw
|
|
in "<li class=\"similar-links-item\">"
|
|
++ "<a class=\"similar-link pdf-link\""
|
|
++ " href=\"" ++ viewerUrl ++ "\""
|
|
++ " data-pdf-src=\"" ++ escapeHtml raw ++ "\""
|
|
++ " data-link-icon=\"document\" data-link-icon-type=\"svg\">"
|
|
++ escapeHtml (seTitle se)
|
|
++ "</a></li>\n"
|
|
|
|
isPdfUrl u =
|
|
let lower = T.toLower (T.pack u)
|
|
(path, _) = T.break (== '#') lower
|
|
in ".pdf" `T.isSuffixOf` path
|