From f41311a3eb248e2801dfe0f845d117838755e3da Mon Sep 17 00:00:00 2001 From: Levi Neuwirth Date: Sat, 2 May 2026 10:40:43 -0400 Subject: [PATCH] Inline code reference previews --- build/Filters.hs | 6 +- build/Filters/Links.hs | 6 ++ build/Filters/SourceRefs.hs | 193 ++++++++++++++++++++++++++++++++++++ build/Site.hs | 44 ++++++++ content/links.md | 9 +- levineuwirth.cabal | 1 + static/css/popups.css | 90 +++++++++++++++++ static/cv.pdf | Bin 43688 -> 43550 bytes static/js/popups.js | 112 +++++++++++++++++++++ static/resume.pdf | Bin 31866 -> 31827 bytes 10 files changed, 455 insertions(+), 6 deletions(-) create mode 100644 build/Filters/SourceRefs.hs diff --git a/build/Filters.hs b/build/Filters.hs index 0f532e1..b6fbd71 100644 --- a/build/Filters.hs +++ b/build/Filters.hs @@ -11,6 +11,7 @@ import Text.Pandoc.Definition (Pandoc) import qualified Filters.Sidenotes as Sidenotes import qualified Filters.Typography as Typography import qualified Filters.Links as Links +import qualified Filters.SourceRefs as SourceRefs import qualified Filters.Smallcaps as Smallcaps import qualified Filters.Dropcaps as Dropcaps import qualified Filters.Math as Math @@ -32,7 +33,8 @@ import qualified Filters.Aftermatter as Aftermatter -- resolution of co-located assets. applyAll :: FilePath -> Pandoc -> IO Pandoc applyAll srcDir doc = do - imagesDone <- Images.apply srcDir doc + imagesDone <- Images.apply srcDir doc + sourceRefsDone <- SourceRefs.apply imagesDone pure . Aftermatter.apply . Sidenotes.apply @@ -42,7 +44,7 @@ applyAll srcDir doc = do . Dropcaps.apply . Math.apply . Code.apply - $ imagesDone + $ sourceRefsDone -- | Apply source-level preprocessors to the raw Markdown string. 
-- Order matters: EmbedPdf must run before Transclusion, because the diff --git a/build/Filters/Links.hs b/build/Filters/Links.hs index d259048..6047d5a 100644 --- a/build/Filters/Links.hs +++ b/build/Filters/Links.hs @@ -43,6 +43,12 @@ classifyPdfLink (Link (ident, classes, kvs) ils (url, title)) classifyPdfLink x = x classifyLink :: Inline -> Inline +classifyLink l@(Link (_, classes, _) _ _) + -- Source-ref links are owned by Filters.SourceRefs: they keep the + -- inline-code chrome of their body, must not receive an external + -- brand icon stamp, and have their own popup provider. Leave them + -- entirely alone. + | "source-ref" `elem` classes = l classifyLink (Link (ident, classes, kvs) ils (url, title)) | isExternal url = let icon = domainIcon url diff --git a/build/Filters/SourceRefs.hs b/build/Filters/SourceRefs.hs new file mode 100644 index 0000000..64656bb --- /dev/null +++ b/build/Filters/SourceRefs.hs @@ -0,0 +1,193 @@ +{-# LANGUAGE GHC2021 #-} +{-# LANGUAGE OverloadedStrings #-} +-- | Detect repo-relative source-file references in prose and wrap them +-- in a link that triggers a hover-preview popup of the file's contents. +-- +-- Two trigger forms: +-- +-- * Inline @\`build\/Filters\/Links.hs\`@ — Markdown inline code whose +-- text passes a conservative source-path heuristic. +-- * A Markdown link to +-- @https:\/\/git.levineuwirth.org\/neuwirth\/levineuwirth.org\/(src|raw)\/branch\/\<branch\>\/\<path\>@. +-- +-- Both produce +-- @\<a class=\"source-ref\" data-source-path=\"\<path\>\" href=\"\<forgejo-url\>\"\>…\<\/a\>@. The href +-- points to the Forgejo source viewer so a click without JS — or a +-- popup that fails to fetch — still resolves to a useful target. +-- The popup provider in @static\/js\/popups.js@ fetches +-- @\/source\/\<path\>@ (a same-origin copy emitted by the Hakyll +-- source-preview rule in 'Site.rules') and renders a +-- syntax-highlighted snippet via Prism. +-- +-- Conservative-by-design: the trigger only fires on paths under a +-- short whitelist of top-level directories, or a small set of named +-- root files.
-- This keeps the parser cheap and avoids false positives
-- on words that happen to contain a slash and a dot.
module Filters.SourceRefs (apply, isSourcePath, forgejoSourceUrl) where

import Data.IORef (IORef, atomicModifyIORef', newIORef, readIORef)
import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified Data.Text as T
import System.Directory (doesFileExist)
import System.IO.Unsafe (unsafePerformIO)
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk, walkM)

-- | Rewrite a document so that source-file references become
-- @source-ref@ links.
--
-- Three passes, in this order:
--
-- 1. 'classifyExistingLink' tags existing Markdown links that point at
--    a Forgejo source URL.
-- 2. 'wrapInlineCode' wraps inline-code source paths in a new link.
-- 3. 'unnestSourceRefs' undoes pass 2 wherever the inline code was
--    already the body of a link.
--
-- Pass 3 is needed because 'walkM' visits children before their
-- parent: pass 2 therefore also sees a 'Code' node that sits inside a
-- 'Link' (for example @[\`build\/Site.hs\`](url)@) and would otherwise
-- leave a 'Link' nested inside a 'Link'.
--
-- Passes 1 and 2 only add the @source-ref@ class when it is not
-- already present and pass 3 is a no-op on an already-flat tree, so
-- re-runs are idempotent.
--
-- Runs in 'IO' because the heuristic confirms each candidate is a
-- real on-disk file before wrapping.  This rules out paths like
-- @data/backlinks.json@ that look like source but are Hakyll build
-- artifacts produced into @_site/@ — wrapping those would emit a
-- link whose popup is guaranteed to 404.
apply :: Pandoc -> IO Pandoc
apply doc = do
  afterLinks <- walkM classifyExistingLink doc
  afterCode <- walkM wrapInlineCode afterLinks
  pure (walk unnestSourceRefs afterCode)

-- | Inside every 'Link', replace a nested link of exactly the shape
-- 'wrapInlineCode' emits (empty identifier, sole class @source-ref@,
-- a single 'Code' node as body) by that 'Code' node.  The outer link
-- keeps its own target and attributes.  Non-link inlines are returned
-- unchanged.
unnestSourceRefs :: Inline -> Inline
unnestSourceRefs (Link attr body target) =
  Link attr (map (walk unwrapRef) body) target
  where
    unwrapRef :: Inline -> Inline
    unwrapRef (Link (ident, classes, _) [code@(Code _ _)] _)
      | T.null ident
      , classes == ["source-ref"] = code
    unwrapRef other = other
unnestSourceRefs other = other

-- | Inline @`path`@ → @\<a class=source-ref …\>\<code\>path\<\/code\>\<\/a\>@.
-- The original 'Code' node is preserved as the link's body so the
-- inline-code chrome (mono font, background) survives unchanged.
wrapInlineCode :: Inline -> IO Inline
wrapInlineCode orig@(Code (_, cClasses, _) txt)
  | "source-ref" `notElem` cClasses
  , isSourcePath txt = do
      -- Only wrap paths that exist on disk; otherwise leave the code
      -- span exactly as written.
      onDisk <- existsCached txt
      pure (if onDisk then wrapped else orig)
  where
    -- The untouched 'Code' node becomes the link body; the href is the
    -- Forgejo viewer URL and @data-source-path@ carries the bare path.
    wrapped =
      Link
        ( ""
        , ["source-ref"]
        , [ ("data-source-path", txt)
          , ("target", "_blank")
          , ("rel", "noopener noreferrer")
          ]
        )
        [orig]
        (forgejoSourceUrl txt, "")
wrapInlineCode x = pure x

-- | Existing Markdown link to a Forgejo source URL on this site's git
-- host → tagged @source-ref@ and given a @data-source-path@ holding
-- the repo-relative path extracted from the URL.  The link's
-- identifier, body, URL and title are kept; the class and attribute
-- are appended after the existing ones.  Links that are already
-- tagged, whose URL is not recognised by 'forgejoSourcePath', whose
-- path fails 'isSourcePath', or whose file is not on disk are
-- returned unchanged.
--
-- NOTE(review): the branch segment of the URL is discarded, so a link
-- to a non-@main@ branch is still checked against the local working
-- tree — confirm that is intended.
classifyExistingLink :: Inline -> IO Inline
classifyExistingLink orig@(Link (ident, classes, kvs) ils target@(url, _))
  | "source-ref" `notElem` classes
  , Just path <- forgejoSourcePath url
  , isSourcePath path = do
      onDisk <- existsCached path
      pure $
        if onDisk
          then
            Link
              ( ident
              , classes ++ ["source-ref"]
              , kvs ++ [("data-source-path", path)]
              )
              ils
              target
          else orig
classifyExistingLink x = pure x

-- ---------------------------------------------------------------------------
-- Heuristic
-- ---------------------------------------------------------------------------

-- | True when the text looks like a repo-relative path under one of
-- the whitelisted directories (or is a whitelisted root file), ends
-- in a known source extension, and contains only safe path
-- characters.  Conservative by design — the goal is no false
-- positives on prose that incidentally contains a slash and a dot.
--
-- Every @/@-separated segment must be non-empty and must not be @.@
-- or @..@.  Without that check a string such as
-- @data\/..\/..\/etc\/nginx\/nginx.conf@ satisfies the prefix,
-- extension and character tests, and would then be probed on disk
-- outside the repository and emitted as a @data-source-path@.
isSourcePath :: Text -> Bool
isSourcePath t =
  not (T.null t)
    && T.all safeChar t
    && all properSegment (T.splitOn "/" t)
    && ((hasKnownPrefix t && hasKnownExt t) || isKnownRootFile t)
  where
    -- Rejects empty segments (leading, trailing or doubled slashes)
    -- and the two directory-navigation segments.
    properSegment seg = seg `notElem` ["", ".", ".."]

    -- ASCII letters and digits plus a handful of punctuation marks.
    safeChar c =
      ('a' <= c && c <= 'z')
        || ('A' <= c && c <= 'Z')
        || ('0' <= c && c <= '9')
        || c `elem` ("/._-+" :: String)

-- | True when the path starts with one of the whitelisted top-level
-- directories (the match is case-sensitive and includes the slash).
--
-- NOTE(review): @content\/@ and @yaml-source\/@ are accepted here but
-- have no matching pattern in the source-preview rule this patch adds
-- to @build\/Site.hs@, so their popups would 404 — confirm.
hasKnownPrefix :: Text -> Bool
hasKnownPrefix t = any (`T.isPrefixOf` t) knownPrefixes
  where
    knownPrefixes =
      [ "build/", "static/", "templates/", "tools/"
      , "nginx/", "data/", "content/", "yaml-source/"
      ]

-- | True when the path ends, case-insensitively, in one of the known
-- source-file extensions.
hasKnownExt :: Text -> Bool
hasKnownExt t = any (`T.isSuffixOf` lowered) knownExts
  where
    lowered = T.toLower t
    knownExts =
      [ ".hs", ".js", ".mjs", ".css", ".html"
      , ".py", ".cabal", ".md", ".yaml", ".yml"
      , ".toml", ".sh", ".bash", ".svg", ".conf"
      , ".json", ".ini", ".tex"
      ]

-- | True when the text is exactly one of the named files that live at
-- the repository root (case-sensitive, no directory component).
isKnownRootFile :: Text -> Bool
isKnownRootFile = (`elem` knownRootFiles)
  where
    knownRootFiles =
      [ "Makefile"
      , "levineuwirth.cabal"
      , "cabal.project", "cabal.project.freeze"
      , "pyproject.toml", "uv.lock"
      , "WRITING.md", "HOMEPAGE.md", "PHOTOGRAPHY.md", "README.md"
      , "LICENSE", "checklist.md"
      ] :: [Text]

-- ---------------------------------------------------------------------------
-- File existence cache
-- ---------------------------------------------------------------------------

-- | Process-wide memo of @doesFileExist@ results, keyed by the same
-- path the popup will fetch.  Hakyll runs this filter once per
-- compiled page and the same source-file references recur across
-- many pages (e.g. @build\/Filters\/Links.hs@ in the Links page,
-- the Colophon, several essays); the cache turns N stats into one
-- per distinct path.  The build process's working directory is the
-- project root, so the path can be passed straight to
-- 'doesFileExist' without prefixing.
{-# NOINLINE existsCacheRef #-}
-- The NOINLINE pragma above is what makes the 'unsafePerformIO' below
-- usable as a global: it stops the binding from being duplicated at
-- use sites, so every caller shares the one 'IORef'.
existsCacheRef :: IORef (Map.Map Text Bool)
existsCacheRef = unsafePerformIO (newIORef Map.empty)

-- | Memoised 'doesFileExist'.  A path already present in the cache is
-- answered from it; otherwise the file system is consulted once and
-- the answer — positive or negative — is recorded for later calls.
existsCached :: Text -> IO Bool
existsCached path =
  readIORef existsCacheRef >>= maybe probe pure . Map.lookup path
  where
    -- Cache miss: stat the file, remember the result, return it.
    probe = do
      found <- doesFileExist (T.unpack path)
      atomicModifyIORef' existsCacheRef $ \seen ->
        (Map.insert path found seen, ())
      pure found

-- ---------------------------------------------------------------------------
-- Forgejo URL helpers
-- ---------------------------------------------------------------------------

-- | Forgejo source-viewer URL for a repo-relative path.  Pinned to the
-- @main@ branch so previews always reflect the deployed tip.
forgejoSourceUrl :: Text -> Text
forgejoSourceUrl =
  T.append "https://git.levineuwirth.org/neuwirth/levineuwirth.org/src/branch/main/"

-- | Inverse of 'forgejoSourceUrl': extract the repo-relative path from
-- a Forgejo URL on this site's git host.  Recognises both the
-- @\/src\/branch\/\<branch\>\/@ web view and the
-- @\/raw\/branch\/\<branch\>\/@ variants.  Returns 'Nothing' for any
-- other URL.
+forgejoSourcePath :: Text -> Maybe Text +forgejoSourcePath url = do + rest <- T.stripPrefix repoBase url + afterBranch <- + case T.stripPrefix "src/branch/" rest of + Just r -> Just r + Nothing -> T.stripPrefix "raw/branch/" rest + let (_branch, slashAndPath) = T.breakOn "/" afterBranch + path = T.drop 1 slashAndPath + if T.null path then Nothing else Just path + where + repoBase = "https://git.levineuwirth.org/neuwirth/levineuwirth.org/" diff --git a/build/Site.hs b/build/Site.hs index 32419bf..6c9d9a4 100644 --- a/build/Site.hs +++ b/build/Site.hs @@ -197,6 +197,50 @@ rules = do -- Templates match "templates/**" $ compile templateBodyCompiler + -- --------------------------------------------------------------------------- + -- Source-preview corpus — raw copies of source files, served at + -- @/source/<path>@, fetched on hover by the popup provider in + -- @static/js/popups.js@ (sourceContent → Prism highlighting). + -- + -- Conservative whitelist: must stay aligned with 'isSourcePath' in + -- @build/Filters/SourceRefs.hs@ so that every link the filter + -- emits has a corresponding @/source/…@ target. Files in @static/@ + -- are also served under their normal /js/, /css/ paths via a + -- separate rule above; the @"source-preview"@ version lets Hakyll + -- compile the same identifier twice without conflict. + -- + -- Anything not matched here will silently 404 on hover and the + -- popup will simply not appear, which is the right failure mode + -- if the heuristic ever wraps a path we did not mean to expose. + -- --------------------------------------------------------------------------- + let sourcePreviewable = + "build/**.hs" + .||. "static/js/**" + .||. "static/css/**" + .||. "templates/**" + .||. "tools/**.sh" + .||. "tools/**.py" + .||. "nginx/**.conf" + .||. "data/*.json" + .||. "data/*.yaml" + .||. "data/*.md" + .||. "data/*.bib" + .||. "*.cabal" + .||. "cabal.project" + .||. "cabal.project.freeze" + .||. "Makefile" + .||. "pyproject.toml" + .||.
"uv.lock" + .||. "LICENSE" + .||. "checklist.md" + .||. "WRITING.md" + .||. "HOMEPAGE.md" + .||. "PHOTOGRAPHY.md" + .||. "README.md" + match sourcePreviewable $ version "source-preview" $ do + route $ customRoute (\ident -> "source/" ++ toFilePath ident) + compile copyFileCompiler + -- Link annotations — author-defined previews for any URL match "data/annotations.json" $ do route idRoute diff --git a/content/links.md b/content/links.md index 048ae9f..0d696c6 100644 --- a/content/links.md +++ b/content/links.md @@ -13,7 +13,7 @@ beside each link are stamped automatically by the build's link classifier ## Academic { #academic } - [Brown CS](https://cs.brown.edu/people/ugrad/lneuwirt/) - +- [Google Scholar](https://scholar.google.com/citations?user=9_62MFgAAAAJ&hl=en&oi=ao) - [ORCID](https://orcid.org/0000-0000-0000-0000) @@ -22,7 +22,7 @@ beside each link are stamped automatically by the build's link classifier ## Artistic { #artistic } -Coming soon! +- [YouTube](https://www.youtube.com/@levineuwirth) @@ -31,7 +31,6 @@ Coming soon! ## Code { #code } - [Forgejo](https://git.levineuwirth.org/neuwirth) - - [GitHub](https://github.com/levineuwirth) @@ -41,7 +40,9 @@ Coming soon! ## Miscellaneous { #miscellaneous } - [English Wikipedia](https://en.wikipedia.org/wiki/User:LudicrousSengir) - +- [French Wikipedia](https://fr.wikipedia.org/wiki/Utilisateur:LudicrousSengir) +- [iNaturalist](https://inaturalist.org/people/lneuwirth) +- [Spanish Wikipedia](https://es.wikipedia.org/wiki/User:LudicrousSengir)