Add robots.txt and sitemap.xml; tidy essay-route prefix-strip

- Emit a minimal robots.txt that points at the sitemap.
- Emit sitemap.xml covering every dated content page (essays, blog,
  fiction, poetry, music) with absolute <loc> and frontmatter-derived
  <lastmod>. Standalone pages (about, colophon, etc.) are
  intentionally omitted: they're reachable via the main nav, lack
  date: frontmatter, and would force a fallback lastmod that
  misrepresents staleness.
- Replace the magic 'drop 8' offset in essay routing with
  stripPrefix "content/". Same behavior for current paths, but reads
  structurally and, if the prefix ever changes, falls back to the
  unmodified path instead of silently chopping eight arbitrary
  characters.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Levi Neuwirth 2026-05-07 15:08:33 -04:00
parent 339433db20
commit a818b7df9b
2 changed files with 59 additions and 3 deletions

View File

@ -4,7 +4,7 @@ module Site (rules) where
import Control.Monad (forM, forM_, when) import Control.Monad (forM, forM_, when)
import Data.Char (isSpace, toUpper) import Data.Char (isSpace, toUpper)
import Data.List (groupBy, isPrefixOf, sort, sortBy) import Data.List (groupBy, isPrefixOf, sort, sortBy, stripPrefix)
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Maybe (catMaybes, fromMaybe, listToMaybe) import Data.Maybe (catMaybes, fromMaybe, listToMaybe)
import Data.Ord (Down (..), comparing) import Data.Ord (Down (..), comparing)
@ -383,13 +383,14 @@ rules = do
fname = takeFileName fp fname = takeFileName fp
isIndex = fname == "index.md" isIndex = fname == "index.md"
isDraft = "content/drafts/essays/" `isPrefixOf` fp isDraft = "content/drafts/essays/" `isPrefixOf` fp
stripContent = fromMaybe fp (stripPrefix "content/" fp)
in case (isDraft, isIndex) of in case (isDraft, isIndex) of
-- content/drafts/essays/slug/index.md → drafts/essays/slug/index.html -- content/drafts/essays/slug/index.md → drafts/essays/slug/index.html
(True, True) -> replaceExtension (drop 8 fp) "html" (True, True) -> replaceExtension stripContent "html"
-- content/drafts/essays/foo.md → drafts/essays/foo.html -- content/drafts/essays/foo.md → drafts/essays/foo.html
(True, False) -> "drafts/essays/" ++ replaceExtension fname "html" (True, False) -> "drafts/essays/" ++ replaceExtension fname "html"
-- content/essays/slug/index.md → essays/slug/index.html -- content/essays/slug/index.md → essays/slug/index.html
(False, True) -> replaceExtension (drop 8 fp) "html" (False, True) -> replaceExtension stripContent "html"
-- content/essays/foo.md → essays/foo.html -- content/essays/foo.md → essays/foo.html
(False, False) -> "essays/" ++ replaceExtension fname "html" (False, False) -> "essays/" ++ replaceExtension fname "html"
compile $ essayCompiler compile $ essayCompiler
@ -919,6 +920,49 @@ rules = do
<> defaultContext <> defaultContext
renderAtom musicFeedConfig feedCtx compositions renderAtom musicFeedConfig feedCtx compositions
-- ---------------------------------------------------------------------------
-- robots.txt — minimal, just points crawlers at the sitemap
--
-- The file is produced by `create` from the literal line list below rather
-- than from a matched source file. `unlines` terminates every entry with a
-- newline, so the empty string yields the blank separator line and the
-- Sitemap line ends the file with a trailing newline.
-- NOTE(review): the Sitemap URL hard-codes the same origin that the
-- sitemap.xml rule binds as siteRoot; keep the two in sync.
-- ---------------------------------------------------------------------------
create ["robots.txt"] $ do
route idRoute
compile $ makeItem $ unlines
[ "User-agent: *"
, "Allow: /"
, ""
, "Sitemap: https://levineuwirth.org/sitemap.xml"
]
-- ---------------------------------------------------------------------------
-- sitemap.xml — every dated content page (essays, blog, poetry, fiction,
-- music). Standalone pages (about, colophon, etc.) are intentionally
-- omitted: they're reachable via the main nav, lack `date:` frontmatter,
-- and would force a fallback lastmod that misrepresents staleness.
-- ---------------------------------------------------------------------------
create ["sitemap.xml"] $ do
route idRoute
compile $ do
-- Load the "content" snapshot of every item matched by the pattern below
-- (restricted to unversioned items via hasNoVersion) and order the result
-- with recentFirst.
-- NOTE(review): assumes every matched rule saves a "content" snapshot and
-- that every matched item has a resolvable date for recentFirst — confirm
-- against the rules for each section.
entries <- recentFirst
=<< loadAllSnapshots
( ( allEssays
.||. "content/blog/*.md"
.||. "content/fiction/*.md"
.||. allPoetry
.||. "content/music/*/index.md"
)
.&&. hasNoVersion
)
"content"
-- "root" is bound in both contexts because templates/sitemap.xml expands
-- $root$ outside the $for(entries)$ loop (the home-page <loc>) and inside
-- it (each entry's <loc>).
let siteRoot = "https://levineuwirth.org"
sitemapItemCtx =
constField "root" siteRoot
-- Exposed to the template as $lastmod$ in YYYY-MM-DD form; the
-- template wraps its <lastmod> element in $if(lastmod)$.
<> dateField "lastmod" "%Y-%m-%d"
<> defaultContext
sitemapCtx =
constField "root" siteRoot
<> listField "entries" sitemapItemCtx (return entries)
-- The item body is an empty placeholder: the template never references
-- $body$, so all output comes from the template and sitemapCtx.
makeItem ("" :: String)
>>= loadAndApplyTemplate "templates/sitemap.xml" sitemapCtx
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Epistemic metadata extraction -- Epistemic metadata extraction
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------

12
templates/sitemap.xml Normal file
View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>$root$/</loc>
</url>
$for(entries)$
<url>
<loc>$root$$url$</loc>
$if(lastmod)$ <lastmod>$lastmod$</lastmod>
$endif$ </url>
$endfor$
</urlset>