Add robots.txt and sitemap.xml; tidy essay-route prefix-strip

- Emit a minimal robots.txt that points at the sitemap.
- Emit sitemap.xml covering every dated content page (essays, blog,
  fiction, poetry, music) with absolute <loc> and frontmatter-derived
  <lastmod>. Standalone pages (about, colophon, etc.) are
  intentionally omitted: they're reachable via the main nav, lack
  date: frontmatter, and would force a fallback lastmod that
  misrepresents staleness.
- Replace the magic 'drop 8' offset in essay routing with
  stripPrefix "content/". Same behavior, but reads structurally and
  fails closed if the prefix ever changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Levi Neuwirth 2026-05-07 15:08:33 -04:00
parent 339433db20
commit a818b7df9b
2 changed files with 59 additions and 3 deletions

View File

@@ -4,7 +4,7 @@ module Site (rules) where
import Control.Monad (forM, forM_, when)
import Data.Char (isSpace, toUpper)
import Data.List (groupBy, isPrefixOf, sort, sortBy)
import Data.List (groupBy, isPrefixOf, sort, sortBy, stripPrefix)
import Data.Map.Strict (Map)
import Data.Maybe (catMaybes, fromMaybe, listToMaybe)
import Data.Ord (Down (..), comparing)
@@ -383,13 +383,14 @@ rules = do
fname = takeFileName fp
isIndex = fname == "index.md"
isDraft = "content/drafts/essays/" `isPrefixOf` fp
stripContent = fromMaybe fp (stripPrefix "content/" fp)
in case (isDraft, isIndex) of
-- content/drafts/essays/slug/index.md → drafts/essays/slug/index.html
(True, True) -> replaceExtension (drop 8 fp) "html"
(True, True) -> replaceExtension stripContent "html"
-- content/drafts/essays/foo.md → drafts/essays/foo.html
(True, False) -> "drafts/essays/" ++ replaceExtension fname "html"
-- content/essays/slug/index.md → essays/slug/index.html
(False, True) -> replaceExtension (drop 8 fp) "html"
(False, True) -> replaceExtension stripContent "html"
-- content/essays/foo.md → essays/foo.html
(False, False) -> "essays/" ++ replaceExtension fname "html"
compile $ essayCompiler
@@ -919,6 +920,49 @@ rules = do
<> defaultContext
renderAtom musicFeedConfig feedCtx compositions
-- ---------------------------------------------------------------------------
-- robots.txt — minimal, just points crawlers at the sitemap
-- ---------------------------------------------------------------------------
-- Serve a static robots.txt from the site root: allow everything,
-- advertise the sitemap location.
create ["robots.txt"] $ do
  route idRoute
  compile . makeItem . concat $
    [ "User-agent: *\n"
    , "Allow: /\n"
    , "\n"
    , "Sitemap: https://levineuwirth.org/sitemap.xml\n"
    ]
-- ---------------------------------------------------------------------------
-- sitemap.xml — every dated content page (essays, blog, poetry, fiction,
-- music). Standalone pages (about, colophon, etc.) are intentionally
-- omitted: they're reachable via the main nav, lack `date:` frontmatter,
-- and would force a fallback lastmod that misrepresents staleness.
-- ---------------------------------------------------------------------------
-- Generate sitemap.xml from the "content" snapshots of every dated page
-- (essays, blog, fiction, poetry, music), newest first.
create ["sitemap.xml"] $ do
  route idRoute
  compile $ do
    -- Gather all dated content, excluding versioned variants.
    pages <-
      recentFirst
        =<< loadAllSnapshots
          ( ( allEssays
                .||. "content/blog/*.md"
                .||. "content/fiction/*.md"
                .||. allPoetry
                .||. "content/music/*/index.md"
            )
              .&&. hasNoVersion
          )
          "content"
    let base = "https://levineuwirth.org"
        -- Per-entry fields: absolute-URL root plus a lastmod derived
        -- from the item's date metadata.
        entryCtx =
          constField "root" base
            <> dateField "lastmod" "%Y-%m-%d"
            <> defaultContext
        -- Top-level fields handed to the sitemap template.
        topCtx =
          constField "root" base
            <> listField "entries" entryCtx (return pages)
    makeItem ("" :: String)
      >>= loadAndApplyTemplate "templates/sitemap.xml" topCtx
-- ---------------------------------------------------------------------------
-- Epistemic metadata extraction
-- ---------------------------------------------------------------------------

12
templates/sitemap.xml Normal file
View File

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<!-- The site root is listed unconditionally; each item of $entries$ follows.
     <lastmod> is emitted only when the entry's context supplies a date;
     the $if$/$endif$ placement keeps </url> emitted in either case. -->
<url>
<loc>$root$/</loc>
</url>
$for(entries)$
<url>
<loc>$root$$url$</loc>
$if(lastmod)$ <lastmod>$lastmod$</lastmod>
$endif$ </url>
$endfor$
</urlset>