Add robots.txt and sitemap.xml; tidy essay-route prefix-strip
- Emit a minimal robots.txt that points at the sitemap.
- Emit sitemap.xml covering every dated content page (essays, blog, fiction, poetry, music) with absolute <loc> and frontmatter-derived <lastmod>. Standalone pages (about, colophon, etc.) are intentionally omitted: they're reachable via the main nav, lack `date:` frontmatter, and would force a fallback lastmod that misrepresents staleness.
- Replace the magic 'drop 8' offset in essay routing with stripPrefix "content/". Same behavior for paths under content/, but reads structurally; if the prefix ever changes, the path is left unmodified (via fromMaybe) rather than silently losing eight arbitrary characters.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
339433db20
commit
a818b7df9b
|
|
@ -4,7 +4,7 @@ module Site (rules) where
|
||||||
|
|
||||||
import Control.Monad (forM, forM_, when)
|
import Control.Monad (forM, forM_, when)
|
||||||
import Data.Char (isSpace, toUpper)
|
import Data.Char (isSpace, toUpper)
|
||||||
import Data.List (groupBy, isPrefixOf, sort, sortBy)
|
import Data.List (groupBy, isPrefixOf, sort, sortBy, stripPrefix)
|
||||||
import Data.Map.Strict (Map)
|
import Data.Map.Strict (Map)
|
||||||
import Data.Maybe (catMaybes, fromMaybe, listToMaybe)
|
import Data.Maybe (catMaybes, fromMaybe, listToMaybe)
|
||||||
import Data.Ord (Down (..), comparing)
|
import Data.Ord (Down (..), comparing)
|
||||||
|
|
@ -383,13 +383,14 @@ rules = do
|
||||||
fname = takeFileName fp
|
fname = takeFileName fp
|
||||||
isIndex = fname == "index.md"
|
isIndex = fname == "index.md"
|
||||||
isDraft = "content/drafts/essays/" `isPrefixOf` fp
|
isDraft = "content/drafts/essays/" `isPrefixOf` fp
|
||||||
|
stripContent = fromMaybe fp (stripPrefix "content/" fp)
|
||||||
in case (isDraft, isIndex) of
|
in case (isDraft, isIndex) of
|
||||||
-- content/drafts/essays/slug/index.md → drafts/essays/slug/index.html
|
-- content/drafts/essays/slug/index.md → drafts/essays/slug/index.html
|
||||||
(True, True) -> replaceExtension (drop 8 fp) "html"
|
(True, True) -> replaceExtension stripContent "html"
|
||||||
-- content/drafts/essays/foo.md → drafts/essays/foo.html
|
-- content/drafts/essays/foo.md → drafts/essays/foo.html
|
||||||
(True, False) -> "drafts/essays/" ++ replaceExtension fname "html"
|
(True, False) -> "drafts/essays/" ++ replaceExtension fname "html"
|
||||||
-- content/essays/slug/index.md → essays/slug/index.html
|
-- content/essays/slug/index.md → essays/slug/index.html
|
||||||
(False, True) -> replaceExtension (drop 8 fp) "html"
|
(False, True) -> replaceExtension stripContent "html"
|
||||||
-- content/essays/foo.md → essays/foo.html
|
-- content/essays/foo.md → essays/foo.html
|
||||||
(False, False) -> "essays/" ++ replaceExtension fname "html"
|
(False, False) -> "essays/" ++ replaceExtension fname "html"
|
||||||
compile $ essayCompiler
|
compile $ essayCompiler
|
||||||
|
|
@ -919,6 +920,49 @@ rules = do
|
||||||
<> defaultContext
|
<> defaultContext
|
||||||
renderAtom musicFeedConfig feedCtx compositions
|
renderAtom musicFeedConfig feedCtx compositions
|
||||||
|
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
-- robots.txt — minimal, just points crawlers at the sitemap
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
create ["robots.txt"] $ do
|
||||||
|
route idRoute
|
||||||
|
compile $ makeItem $ unlines
|
||||||
|
[ "User-agent: *"
|
||||||
|
, "Allow: /"
|
||||||
|
, ""
|
||||||
|
, "Sitemap: https://levineuwirth.org/sitemap.xml"
|
||||||
|
]
|
||||||
|
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
-- sitemap.xml — every dated content page (essays, blog, poetry, fiction,
|
||||||
|
-- music). Standalone pages (about, colophon, etc.) are intentionally
|
||||||
|
-- omitted: they're reachable via the main nav, lack `date:` frontmatter,
|
||||||
|
-- and would force a fallback lastmod that misrepresents staleness.
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
create ["sitemap.xml"] $ do
|
||||||
|
route idRoute
|
||||||
|
compile $ do
|
||||||
|
entries <- recentFirst
|
||||||
|
=<< loadAllSnapshots
|
||||||
|
( ( allEssays
|
||||||
|
.||. "content/blog/*.md"
|
||||||
|
.||. "content/fiction/*.md"
|
||||||
|
.||. allPoetry
|
||||||
|
.||. "content/music/*/index.md"
|
||||||
|
)
|
||||||
|
.&&. hasNoVersion
|
||||||
|
)
|
||||||
|
"content"
|
||||||
|
let siteRoot = "https://levineuwirth.org"
|
||||||
|
sitemapItemCtx =
|
||||||
|
constField "root" siteRoot
|
||||||
|
<> dateField "lastmod" "%Y-%m-%d"
|
||||||
|
<> defaultContext
|
||||||
|
sitemapCtx =
|
||||||
|
constField "root" siteRoot
|
||||||
|
<> listField "entries" sitemapItemCtx (return entries)
|
||||||
|
makeItem ("" :: String)
|
||||||
|
>>= loadAndApplyTemplate "templates/sitemap.xml" sitemapCtx
|
||||||
|
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
-- Epistemic metadata extraction
|
-- Epistemic metadata extraction
|
||||||
-- ---------------------------------------------------------------------------
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
|
<url>
|
||||||
|
<loc>$root$/</loc>
|
||||||
|
</url>
|
||||||
|
$for(entries)$
|
||||||
|
<url>
|
||||||
|
<loc>$root$$url$</loc>
|
||||||
|
$if(lastmod)$ <lastmod>$lastmod$</lastmod>
|
||||||
|
$endif$ </url>
|
||||||
|
$endfor$
|
||||||
|
</urlset>
|
||||||
Loading…
Reference in New Issue