levineuwirth.org/build/Stats.hs

800 lines
35 KiB
Haskell

{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Build telemetry page (/build/): corpus statistics, word-length
-- distribution, tag frequencies, link analysis, epistemic coverage,
-- output metrics, repository overview, and build timing.
-- Rendered as a full essay (3-column layout, TOC, metadata block).
module Stats (statsRules) where
import Control.Exception (IOException, catch)
import Control.Monad (forM)
import Data.List (find, isSuffixOf, sort, sortBy)
import qualified Data.Map.Strict as Map
import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe)
import Data.Ord (comparing, Down (..))
import qualified Data.Set as Set
import Data.Time (getCurrentTime, formatTime, defaultTimeLocale,
Day, parseTimeM, utctDay, addDays, diffDays)
import Data.Time.Calendar (toGregorian, dayOfWeek)
import System.Directory (doesDirectoryExist, getFileSize, listDirectory)
import System.Exit (ExitCode (..))
import System.FilePath (takeExtension, (</>))
import System.Process (readProcessWithExitCode)
import Text.Read (readMaybe)
import qualified Data.Aeson as Aeson
import qualified Data.Aeson.Key as AK
import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Hakyll
import Authors (authorLinksField)
import Contexts (siteCtx)
import Utils (readingTime)
-- ---------------------------------------------------------------------------
-- Types
-- ---------------------------------------------------------------------------
data TypeRow = TypeRow
{ trLabel :: String
, trCount :: Int
, trWords :: Int
}
data PageInfo = PageInfo
{ piTitle :: String
, piUrl :: String
, piWC :: Int
}
-- ---------------------------------------------------------------------------
-- Hakyll helpers
-- ---------------------------------------------------------------------------
loadWC :: Item String -> Compiler Int
loadWC item = do
snap <- loadSnapshot (itemIdentifier item) "word-count"
return $ fromMaybe 0 (readMaybe (itemBody snap))
loadPI :: Item String -> Compiler (Maybe PageInfo)
loadPI item = do
meta <- getMetadata (itemIdentifier item)
mRoute <- getRoute (itemIdentifier item)
wc <- loadWC item
return $ fmap (\r -> PageInfo
{ piTitle = fromMaybe "(untitled)" (lookupString "title" meta)
, piUrl = "/" ++ r
, piWC = wc
}) mRoute
-- ---------------------------------------------------------------------------
-- Formatting helpers
-- ---------------------------------------------------------------------------
commaInt :: Int -> String
commaInt n
| n < 1000 = show n
| otherwise = commaInt (n `div` 1000) ++ "," ++ pad3 (n `mod` 1000)
where
pad3 x
| x < 10 = "00" ++ show x
| x < 100 = "0" ++ show x
| otherwise = show x
formatBytes :: Integer -> String
formatBytes b
| b < 1024 = show b ++ " B"
| b < 1024*1024 = showD (b * 10 `div` 1024) ++ " KB"
| otherwise = showD (b * 10 `div` (1024*1024)) ++ " MB"
where showD n = show (n `div` 10) ++ "." ++ show (n `mod` 10)
rtStr :: Int -> String
rtStr totalWords
| mins < 60 = show mins ++ " min"
| otherwise = show (mins `div` 60) ++ "h " ++ show (mins `mod` 60) ++ "m"
where mins = totalWords `div` 200
pctStr :: Int -> Int -> String
pctStr _ 0 = ""
pctStr n total = show (n * 100 `div` total) ++ "%"
-- | Strip HTML tags for plain-text word counting.
stripHtmlTags :: String -> String
stripHtmlTags [] = []
stripHtmlTags ('<':rest) = stripHtmlTags (drop 1 (dropWhile (/= '>') rest))
stripHtmlTags (c:rest) = c : stripHtmlTags rest
-- | Normalise a page URL for backlink map lookup (strip trailing .html).
normUrl :: String -> String
normUrl u
| ".html" `isSuffixOf` u = take (length u - 5) u
| otherwise = u
pad2 :: (Show a, Integral a) => a -> String
pad2 n = if n < 10 then "0" ++ show n else show n
-- | Median of a non-empty list; returns 0 for empty.
median :: [Int] -> Int
median [] = 0
median xs = let s = sort xs in s !! (length s `div` 2)
-- ---------------------------------------------------------------------------
-- Date helpers (for /stats/ page)
-- ---------------------------------------------------------------------------
parseDay :: String -> Maybe Day
parseDay = parseTimeM True defaultTimeLocale "%Y-%m-%d"
-- | First Monday on or before 'day' (start of its ISO week).
weekStart :: Day -> Day
weekStart day = addDays (fromIntegral (negate (fromEnum (dayOfWeek day)))) day
-- | Intensity class for the heatmap (hm0 … hm4).
heatClass :: Int -> String
heatClass 0 = "hm0"
heatClass n | n < 500 = "hm1"
heatClass n | n < 2000 = "hm2"
heatClass n | n < 5000 = "hm3"
heatClass _ = "hm4"
shortMonth :: Int -> String
shortMonth m = case m of
1 -> "Jan"; 2 -> "Feb"; 3 -> "Mar"; 4 -> "Apr"
5 -> "May"; 6 -> "Jun"; 7 -> "Jul"; 8 -> "Aug"
9 -> "Sep"; 10 -> "Oct"; 11 -> "Nov"; 12 -> "Dec"
_ -> ""
-- ---------------------------------------------------------------------------
-- Heatmap SVG
-- ---------------------------------------------------------------------------
-- | 52-week writing activity heatmap (inline SVG, CSS-variable colors).
renderHeatmap :: Map.Map Day Int -> Day -> String
renderHeatmap wordsByDay today =
let cellSz = 10 :: Int
gap = 2 :: Int
step = cellSz + gap
hdrH = 22 :: Int -- vertical space for month labels
nWeeks = 52
-- First Monday of the 52-week window
startDay = addDays (fromIntegral (-(nWeeks - 1)) * 7) (weekStart today)
nDays = diffDays today startDay + 1
allDays = [addDays i startDay | i <- [0 .. nDays - 1]]
weekOf d = fromIntegral (diffDays d startDay `div` 7) :: Int
dowOf d = fromEnum (dayOfWeek d) -- Mon=0..Sun=6
svgW = (nWeeks - 1) * step + cellSz
svgH = 6 * step + cellSz + hdrH
-- Month labels: one per first-of-month day
monthLbls = concatMap (\d ->
let (_, mo, da) = toGregorian d
in if da == 1
then "<text class=\"hm-lbl\" x=\"" ++ show (weekOf d * step)
++ "\" y=\"14\">" ++ shortMonth mo ++ "</text>"
else "") allDays
-- One rect per day
cells = concatMap (\d ->
let wc = fromMaybe 0 (Map.lookup d wordsByDay)
(yr, mo, da) = toGregorian d
x = weekOf d * step
y = dowOf d * step + hdrH
tip = show yr ++ "-" ++ pad2 mo ++ "-" ++ pad2 da
++ if wc > 0 then ": " ++ commaInt wc ++ " words" else ""
in "<rect class=\"" ++ heatClass wc ++ "\""
++ " x=\"" ++ show x ++ "\" y=\"" ++ show y ++ "\""
++ " width=\"" ++ show cellSz ++ "\" height=\"" ++ show cellSz ++ "\""
++ " rx=\"2\"><title>" ++ tip ++ "</title></rect>") allDays
-- Inline legend (five sample rects)
legendW = 5 * step - gap
legendSvg =
"<svg width=\"" ++ show legendW ++ "\" height=\"" ++ show cellSz ++ "\""
++ " viewBox=\"0 0 " ++ show legendW ++ " " ++ show cellSz ++ "\""
++ " style=\"display:inline;vertical-align:middle\">"
++ concatMap (\i ->
"<rect class=\"hm" ++ show i ++ "\""
++ " x=\"" ++ show (i * step) ++ "\" y=\"0\""
++ " width=\"" ++ show cellSz ++ "\" height=\"" ++ show cellSz ++ "\""
++ " rx=\"2\"/>") [0..4]
++ "</svg>"
in "<figure class=\"stats-heatmap\">"
++ "<svg width=\"" ++ show svgW ++ "\" height=\"" ++ show svgH ++ "\""
++ " viewBox=\"0 0 " ++ show svgW ++ " " ++ show svgH ++ "\""
++ " class=\"heatmap-svg\" role=\"img\""
++ " aria-label=\"52-week writing activity heatmap\">"
++ "<style>"
++ ".hm0{fill:var(--hm-0)}.hm1{fill:var(--hm-1)}.hm2{fill:var(--hm-2)}"
++ ".hm3{fill:var(--hm-3)}.hm4{fill:var(--hm-4)}"
++ ".hm-lbl{font-size:9px;fill:var(--text-faint);font-family:sans-serif}"
++ "</style>"
++ monthLbls ++ cells
++ "</svg>"
++ "<figcaption class=\"heatmap-legend\">"
++ "Less\xA0" ++ legendSvg ++ "\xA0More"
++ "</figcaption>"
++ "</figure>"
-- ---------------------------------------------------------------------------
-- Stats page sections
-- ---------------------------------------------------------------------------
renderMonthlyVolume :: Map.Map Day Int -> String
renderMonthlyVolume wordsByDay =
section "volume" "Monthly volume" $
let byMonth = Map.fromListWith (+)
[ ((y, m), wc)
| (day, wc) <- Map.toList wordsByDay
, let (y, m, _) = toGregorian day
]
in if Map.null byMonth
then "<p><em>No dated content yet.</em></p>"
else
let maxWC = max 1 $ maximum $ Map.elems byMonth
bar (y, m) =
let wc = fromMaybe 0 (Map.lookup (y, m) byMonth)
pct = if wc == 0 then 0 else max 2 (wc * 100 `div` maxWC)
lbl = shortMonth m ++ " \x2019" ++ drop 2 (show y)
in "<div class=\"build-bar-row\">"
++ "<span class=\"build-bar-label\">" ++ lbl ++ "</span>"
++ "<span class=\"build-bar-wrap\"><span class=\"build-bar\" style=\"width:"
++ show pct ++ "%\"></span></span>"
++ "<span class=\"build-bar-count\">"
++ (if wc > 0 then commaInt wc else "") ++ "</span>"
++ "</div>"
in "<div class=\"build-bars\">" ++ concatMap bar (Map.keys byMonth) ++ "</div>"
renderCorpus :: [TypeRow] -> [PageInfo] -> String
renderCorpus typeRows allPIs =
section "corpus" "Corpus" $ concat
[ dl [ ("Total words", commaInt totalWords)
, ("Total pages", commaInt (length allPIs))
, ("Total reading time", rtStr totalWords)
, ("Average length", commaInt avgWC ++ " words")
, ("Median length", commaInt medWC ++ " words")
]
, table ["Type", "Pages", "Words", "Reading time"]
(map row typeRows)
(Just ["Total", commaInt (sum (map trCount typeRows))
, commaInt totalWords, rtStr totalWords])
]
where
hasSomeWC = filter (\p -> piWC p > 0) allPIs
totalWords = sum (map trWords typeRows)
avgWC = if null hasSomeWC then 0 else totalWords `div` length hasSomeWC
medWC = median (map piWC hasSomeWC)
row r = [trLabel r, commaInt (trCount r), commaInt (trWords r), rtStr (trWords r)]
renderNotable :: [PageInfo] -> String
renderNotable allPIs =
section "notable" "Notable" $ concat
[ "<p><strong>Longest</strong></p>"
, pageList (take 5 (sortBy (comparing (Down . piWC)) hasSomeWC))
, "<p><strong>Shortest</strong></p>"
, pageList (take 5 (sortBy (comparing piWC) hasSomeWC))
]
where
hasSomeWC = filter (\p -> piWC p > 50) allPIs
pageList ps = "<ol class=\"build-page-list\">"
++ concatMap (\p -> "<li>" ++ link (piUrl p) (piTitle p)
++ " \x2014 " ++ commaInt (piWC p) ++ " words</li>") ps
++ "</ol>"
renderStatsTags :: [(String, Int)] -> Int -> String
renderStatsTags topTags uniqueCount =
section "tags" "Tags" $ concat
[ dl [("Unique tags", commaInt uniqueCount)]
, table ["Tag", "Items"] (map row topTags) Nothing
]
where row (t, n) = [link ("/" ++ t ++ "/") t, show n]
statsTOC :: String
statsTOC = "<ol>\n" ++ concatMap item entries ++ "</ol>\n"
where
item (i, t) = "<li><a href=\"#" ++ i ++ "\" data-target=\"" ++ i ++ "\">"
++ t ++ "</a></li>\n"
entries = [ ("activity", "Writing activity")
, ("volume", "Monthly volume")
, ("corpus", "Corpus")
, ("notable", "Notable")
, ("tags", "Tags")
]
-- ---------------------------------------------------------------------------
-- IO: output directory walk
-- ---------------------------------------------------------------------------
walkDir :: FilePath -> IO [(FilePath, Integer)]
walkDir dir = do
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
fmap concat $ forM entries $ \e -> do
let path = dir </> e
isDir <- doesDirectoryExist path
if isDir
then walkDir path
else do
sz <- getFileSize path `catch` (\(_ :: IOException) -> return 0)
return [(path, sz)]
displayExt :: FilePath -> String
displayExt path = case takeExtension path of
".html" -> ".html"
".css" -> ".css"
".js" -> ".js"
".woff2" -> ".woff2"
".svg" -> ".svg"
".mp3" -> ".mp3"
".pdf" -> ".pdf"
".json" -> ".json"
".xml" -> ".xml"
".ico" -> ".ico"
".png" -> "image"
".jpg" -> "image"
".jpeg" -> "image"
".webp" -> "image"
_ -> "other"
getOutputStats :: IO (Map.Map String (Int, Integer), Int, Integer)
getOutputStats = do
files <- walkDir "_site"
let grouped = foldr (\(path, sz) acc ->
Map.insertWith (\(c1,s1) (c2,s2) -> (c1+c2, s1+s2))
(displayExt path)
(1, sz) acc)
Map.empty files
return (grouped, length files, sum (map snd files))
-- ---------------------------------------------------------------------------
-- IO: lines of code
-- ---------------------------------------------------------------------------
countLinesDir :: FilePath -> String -> (FilePath -> Bool) -> IO (Int, Int)
countLinesDir dir ext skipPred = do
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
let files = filter (\e -> takeExtension e == ext && not (skipPred e)) entries
ls <- fmap sum $ forM files $ \e -> do
content <- readFile (dir </> e) `catch` (\(_ :: IOException) -> return "")
return (length (lines content))
return (length files, ls)
getLocStats :: IO (Int, Int, Int, Int, Int, Int)
-- (hsFiles, hsLines, cssFiles, cssLines, jsFiles, jsLines)
getLocStats = do
(hf, hl) <- countLinesDir "build" ".hs" (const False)
(cf, cl) <- countLinesDir "static/css" ".css" (const False)
(jf, jl) <- countLinesDir "static/js" ".js" (".min.js" `isSuffixOf`)
return (hf, hl, cf, cl, jf, jl)
-- ---------------------------------------------------------------------------
-- IO: git stats
-- ---------------------------------------------------------------------------
gitRun :: [String] -> IO String
gitRun args = do
(ec, out, _) <- readProcessWithExitCode "git" args ""
return $ if ec == ExitSuccess then out else ""
getGitStats :: IO (Int, String)
getGitStats = do
countOut <- gitRun ["rev-list", "--count", "HEAD"]
firstOut <- gitRun ["log", "--format=%ad", "--date=short", "--reverse"]
let commits = fromMaybe 0 (readMaybe (filter (/= '\n') countOut) :: Maybe Int)
firstDate = case lines firstOut of { (d:_) -> d; _ -> "\x2014" }
return (commits, firstDate)
-- ---------------------------------------------------------------------------
-- HTML rendering: section helpers
-- ---------------------------------------------------------------------------
section :: String -> String -> String -> String
section id_ title body = concat
[ "<h2 id=\"", id_, "\">", title, "</h2>\n"
, body
]
table :: [String] -> [[String]] -> Maybe [String] -> String
table headers rows mFoot = concat
[ "<table class=\"build-table\">"
, "<thead><tr>", concatMap (\h -> "<th>" ++ h ++ "</th>") headers, "</tr></thead>"
, "<tbody>", concatMap renderRow rows, "</tbody>"
, maybe "" renderFoot mFoot
, "</table>"
]
where
renderRow cells = "<tr>" ++ concatMap (\c -> "<td>" ++ c ++ "</td>") cells ++ "</tr>"
renderFoot cells = "<tfoot><tr class=\"build-total\">"
++ concatMap (\c -> "<td>" ++ c ++ "</td>") cells
++ "</tr></tfoot>"
dl :: [(String, String)] -> String
dl pairs = "<dl class=\"build-meta\">"
++ concatMap (\(k, v) -> "<dt>" ++ k ++ "</dt><dd>" ++ v ++ "</dd>") pairs
++ "</dl>"
link :: String -> String -> String
link url title = "<a href=\"" ++ url ++ "\">" ++ title ++ "</a>"
-- ---------------------------------------------------------------------------
-- HTML rendering: sections
-- ---------------------------------------------------------------------------
renderContent :: [TypeRow] -> String
renderContent rows =
section "content" "Content" $
table
["Type", "Count", "Words", "Reading time"]
(map row rows)
(Just ["Total", commaInt totalCount, commaInt totalWords, rtStr totalWords])
where
totalCount = sum (map trCount rows)
totalWords = sum (map trWords rows)
row r = [ trLabel r
, commaInt (trCount r)
, commaInt (trWords r)
, rtStr (trWords r)
]
renderPages :: [PageInfo] -> Maybe (String,String,String) -> Maybe (String,String,String) -> String
renderPages allPIs mOldest mNewest =
section "pages" "Pages" $ concat
[ dl $
[ ("Total pages", commaInt (length allPIs))
, ("Average length", commaInt avgWC ++ " words")
] ++
maybe [] (\(d,t,u) -> [("Oldest content", d ++ " \x2014 " ++ link u t)]) mOldest ++
maybe [] (\(d,t,u) -> [("Newest content", d ++ " \x2014 " ++ link u t)]) mNewest
, "<p><strong>Longest</strong></p>"
, pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC))
, "<p><strong>Shortest</strong></p>"
, pageList (take 3 (sortBy (comparing piWC) hasSomeWC))
]
where
hasSomeWC = filter (\p -> piWC p > 0) allPIs
avgWC = if null hasSomeWC then 0
else sum (map piWC hasSomeWC) `div` length hasSomeWC
pageList ps = "<ol class=\"build-page-list\">"
++ concatMap (\p -> "<li>" ++ link (piUrl p) (piTitle p)
++ " \x2014 " ++ commaInt (piWC p) ++ " words</li>") ps
++ "</ol>"
renderDistribution :: [Int] -> String
renderDistribution wcs =
section "distribution" "Word-length distribution" $
"<div class=\"build-bars\">" ++ concatMap bar buckets ++ "</div>"
where
bucketOf w
| w < 500 = 0 | w < 1000 = 1 | w < 2000 = 2 | w < 5000 = 3 | otherwise = 4
labels = ["&lt; 500", "500 \x2013 1k", "1k \x2013 2k", "2k \x2013 5k", "\x2265 5k"]
counts = foldr (\w acc -> Map.insertWith (+) (bucketOf w) 1 acc)
(Map.fromList [(i,0) | i <- [0..4]]) wcs
buckets = [(labels !! i, fromMaybe 0 (Map.lookup i counts)) | i <- [0..4]]
maxCount = max 1 (maximum (map snd buckets))
bar (lbl, n) =
let pct = n * 100 `div` maxCount
in concat
[ "<div class=\"build-bar-row\">"
, "<span class=\"build-bar-label\">", lbl, "</span>"
, "<span class=\"build-bar-wrap\"><span class=\"build-bar\" style=\"width:"
, show pct, "%\"></span></span>"
, "<span class=\"build-bar-count\">", show n, "</span>"
, "</div>"
]
renderTagsSection :: [(String, Int)] -> Int -> String
renderTagsSection topTags uniqueCount =
section "tags" "Tags" $ concat
[ dl [("Unique tags", commaInt uniqueCount)]
, table ["Tag", "Items"] (map row topTags) Nothing
]
where
row (t, n) = [link ("/" ++ t ++ "/") t, show n]
renderLinks :: Maybe (String, Int, String) -> Int -> Int -> String
renderLinks mMostLinked orphanCount total =
section "links" "Links" $
dl $
(case mMostLinked of
Nothing -> [("Most-linked page", "\x2014")]
Just (u, n, t) -> [("Most-linked page",
link u t ++ " (" ++ show n ++ " inbound links)")]) ++
[ ("Orphan pages", commaInt orphanCount
++ " of " ++ commaInt total
++ " (" ++ pctStr orphanCount total ++ ")") ]
renderEpistemic :: Int -> Int -> Int -> Int -> Int -> String
renderEpistemic total ws wc wi we =
section "epistemic" "Epistemic coverage" $
table
["Field", "Set", "Coverage"]
[ row "Status" ws
, row "Confidence" wc
, row "Importance" wi
, row "Evidence" we
]
Nothing
where
row label n = [label, show n ++ " / " ++ show total, pctStr n total]
renderOutput :: Map.Map String (Int, Integer) -> Int -> Integer -> String
renderOutput grouped totalFiles totalSize =
section "output" "Output" $
table
["Type", "Files", "Size"]
(map row (sortBy (comparing (Down . snd . snd)) (Map.toList grouped)))
(Just ["Total", commaInt totalFiles, formatBytes totalSize])
where
row (ext, (n, sz)) = [ext, commaInt n, formatBytes sz]
renderRepository :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> String -> String
renderRepository hf hl cf cl jf jl commits firstDate =
section "repository" "Repository" $
dl
[ ("Haskell", commaInt hl ++ " lines across " ++ show hf ++ " files")
, ("CSS", commaInt cl ++ " lines across " ++ show cf ++ " files")
, ("JavaScript", commaInt jl ++ " lines across " ++ show jf ++ " files (excl. minified)")
, ("Total git commits", commaInt commits)
, ("Repository started", firstDate)
]
renderBuild :: String -> String -> String
renderBuild ts dur =
section "build" "Build" $
dl
[ ("Generated", ts)
, ("Last build duration", dur)
]
-- ---------------------------------------------------------------------------
-- Static TOC (matches the nine h2 sections above)
-- ---------------------------------------------------------------------------
pageTOC :: String
pageTOC = "<ol>\n" ++ concatMap item sections ++ "</ol>\n"
where
item (id_, title) =
"<li><a href=\"#" ++ id_ ++ "\" data-target=\"" ++ id_ ++ "\">"
++ title ++ "</a></li>\n"
sections =
[ ("content", "Content")
, ("pages", "Pages")
, ("distribution", "Word-length distribution")
, ("tags", "Tags")
, ("links", "Links")
, ("epistemic", "Epistemic coverage")
, ("output", "Output")
, ("repository", "Repository")
, ("build", "Build")
]
-- ---------------------------------------------------------------------------
-- Rules
-- ---------------------------------------------------------------------------
statsRules :: Tags -> Rules ()
statsRules tags = do
-- -------------------------------------------------------------------------
-- Build telemetry page (/build/)
-- -------------------------------------------------------------------------
create ["build/index.html"] $ do
route idRoute
compile $ do
-- ----------------------------------------------------------------
-- Load all content items
-- ----------------------------------------------------------------
essays <- loadAll ("content/essays/*.md" .&&. hasNoVersion)
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
comps <- loadAll ("content/music/*/index.md" .&&. hasNoVersion)
-- ----------------------------------------------------------------
-- Word counts
-- ----------------------------------------------------------------
essayWCs <- mapM loadWC essays
postWCs <- mapM loadWC posts
poemWCs <- mapM loadWC poems
fictionWCs <- mapM loadWC fiction
compWCs <- mapM loadWC comps
let allWCs = essayWCs ++ postWCs ++ poemWCs ++ fictionWCs ++ compWCs
rows =
[ TypeRow "Essays" (length essays) (sum essayWCs)
, TypeRow "Blog posts" (length posts) (sum postWCs)
, TypeRow "Poems" (length poems) (sum poemWCs)
, TypeRow "Fiction" (length fiction) (sum fictionWCs)
, TypeRow "Compositions" (length comps) (sum compWCs)
]
-- ----------------------------------------------------------------
-- Per-page info (title + URL + word count)
-- ----------------------------------------------------------------
allItems <- return (essays ++ posts ++ poems ++ fiction ++ comps)
allPIs <- catMaybes <$> mapM loadPI allItems
-- ----------------------------------------------------------------
-- Dates (essays + posts only)
-- ----------------------------------------------------------------
let getDateMeta item = do
meta <- getMetadata (itemIdentifier item)
mRoute <- getRoute (itemIdentifier item)
let d = fromMaybe "" (lookupString "date" meta)
t = fromMaybe "(untitled)" (lookupString "title" meta)
u = maybe "#" (\r -> "/" ++ r) mRoute
return (d, t, u)
essayDates <- mapM getDateMeta essays
postDates <- mapM getDateMeta posts
let allDates = filter (\(d,_,_) -> not (null d)) (essayDates ++ postDates)
sortedDates = sortBy (comparing (\(d,_,_) -> d)) allDates
oldestDate = listToMaybe sortedDates
newestDate = listToMaybe (reverse sortedDates)
-- ----------------------------------------------------------------
-- Tags
-- ----------------------------------------------------------------
let tagFreqs = map (\(t, ids) -> (t, length ids)) (tagsMap tags)
topTags = take 15 (sortBy (comparing (Down . snd)) tagFreqs)
uniqueTags = length tagFreqs
-- ----------------------------------------------------------------
-- Backlinks: most-linked page + orphan count
-- ----------------------------------------------------------------
blItem <- load (fromFilePath "data/backlinks.json") :: Compiler (Item String)
let rawBL = itemBody blItem
mBLVal = Aeson.decodeStrict (TE.encodeUtf8 (T.pack rawBL)) :: Maybe Aeson.Value
blPairs = case mBLVal of
Just (Aeson.Object km) ->
[ (T.unpack (AK.toText k),
case v of Aeson.Array arr -> V.length arr; _ -> 0)
| (k, v) <- KM.toList km ]
_ -> []
blSet = Set.fromList (map fst blPairs)
orphanCount = length
[ p | p <- allPIs
, not (Set.member (normUrl (piUrl p)) blSet) ]
mostLinked = listToMaybe (sortBy (comparing (Down . snd)) blPairs)
mostLinkedInfo = mostLinked >>= \(url, ct) ->
let mTitle = piTitle <$> find (\p -> normUrl (piUrl p) == url) allPIs
in Just (url, ct, fromMaybe url mTitle)
-- ----------------------------------------------------------------
-- Epistemic coverage (essays + posts)
-- ----------------------------------------------------------------
essayMetas <- mapM (getMetadata . itemIdentifier) essays
postMetas <- mapM (getMetadata . itemIdentifier) posts
let epMetas = essayMetas ++ postMetas
epTotal = length epMetas
ep f = length (filter (isJust . f) epMetas)
withStatus = ep (lookupString "status")
withConf = ep (lookupString "confidence")
withImp = ep (lookupString "importance")
withEv = ep (lookupString "evidence")
-- ----------------------------------------------------------------
-- Output directory stats
-- ----------------------------------------------------------------
(outputGrouped, totalFiles, totalSize) <-
unsafeCompiler getOutputStats
-- ----------------------------------------------------------------
-- Lines of code + git stats
-- ----------------------------------------------------------------
(hf, hl, cf, cl, jf, jl) <- unsafeCompiler getLocStats
(commits, firstDate) <- unsafeCompiler getGitStats
-- ----------------------------------------------------------------
-- Build timestamp + last build duration
-- ----------------------------------------------------------------
buildTimestamp <- unsafeCompiler $
formatTime defaultTimeLocale "%Y-%m-%d %H:%M UTC" <$> getCurrentTime
lastBuildDur <- unsafeCompiler $
(readFile "data/last-build-seconds.txt" >>= \s ->
let secs = fromMaybe 0 (readMaybe (filter (/= '\n') s) :: Maybe Int)
in return (show secs ++ "s"))
`catch` (\(_ :: IOException) -> return "\x2014")
-- ----------------------------------------------------------------
-- Assemble page
-- ----------------------------------------------------------------
let content = concat
[ renderContent rows
, renderPages allPIs oldestDate newestDate
, renderDistribution allWCs
, renderTagsSection topTags uniqueTags
, renderLinks mostLinkedInfo orphanCount (length allPIs)
, renderEpistemic epTotal withStatus withConf withImp withEv
, renderOutput outputGrouped totalFiles totalSize
, renderRepository hf hl cf cl jf jl commits firstDate
, renderBuild buildTimestamp lastBuildDur
]
plainText = stripHtmlTags content
wc = length (words plainText)
rt = readingTime plainText
ctx = constField "toc" pageTOC
<> constField "word-count" (show wc)
<> constField "reading-time" (show rt)
<> constField "title" "Build Telemetry"
<> constField "abstract" "Per-build corpus statistics, tag distribution, \
\link analysis, epistemic coverage, output metrics, \
\repository overview, and build timing."
<> constField "build" "true"
<> authorLinksField
<> siteCtx
makeItem content
>>= loadAndApplyTemplate "templates/essay.html" ctx
>>= loadAndApplyTemplate "templates/default.html" ctx
>>= relativizeUrls
-- -------------------------------------------------------------------------
-- Writing statistics page (/stats/)
-- -------------------------------------------------------------------------
create ["stats/index.html"] $ do
route idRoute
compile $ do
essays <- loadAll ("content/essays/*.md" .&&. hasNoVersion)
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
comps <- loadAll ("content/music/*/index.md" .&&. hasNoVersion)
essayWCs <- mapM loadWC essays
postWCs <- mapM loadWC posts
poemWCs <- mapM loadWC poems
fictionWCs <- mapM loadWC fiction
compWCs <- mapM loadWC comps
let allItems = essays ++ posts ++ poems ++ fiction ++ comps
typeRows =
[ TypeRow "Essays" (length essays) (sum essayWCs)
, TypeRow "Blog posts" (length posts) (sum postWCs)
, TypeRow "Poems" (length poems) (sum poemWCs)
, TypeRow "Fiction" (length fiction) (sum fictionWCs)
, TypeRow "Compositions" (length comps) (sum compWCs)
]
allPIs <- catMaybes <$> mapM loadPI allItems
-- Build wordsByDay: for each item with a parseable `date`, map that
-- day to the item's word count (summing if multiple items share a date).
datePairs <- fmap catMaybes $ forM allItems $ \item -> do
meta <- getMetadata (itemIdentifier item)
wc <- loadWC item
return $ case lookupString "date" meta >>= parseDay of
Nothing -> Nothing
Just d -> Just (d, wc)
let wordsByDay = Map.fromListWith (+) datePairs
let tagFreqs = map (\(t, ids) -> (t, length ids)) (tagsMap tags)
topTags = take 15 (sortBy (comparing (Down . snd)) tagFreqs)
uniqueTags = length tagFreqs
today <- unsafeCompiler (utctDay <$> getCurrentTime)
let content = concat
[ section "activity" "Writing activity" (renderHeatmap wordsByDay today)
, renderMonthlyVolume wordsByDay
, renderCorpus typeRows allPIs
, renderNotable allPIs
, renderStatsTags topTags uniqueTags
]
plainText = stripHtmlTags content
wc = length (words plainText)
rt = readingTime plainText
ctx = constField "toc" statsTOC
<> constField "word-count" (show wc)
<> constField "reading-time" (show rt)
<> constField "title" "Writing Statistics"
<> constField "abstract" "Writing activity, corpus breakdown, \
\and tag distribution — computed at build time."
<> constField "build" "true"
<> authorLinksField
<> siteCtx
makeItem content
>>= loadAndApplyTemplate "templates/essay.html" ctx
>>= loadAndApplyTemplate "templates/default.html" ctx
>>= relativizeUrls