{-# LANGUAGE GHC2021 #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Build telemetry page (/build/): corpus statistics, word-length
-- distribution, tag frequencies, link analysis, epistemic coverage,
-- output metrics, repository overview, and build timing.
-- Rendered as a full essay (3-column layout, TOC, metadata block).
module Stats (statsRules) where
import Control.Exception (IOException, catch)
import Control.Monad (forM)
import Data.List (find, isSuffixOf, sort, sortBy)
import qualified Data.Map.Strict as Map
import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe)
import Data.Ord (comparing, Down (..))
import qualified Data.Set as Set
import Data.Time (getCurrentTime, formatTime, defaultTimeLocale,
Day, parseTimeM, utctDay, addDays, diffDays)
import Data.Time.Calendar (toGregorian, dayOfWeek)
import System.Directory (doesDirectoryExist, getFileSize, listDirectory)
import System.Exit (ExitCode (..))
import System.FilePath (takeExtension, (>))
import System.Process (readProcessWithExitCode)
import Text.Read (readMaybe)
import qualified Data.Aeson as Aeson
import qualified Data.Aeson.Key as AK
import qualified Data.Aeson.KeyMap as KM
import qualified Data.Vector as V
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Hakyll
import Authors (authorLinksField)
import Contexts (siteCtx)
import Utils (readingTime)
-- ---------------------------------------------------------------------------
-- Types
-- ---------------------------------------------------------------------------
data TypeRow = TypeRow
{ trLabel :: String
, trCount :: Int
, trWords :: Int
}
data PageInfo = PageInfo
{ piTitle :: String
, piUrl :: String
, piWC :: Int
}
-- ---------------------------------------------------------------------------
-- Hakyll helpers
-- ---------------------------------------------------------------------------
loadWC :: Item String -> Compiler Int
loadWC item = do
snap <- loadSnapshot (itemIdentifier item) "word-count"
return $ fromMaybe 0 (readMaybe (itemBody snap))
loadPI :: Item String -> Compiler (Maybe PageInfo)
loadPI item = do
meta <- getMetadata (itemIdentifier item)
mRoute <- getRoute (itemIdentifier item)
wc <- loadWC item
return $ fmap (\r -> PageInfo
{ piTitle = fromMaybe "(untitled)" (lookupString "title" meta)
, piUrl = "/" ++ r
, piWC = wc
}) mRoute
-- ---------------------------------------------------------------------------
-- Formatting helpers
-- ---------------------------------------------------------------------------
commaInt :: Int -> String
commaInt n
| n < 1000 = show n
| otherwise = commaInt (n `div` 1000) ++ "," ++ pad3 (n `mod` 1000)
where
pad3 x
| x < 10 = "00" ++ show x
| x < 100 = "0" ++ show x
| otherwise = show x
formatBytes :: Integer -> String
formatBytes b
| b < 1024 = show b ++ " B"
| b < 1024*1024 = showD (b * 10 `div` 1024) ++ " KB"
| otherwise = showD (b * 10 `div` (1024*1024)) ++ " MB"
where showD n = show (n `div` 10) ++ "." ++ show (n `mod` 10)
rtStr :: Int -> String
rtStr totalWords
| mins < 60 = show mins ++ " min"
| otherwise = show (mins `div` 60) ++ "h " ++ show (mins `mod` 60) ++ "m"
where mins = totalWords `div` 200
pctStr :: Int -> Int -> String
pctStr _ 0 = "—"
pctStr n total = show (n * 100 `div` total) ++ "%"
-- | Strip HTML tags for plain-text word counting.
stripHtmlTags :: String -> String
stripHtmlTags [] = []
stripHtmlTags ('<':rest) = stripHtmlTags (drop 1 (dropWhile (/= '>') rest))
stripHtmlTags (c:rest) = c : stripHtmlTags rest
-- | Normalise a page URL for backlink map lookup (strip trailing .html).
normUrl :: String -> String
normUrl u
| ".html" `isSuffixOf` u = take (length u - 5) u
| otherwise = u
pad2 :: (Show a, Integral a) => a -> String
pad2 n = if n < 10 then "0" ++ show n else show n
-- | Median of a non-empty list; returns 0 for empty.
median :: [Int] -> Int
median [] = 0
median xs = let s = sort xs in s !! (length s `div` 2)
-- ---------------------------------------------------------------------------
-- Date helpers (for /stats/ page)
-- ---------------------------------------------------------------------------
parseDay :: String -> Maybe Day
parseDay = parseTimeM True defaultTimeLocale "%Y-%m-%d"
-- | First Monday on or before 'day' (start of its ISO week).
weekStart :: Day -> Day
weekStart day = addDays (fromIntegral (negate (fromEnum (dayOfWeek day)))) day
-- | Intensity class for the heatmap (hm0 … hm4).
heatClass :: Int -> String
heatClass 0 = "hm0"
heatClass n | n < 500 = "hm1"
heatClass n | n < 2000 = "hm2"
heatClass n | n < 5000 = "hm3"
heatClass _ = "hm4"
shortMonth :: Int -> String
shortMonth m = case m of
1 -> "Jan"; 2 -> "Feb"; 3 -> "Mar"; 4 -> "Apr"
5 -> "May"; 6 -> "Jun"; 7 -> "Jul"; 8 -> "Aug"
9 -> "Sep"; 10 -> "Oct"; 11 -> "Nov"; 12 -> "Dec"
_ -> ""
-- ---------------------------------------------------------------------------
-- Heatmap SVG
-- ---------------------------------------------------------------------------
-- | 52-week writing activity heatmap (inline SVG, CSS-variable colors).
renderHeatmap :: Map.Map Day Int -> Day -> String
renderHeatmap wordsByDay today =
let cellSz = 10 :: Int
gap = 2 :: Int
step = cellSz + gap
hdrH = 22 :: Int -- vertical space for month labels
nWeeks = 52
-- First Monday of the 52-week window
startDay = addDays (fromIntegral (-(nWeeks - 1)) * 7) (weekStart today)
nDays = diffDays today startDay + 1
allDays = [addDays i startDay | i <- [0 .. nDays - 1]]
weekOf d = fromIntegral (diffDays d startDay `div` 7) :: Int
dowOf d = fromEnum (dayOfWeek d) -- Mon=0..Sun=6
svgW = (nWeeks - 1) * step + cellSz
svgH = 6 * step + cellSz + hdrH
-- Month labels: one per first-of-month day
monthLbls = concatMap (\d ->
let (_, mo, da) = toGregorian d
in if da == 1
then "
No dated content yet.
" else let maxWC = max 1 $ maximum $ Map.elems byMonth bar (y, m) = let wc = fromMaybe 0 (Map.lookup (y, m) byMonth) pct = if wc == 0 then 0 else max 2 (wc * 100 `div` maxWC) lbl = shortMonth m ++ " \x2019" ++ drop 2 (show y) in "" in "" renderCorpus :: [TypeRow] -> [PageInfo] -> String renderCorpus typeRows allPIs = section "corpus" "Corpus" $ concat [ dl [ ("Total words", commaInt totalWords) , ("Total pages", commaInt (length allPIs)) , ("Total reading time", rtStr totalWords) , ("Average length", commaInt avgWC ++ " words") , ("Median length", commaInt medWC ++ " words") ] , table ["Type", "Pages", "Words", "Reading time"] (map row typeRows) (Just ["Total", commaInt (sum (map trCount typeRows)) , commaInt totalWords, rtStr totalWords]) ] where hasSomeWC = filter (\p -> piWC p > 0) allPIs totalWords = sum (map trWords typeRows) avgWC = if null hasSomeWC then 0 else totalWords `div` length hasSomeWC medWC = median (map piWC hasSomeWC) row r = [trLabel r, commaInt (trCount r), commaInt (trWords r), rtStr (trWords r)] renderNotable :: [PageInfo] -> String renderNotable allPIs = section "notable" "Notable" $ concat [ "Longest
" , pageList (take 5 (sortBy (comparing (Down . piWC)) hasSomeWC)) , "Shortest
" , pageList (take 5 (sortBy (comparing piWC) hasSomeWC)) ] where hasSomeWC = filter (\p -> piWC p > 50) allPIs pageList ps = "| " ++ h ++ " | ") headers, "
|---|
Longest
" , pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC)) , "Shortest
" , pageList (take 3 (sortBy (comparing piWC) hasSomeWC)) ] where hasSomeWC = filter (\p -> piWC p > 0) allPIs avgWC = if null hasSomeWC then 0 else sum (map piWC hasSomeWC) `div` length hasSomeWC pageList ps = "