{-# LANGUAGE GHC2021 #-} {-# LANGUAGE OverloadedStrings #-} -- | Build telemetry page (/build/): corpus statistics, word-length -- distribution, tag frequencies, link analysis, epistemic coverage, -- output metrics, repository overview, and build timing. -- Rendered as a full essay (3-column layout, TOC, metadata block). module Stats (statsRules) where import Control.Exception (IOException, catch) import Control.Monad (forM) import Data.List (find, isSuffixOf, sortBy) import qualified Data.Map.Strict as Map import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe) import Data.Ord (comparing, Down (..)) import qualified Data.Set as Set import Data.Time (getCurrentTime, formatTime, defaultTimeLocale) import System.Directory (doesDirectoryExist, getFileSize, listDirectory) import System.Exit (ExitCode (..)) import System.FilePath (takeExtension, (>)) import System.Process (readProcessWithExitCode) import Text.Read (readMaybe) import qualified Data.Aeson as Aeson import qualified Data.Aeson.Key as AK import qualified Data.Aeson.KeyMap as KM import qualified Data.Vector as V import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Hakyll import Authors (authorLinksField) import Contexts (siteCtx) import Utils (readingTime) -- --------------------------------------------------------------------------- -- Types -- --------------------------------------------------------------------------- data TypeRow = TypeRow { trLabel :: String , trCount :: Int , trWords :: Int } data PageInfo = PageInfo { piTitle :: String , piUrl :: String , piWC :: Int } -- --------------------------------------------------------------------------- -- Hakyll helpers -- --------------------------------------------------------------------------- loadWC :: Item String -> Compiler Int loadWC item = do snap <- loadSnapshot (itemIdentifier item) "word-count" return $ fromMaybe 0 (readMaybe (itemBody snap)) loadPI :: Item String -> Compiler (Maybe PageInfo) loadPI item = do meta <- getMetadata (itemIdentifier item) mRoute <- getRoute (itemIdentifier item) wc <- loadWC item return $ fmap (\r -> PageInfo { piTitle = fromMaybe "(untitled)" (lookupString "title" meta) , piUrl = "/" ++ r , piWC = wc }) mRoute -- --------------------------------------------------------------------------- -- Formatting helpers -- --------------------------------------------------------------------------- commaInt :: Int -> String commaInt n | n < 1000 = show n | otherwise = commaInt (n `div` 1000) ++ "," ++ pad3 (n `mod` 1000) where pad3 x | x < 10 = "00" ++ show x | x < 100 = "0" ++ show x | otherwise = show x formatBytes :: Integer -> String formatBytes b | b < 1024 = show b ++ " B" | b < 1024*1024 = showD (b * 10 `div` 1024) ++ " KB" | otherwise = showD (b * 10 `div` (1024*1024)) ++ " MB" where showD n = show (n `div` 10) ++ "." ++ show (n `mod` 10) rtStr :: Int -> String rtStr totalWords | mins < 60 = show mins ++ " min" | otherwise = show (mins `div` 60) ++ "h " ++ show (mins `mod` 60) ++ "m" where mins = totalWords `div` 200 pctStr :: Int -> Int -> String pctStr _ 0 = "—" pctStr n total = show (n * 100 `div` total) ++ "%" -- | Strip HTML tags for plain-text word counting. stripHtmlTags :: String -> String stripHtmlTags [] = [] stripHtmlTags ('<':rest) = stripHtmlTags (drop 1 (dropWhile (/= '>') rest)) stripHtmlTags (c:rest) = c : stripHtmlTags rest -- | Normalise a page URL for backlink map lookup (strip trailing .html). normUrl :: String -> String normUrl u | ".html" `isSuffixOf` u = take (length u - 5) u | otherwise = u -- --------------------------------------------------------------------------- -- IO: output directory walk -- --------------------------------------------------------------------------- walkDir :: FilePath -> IO [(FilePath, Integer)] walkDir dir = do entries <- listDirectory dir `catch` (\(_ :: IOException) -> return []) fmap concat $ forM entries $ \e -> do let path = dir > e isDir <- doesDirectoryExist path if isDir then walkDir path else do sz <- getFileSize path `catch` (\(_ :: IOException) -> return 0) return [(path, sz)] displayExt :: FilePath -> String displayExt path = case takeExtension path of ".html" -> ".html" ".css" -> ".css" ".js" -> ".js" ".woff2" -> ".woff2" ".svg" -> ".svg" ".mp3" -> ".mp3" ".pdf" -> ".pdf" ".json" -> ".json" ".xml" -> ".xml" ".ico" -> ".ico" ".png" -> "image" ".jpg" -> "image" ".jpeg" -> "image" ".webp" -> "image" _ -> "other" getOutputStats :: IO (Map.Map String (Int, Integer), Int, Integer) getOutputStats = do files <- walkDir "_site" let grouped = foldr (\(path, sz) acc -> Map.insertWith (\(c1,s1) (c2,s2) -> (c1+c2, s1+s2)) (displayExt path) (1, sz) acc) Map.empty files return (grouped, length files, sum (map snd files)) -- --------------------------------------------------------------------------- -- IO: lines of code -- --------------------------------------------------------------------------- countLinesDir :: FilePath -> String -> (FilePath -> Bool) -> IO (Int, Int) countLinesDir dir ext skipPred = do entries <- listDirectory dir `catch` (\(_ :: IOException) -> return []) let files = filter (\e -> takeExtension e == ext && not (skipPred e)) entries ls <- fmap sum $ forM files $ \e -> do content <- readFile (dir > e) `catch` (\(_ :: IOException) -> return "") return (length (lines content)) return (length files, ls) getLocStats :: IO (Int, Int, Int, Int, Int, Int) -- (hsFiles, hsLines, cssFiles, cssLines, jsFiles, jsLines) getLocStats = do (hf, hl) <- countLinesDir "build" ".hs" (const False) (cf, cl) <- countLinesDir "static/css" ".css" (const False) (jf, jl) <- countLinesDir "static/js" ".js" (".min.js" `isSuffixOf`) return (hf, hl, cf, cl, jf, jl) -- --------------------------------------------------------------------------- -- IO: git stats -- --------------------------------------------------------------------------- gitRun :: [String] -> IO String gitRun args = do (ec, out, _) <- readProcessWithExitCode "git" args "" return $ if ec == ExitSuccess then out else "" getGitStats :: IO (Int, String) getGitStats = do countOut <- gitRun ["rev-list", "--count", "HEAD"] firstOut <- gitRun ["log", "--format=%ad", "--date=short", "--reverse"] let commits = fromMaybe 0 (readMaybe (filter (/= '\n') countOut) :: Maybe Int) firstDate = case lines firstOut of { (d:_) -> d; _ -> "\x2014" } return (commits, firstDate) -- --------------------------------------------------------------------------- -- HTML rendering: section helpers -- --------------------------------------------------------------------------- section :: String -> String -> String -> String section id_ title body = concat [ "
| " ++ h ++ " | ") headers, "
|---|
Longest
" , pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC)) , "Shortest
" , pageList (take 3 (sortBy (comparing piWC) hasSomeWC)) ] where hasSomeWC = filter (\p -> piWC p > 0) allPIs avgWC = if null hasSomeWC then 0 else sum (map piWC hasSomeWC) `div` length hasSomeWC pageList ps = "