{-# LANGUAGE GHC2021 #-} {-# LANGUAGE OverloadedStrings #-} -- | Build telemetry page (/build/): corpus statistics, word-length -- distribution, tag frequencies, link analysis, epistemic coverage, -- output metrics, repository overview, and build timing. -- Rendered as a full essay (3-column layout, TOC, metadata block). module Stats (statsRules) where import Control.Exception (IOException, catch) import Control.Monad (forM) import Data.Char (isSpace, toLower) import Data.List (find, isPrefixOf, isSuffixOf, sort, sortBy) import qualified Data.Map.Strict as Map import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe) import Data.Ord (comparing, Down (..)) import qualified Data.Set as Set import Data.String (fromString) import Data.Time (getCurrentTime, formatTime, defaultTimeLocale, Day, parseTimeM, utctDay, addDays, diffDays) import Data.Time.Calendar (toGregorian, dayOfWeek) import System.Directory (doesDirectoryExist, getFileSize, listDirectory, pathIsSymbolicLink) import System.Exit (ExitCode (..)) import System.FilePath (takeExtension, ()) import System.Process (readProcessWithExitCode) import Text.Read (readMaybe) import qualified Data.Aeson as Aeson import qualified Data.Aeson.Key as AK import qualified Data.Aeson.KeyMap as KM import qualified Data.Vector as V import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Encoding as TE import qualified Text.Blaze.Html5 as H import qualified Text.Blaze.Html5.Attributes as A import Text.Blaze.Html.Renderer.String (renderHtml) import qualified Text.Blaze.Internal as BI import Hakyll import Contexts (siteCtx, authorLinksField) import qualified Patterns as P import Utils (readingTime) -- --------------------------------------------------------------------------- -- Types -- --------------------------------------------------------------------------- data TypeRow = TypeRow { trLabel :: String , trCount :: Int , trWords :: Int } data PageInfo = PageInfo { piTitle :: String , piUrl :: String , piWC :: Int } -- --------------------------------------------------------------------------- -- Hakyll helpers -- --------------------------------------------------------------------------- loadWC :: Item String -> Compiler Int loadWC item = do snap <- loadSnapshot (itemIdentifier item) "word-count" return $ fromMaybe 0 (readMaybe (itemBody snap)) loadPI :: Item String -> Compiler (Maybe PageInfo) loadPI item = do meta <- getMetadata (itemIdentifier item) mRoute <- getRoute (itemIdentifier item) wc <- loadWC item return $ fmap (\r -> PageInfo { piTitle = fromMaybe "(untitled)" (lookupString "title" meta) , piUrl = "/" ++ r , piWC = wc }) mRoute -- --------------------------------------------------------------------------- -- Formatting helpers -- --------------------------------------------------------------------------- commaInt :: Int -> String commaInt n | n < 1000 = show n | otherwise = commaInt (n `div` 1000) ++ "," ++ pad3 (n `mod` 1000) where pad3 x | x < 10 = "00" ++ show x | x < 100 = "0" ++ show x | otherwise = show x formatBytes :: Integer -> String formatBytes b | b < 1024 = show b ++ " B" | b < 1024*1024 = showD (b * 10 `div` 1024) ++ " KB" | otherwise = showD (b * 10 `div` (1024*1024)) ++ " MB" where showD n = show (n `div` 10) ++ "." ++ show (n `mod` 10) rtStr :: Int -> String rtStr totalWords | mins < 60 = show mins ++ " min" | otherwise = show (mins `div` 60) ++ "h " ++ show (mins `mod` 60) ++ "m" where mins = totalWords `div` 200 pctStr :: Int -> Int -> String pctStr _ 0 = "—" pctStr n total = show (n * 100 `div` total) ++ "%" -- | Strip HTML tags for plain-text word counting. -- -- Handles: -- * Tag bodies, including @>@ inside double-quoted attribute values -- (so @\\"a b\"\>@ doesn't slice the surrounding text). -- * HTML comments @\". dropComment ('-':'-':'>':rs) = rs dropComment (_:rs) = dropComment rs dropComment [] = [] -- Drop everything up to and including "]]>". dropCdata (']':']':'>':rs) = rs dropCdata (_:rs) = dropCdata rs dropCdata [] = [] -- Drop a tag body, respecting double-quoted attribute values. dropTag ('"':rs) = dropTag (skipQuoted rs) dropTag ('\'':rs) = dropTag (skipApos rs) dropTag ('>':rs) = rs dropTag (_:rs) = dropTag rs dropTag [] = [] skipQuoted ('"':rs) = rs skipQuoted (_:rs) = skipQuoted rs skipQuoted [] = [] skipApos ('\'':rs) = rs skipApos (_:rs) = skipApos rs skipApos [] = [] -- | Normalise a page URL for backlink map lookup (strip trailing .html). normUrl :: String -> String normUrl u | ".html" `isSuffixOf` u = take (length u - 5) u | otherwise = u pad2 :: (Show a, Integral a) => a -> String pad2 n = if n < 10 then "0" ++ show n else show n -- | Median of a non-empty list; returns 0 for empty. Uses 'drop' + -- pattern match instead of @(!!)@ so the function is total in its -- own implementation, not just by external invariant. median :: [Int] -> Int median [] = 0 median xs = case drop (length xs `div` 2) (sort xs) of (m : _) -> m [] -> 0 -- unreachable: length xs >= 1 above -- --------------------------------------------------------------------------- -- Date helpers (for /stats/ page) -- --------------------------------------------------------------------------- parseDay :: String -> Maybe Day parseDay = parseTimeM True defaultTimeLocale "%Y-%m-%d" -- | First Monday on or before 'day' (start of its ISO week). weekStart :: Day -> Day weekStart day = addDays (fromIntegral (negate (fromEnum (dayOfWeek day)))) day -- | Intensity class for the heatmap (hm0 … hm4). heatClass :: Int -> String heatClass 0 = "hm0" heatClass n | n < 500 = "hm1" heatClass n | n < 2000 = "hm2" heatClass n | n < 5000 = "hm3" heatClass _ = "hm4" shortMonth :: Int -> String shortMonth m = case m of 1 -> "Jan"; 2 -> "Feb"; 3 -> "Mar"; 4 -> "Apr" 5 -> "May"; 6 -> "Jun"; 7 -> "Jul"; 8 -> "Aug" 9 -> "Sep"; 10 -> "Oct"; 11 -> "Nov"; 12 -> "Dec" _ -> "" -- --------------------------------------------------------------------------- -- URL sanitization and core HTML combinators -- --------------------------------------------------------------------------- -- | Defense-in-depth URL allowlist: reject anything that isn't an internal -- path, a fragment, or an explicit safe scheme. Case-insensitive and -- whitespace-tolerant to block @JavaScript:@, @\tjavascript:@, @data:@, etc. -- @http://@ is intentionally excluded to avoid mixed-content warnings. -- -- Protocol-relative URLs (@//evil.com@) are rejected because the leading -- slash would otherwise admit them through the @\"\/\"@ prefix check. isSafeUrl :: String -> Bool isSafeUrl u = let norm = map toLower (dropWhile isSpace u) in not ("//" `isPrefixOf` norm) && any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"] safeHref :: String -> H.AttributeValue safeHref u | isSafeUrl u = H.stringValue u | otherwise = H.stringValue "#" -- | Shorthand for 'H.toHtml' over a 'String'. txt :: String -> H.Html txt = H.toHtml -- | Anchor element with escaped title text and URL sanitized via 'safeHref'. -- Use for trusted plain-text labels such as tag slugs. link :: String -> String -> H.Html link url title = H.a H.! A.href (safeHref url) $ H.toHtml title -- | Anchor for a content page, where the title comes from frontmatter and -- may contain author-authored inline HTML (e.g. @Book Title@). -- The URL is still sanitized via 'safeHref'; the title is emitted -- pre-escaped, matching site convention that metadata titles are -- author-controlled trusted HTML. pageLink :: String -> String -> H.Html pageLink url title = H.a H.! A.href (safeHref url) $ H.preEscapedToHtml title -- | Typed section header followed by its body content. section :: String -> String -> H.Html -> H.Html section id_ title body = do H.h2 H.! A.id (H.stringValue id_) $ H.toHtml title body -- | Build-telemetry table with header row, body rows, and an optional total -- row. Cell contents are pre-rendered 'H.Html' so callers may embed links or -- emphasis inside cells without risking double-escaping. table :: [String] -> [[H.Html]] -> Maybe [H.Html] -> H.Html table headers rows mFoot = H.table H.! A.class_ "build-table" $ do H.thead $ H.tr $ mapM_ (H.th . H.toHtml) headers H.tbody $ mapM_ renderRow rows maybe (return ()) renderFoot mFoot where renderRow cells = H.tr $ mapM_ H.td cells renderFoot cells = H.tfoot $ H.tr H.! A.class_ "build-total" $ mapM_ H.td cells -- | Two-column metadata block: each pair becomes a @
/
@ entry. Values -- are 'H.Html' to allow mixing links and plain text. dl :: [(String, H.Html)] -> H.Html dl pairs = H.dl H.! A.class_ "build-meta" $ mapM_ (\(k, v) -> do H.dt (H.toHtml k); H.dd v) pairs -- --------------------------------------------------------------------------- -- SVG / custom element helpers (no blaze-svg dependency) -- --------------------------------------------------------------------------- svgTag, rectTag, textTag, titleTag :: H.Html -> H.Html svgTag = BI.customParent "svg" rectTag = BI.customParent "rect" textTag = BI.customParent "text" titleTag = BI.customParent "title" -- | Attach an attribute that isn't in 'Text.Blaze.Html5.Attributes' (e.g. -- SVG @viewBox@, @x@, @y@, or @data-target@). customAttr :: String -> String -> H.Attribute customAttr name val = BI.customAttribute (fromString name) (fromString val) -- --------------------------------------------------------------------------- -- Heatmap SVG -- --------------------------------------------------------------------------- -- | 52-week writing activity heatmap. Styled via @.heatmap-svg@ rules in -- static/css/build.css (no inline @