963 lines
41 KiB
Haskell
963 lines
41 KiB
Haskell
{-# LANGUAGE GHC2021 #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
-- | Build telemetry page (/build/): corpus statistics, word-length
|
|
-- distribution, tag frequencies, link analysis, epistemic coverage,
|
|
-- output metrics, repository overview, and build timing.
|
|
-- Rendered as a full essay (3-column layout, TOC, metadata block).
|
|
module Stats (statsRules) where
|
|
|
|
import Control.Exception (IOException, catch)
|
|
import Control.Monad (forM)
|
|
import Data.Char (isSpace, toLower)
|
|
import Data.List (find, isPrefixOf, isSuffixOf, sort, sortBy)
|
|
import qualified Data.Map.Strict as Map
|
|
import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe)
|
|
import Data.Ord (comparing, Down (..))
|
|
import qualified Data.Set as Set
|
|
import Data.String (fromString)
|
|
import Data.Time (getCurrentTime, formatTime, defaultTimeLocale,
|
|
Day, parseTimeM, utctDay, addDays, diffDays)
|
|
import Data.Time.Calendar (toGregorian, dayOfWeek)
|
|
import System.Directory (doesDirectoryExist, getFileSize, listDirectory,
|
|
pathIsSymbolicLink)
|
|
import System.Exit (ExitCode (..))
|
|
import System.FilePath (takeExtension, (</>))
|
|
import System.Process (readProcessWithExitCode)
|
|
import Text.Read (readMaybe)
|
|
import qualified Data.Aeson as Aeson
|
|
import qualified Data.Aeson.Key as AK
|
|
import qualified Data.Aeson.KeyMap as KM
|
|
import qualified Data.Vector as V
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.IO as TIO
|
|
import qualified Data.Text.Encoding as TE
|
|
import qualified Text.Blaze.Html5 as H
|
|
import qualified Text.Blaze.Html5.Attributes as A
|
|
import Text.Blaze.Html.Renderer.String (renderHtml)
|
|
import qualified Text.Blaze.Internal as BI
|
|
import Hakyll
|
|
import Contexts (siteCtx, authorLinksField)
|
|
import qualified Patterns as P
|
|
import Utils (readingTime)
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Types
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
data TypeRow = TypeRow
|
|
{ trLabel :: String
|
|
, trCount :: Int
|
|
, trWords :: Int
|
|
}
|
|
|
|
data PageInfo = PageInfo
|
|
{ piTitle :: String
|
|
, piUrl :: String
|
|
, piWC :: Int
|
|
}
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Hakyll helpers
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
loadWC :: Item String -> Compiler Int
|
|
loadWC item = do
|
|
snap <- loadSnapshot (itemIdentifier item) "word-count"
|
|
return $ fromMaybe 0 (readMaybe (itemBody snap))
|
|
|
|
loadPI :: Item String -> Compiler (Maybe PageInfo)
|
|
loadPI item = do
|
|
meta <- getMetadata (itemIdentifier item)
|
|
mRoute <- getRoute (itemIdentifier item)
|
|
wc <- loadWC item
|
|
return $ fmap (\r -> PageInfo
|
|
{ piTitle = fromMaybe "(untitled)" (lookupString "title" meta)
|
|
, piUrl = "/" ++ r
|
|
, piWC = wc
|
|
}) mRoute
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Formatting helpers
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
commaInt :: Int -> String
|
|
commaInt n
|
|
| n < 1000 = show n
|
|
| otherwise = commaInt (n `div` 1000) ++ "," ++ pad3 (n `mod` 1000)
|
|
where
|
|
pad3 x
|
|
| x < 10 = "00" ++ show x
|
|
| x < 100 = "0" ++ show x
|
|
| otherwise = show x
|
|
|
|
formatBytes :: Integer -> String
|
|
formatBytes b
|
|
| b < 1024 = show b ++ " B"
|
|
| b < 1024*1024 = showD (b * 10 `div` 1024) ++ " KB"
|
|
| otherwise = showD (b * 10 `div` (1024*1024)) ++ " MB"
|
|
where showD n = show (n `div` 10) ++ "." ++ show (n `mod` 10)
|
|
|
|
rtStr :: Int -> String
|
|
rtStr totalWords
|
|
| mins < 60 = show mins ++ " min"
|
|
| otherwise = show (mins `div` 60) ++ "h " ++ show (mins `mod` 60) ++ "m"
|
|
where mins = totalWords `div` 200
|
|
|
|
pctStr :: Int -> Int -> String
|
|
pctStr _ 0 = "—"
|
|
pctStr n total = show (n * 100 `div` total) ++ "%"
|
|
|
|
-- | Strip HTML tags for plain-text word counting.
|
|
--
|
|
-- Handles:
|
|
-- * Tag bodies, including @>@ inside double-quoted attribute values
|
|
-- (so @\<img alt=\"a > b\"\>@ doesn't slice the surrounding text).
|
|
-- * HTML comments @\<!-- ... --\>@ as a unit.
|
|
-- * @\<![CDATA[ ... ]]\>@ sections.
|
|
--
|
|
-- This is still a heuristic — it does not validate the HTML — but it
|
|
-- closes the most common ways for "tag stripping" to leak content.
|
|
stripHtmlTags :: String -> String
|
|
stripHtmlTags = go
|
|
where
|
|
go [] = []
|
|
go ('<':'!':'-':'-':rest) = go (dropComment rest)
|
|
go ('<':'!':'[':'C':'D':'A':'T':'A':'[':rest)
|
|
= go (dropCdata rest)
|
|
go ('<':rest) = go (dropTag rest)
|
|
go (c:rest) = c : go rest
|
|
|
|
-- Drop everything up to and including "-->".
|
|
dropComment ('-':'-':'>':rs) = rs
|
|
dropComment (_:rs) = dropComment rs
|
|
dropComment [] = []
|
|
|
|
-- Drop everything up to and including "]]>".
|
|
dropCdata (']':']':'>':rs) = rs
|
|
dropCdata (_:rs) = dropCdata rs
|
|
dropCdata [] = []
|
|
|
|
-- Drop a tag body, respecting double-quoted attribute values.
|
|
dropTag ('"':rs) = dropTag (skipQuoted rs)
|
|
dropTag ('\'':rs) = dropTag (skipApos rs)
|
|
dropTag ('>':rs) = rs
|
|
dropTag (_:rs) = dropTag rs
|
|
dropTag [] = []
|
|
|
|
skipQuoted ('"':rs) = rs
|
|
skipQuoted (_:rs) = skipQuoted rs
|
|
skipQuoted [] = []
|
|
|
|
skipApos ('\'':rs) = rs
|
|
skipApos (_:rs) = skipApos rs
|
|
skipApos [] = []
|
|
|
|
-- | Normalise a page URL for backlink map lookup (strip trailing .html).
|
|
normUrl :: String -> String
|
|
normUrl u
|
|
| ".html" `isSuffixOf` u = take (length u - 5) u
|
|
| otherwise = u
|
|
|
|
pad2 :: (Show a, Integral a) => a -> String
|
|
pad2 n = if n < 10 then "0" ++ show n else show n
|
|
|
|
-- | Median of a non-empty list; returns 0 for empty. Uses 'drop' +
|
|
-- pattern match instead of @(!!)@ so the function is total in its
|
|
-- own implementation, not just by external invariant.
|
|
median :: [Int] -> Int
|
|
median [] = 0
|
|
median xs =
|
|
case drop (length xs `div` 2) (sort xs) of
|
|
(m : _) -> m
|
|
[] -> 0 -- unreachable: length xs >= 1 above
|
|
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Date helpers (for /stats/ page)
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
parseDay :: String -> Maybe Day
|
|
parseDay = parseTimeM True defaultTimeLocale "%Y-%m-%d"
|
|
|
|
-- | First Monday on or before 'day' (start of its ISO week).
|
|
weekStart :: Day -> Day
|
|
weekStart day = addDays (fromIntegral (negate (fromEnum (dayOfWeek day)))) day
|
|
|
|
-- | Intensity class for the heatmap (hm0 … hm4).
|
|
heatClass :: Int -> String
|
|
heatClass 0 = "hm0"
|
|
heatClass n | n < 500 = "hm1"
|
|
heatClass n | n < 2000 = "hm2"
|
|
heatClass n | n < 5000 = "hm3"
|
|
heatClass _ = "hm4"
|
|
|
|
shortMonth :: Int -> String
|
|
shortMonth m = case m of
|
|
1 -> "Jan"; 2 -> "Feb"; 3 -> "Mar"; 4 -> "Apr"
|
|
5 -> "May"; 6 -> "Jun"; 7 -> "Jul"; 8 -> "Aug"
|
|
9 -> "Sep"; 10 -> "Oct"; 11 -> "Nov"; 12 -> "Dec"
|
|
_ -> ""
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- URL sanitization and core HTML combinators
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Defense-in-depth URL allowlist: reject anything that isn't an internal
|
|
-- path, a fragment, or an explicit safe scheme. Case-insensitive and
|
|
-- whitespace-tolerant to block @JavaScript:@, @\tjavascript:@, @data:@, etc.
|
|
-- @http://@ is intentionally excluded to avoid mixed-content warnings.
|
|
--
|
|
-- Protocol-relative URLs (@//evil.com@) are rejected because the leading
|
|
-- slash would otherwise admit them through the @\"\/\"@ prefix check.
|
|
isSafeUrl :: String -> Bool
|
|
isSafeUrl u =
|
|
let norm = map toLower (dropWhile isSpace u)
|
|
in not ("//" `isPrefixOf` norm)
|
|
&& any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"]
|
|
|
|
safeHref :: String -> H.AttributeValue
|
|
safeHref u
|
|
| isSafeUrl u = H.stringValue u
|
|
| otherwise = H.stringValue "#"
|
|
|
|
-- | Shorthand for 'H.toHtml' over a 'String'.
|
|
txt :: String -> H.Html
|
|
txt = H.toHtml
|
|
|
|
-- | Anchor element with escaped title text and URL sanitized via 'safeHref'.
|
|
-- Use for trusted plain-text labels such as tag slugs.
|
|
link :: String -> String -> H.Html
|
|
link url title = H.a H.! A.href (safeHref url) $ H.toHtml title
|
|
|
|
-- | Anchor for a content page, where the title comes from frontmatter and
|
|
-- may contain author-authored inline HTML (e.g. @<em>Book Title</em>@).
|
|
-- The URL is still sanitized via 'safeHref'; the title is emitted
|
|
-- pre-escaped, matching site convention that metadata titles are
|
|
-- author-controlled trusted HTML.
|
|
pageLink :: String -> String -> H.Html
|
|
pageLink url title = H.a H.! A.href (safeHref url) $ H.preEscapedToHtml title
|
|
|
|
-- | Typed section header followed by its body content.
|
|
section :: String -> String -> H.Html -> H.Html
|
|
section id_ title body = do
|
|
H.h2 H.! A.id (H.stringValue id_) $ H.toHtml title
|
|
body
|
|
|
|
-- | Build-telemetry table with header row, body rows, and an optional total
|
|
-- row. Cell contents are pre-rendered 'H.Html' so callers may embed links or
|
|
-- emphasis inside cells without risking double-escaping.
|
|
table :: [String] -> [[H.Html]] -> Maybe [H.Html] -> H.Html
|
|
table headers rows mFoot =
|
|
H.table H.! A.class_ "build-table" $ do
|
|
H.thead $ H.tr $ mapM_ (H.th . H.toHtml) headers
|
|
H.tbody $ mapM_ renderRow rows
|
|
maybe (return ()) renderFoot mFoot
|
|
where
|
|
renderRow cells = H.tr $ mapM_ H.td cells
|
|
renderFoot cells = H.tfoot $
|
|
H.tr H.! A.class_ "build-total" $ mapM_ H.td cells
|
|
|
|
-- | Two-column metadata block: each pair becomes a @<dt>/<dd>@ entry. Values
|
|
-- are 'H.Html' to allow mixing links and plain text.
|
|
dl :: [(String, H.Html)] -> H.Html
|
|
dl pairs = H.dl H.! A.class_ "build-meta" $
|
|
mapM_ (\(k, v) -> do H.dt (H.toHtml k); H.dd v) pairs
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- SVG / custom element helpers (no blaze-svg dependency)
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
svgTag, rectTag, textTag, titleTag :: H.Html -> H.Html
|
|
svgTag = BI.customParent "svg"
|
|
rectTag = BI.customParent "rect"
|
|
textTag = BI.customParent "text"
|
|
titleTag = BI.customParent "title"
|
|
|
|
-- | Attach an attribute that isn't in 'Text.Blaze.Html5.Attributes' (e.g.
|
|
-- SVG @viewBox@, @x@, @y@, or @data-target@).
|
|
customAttr :: String -> String -> H.Attribute
|
|
customAttr name val = BI.customAttribute (fromString name) (fromString val)
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Heatmap SVG
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | 52-week writing activity heatmap. Styled via @.heatmap-svg@ rules in
|
|
-- static/css/build.css (no inline @<style>@).
|
|
renderHeatmap :: Map.Map Day Int -> Day -> H.Html
|
|
renderHeatmap wordsByDay today =
|
|
let cellSz = 10 :: Int
|
|
gap = 2 :: Int
|
|
step = cellSz + gap
|
|
hdrH = 22 :: Int -- vertical space for month labels
|
|
nWeeks = 52
|
|
-- First Monday of the 52-week window
|
|
startDay = addDays (fromIntegral (-(nWeeks - 1)) * 7) (weekStart today)
|
|
nDays = diffDays today startDay + 1
|
|
allDays = [addDays i startDay | i <- [0 .. nDays - 1]]
|
|
weekOf d = fromIntegral (diffDays d startDay `div` 7) :: Int
|
|
dowOf d = fromEnum (dayOfWeek d) -- Mon=0..Sun=6
|
|
svgW = (nWeeks - 1) * step + cellSz
|
|
svgH = 6 * step + cellSz + hdrH
|
|
|
|
monthLabel d =
|
|
let (_, mo, da) = toGregorian d
|
|
in if da == 1
|
|
then textTag H.! A.class_ "hm-lbl"
|
|
H.! customAttr "x" (show (weekOf d * step))
|
|
H.! customAttr "y" "14"
|
|
$ txt (shortMonth mo)
|
|
else mempty
|
|
|
|
dayCell d =
|
|
let wc = fromMaybe 0 (Map.lookup d wordsByDay)
|
|
(yr, mo, da) = toGregorian d
|
|
x = weekOf d * step
|
|
y = dowOf d * step + hdrH
|
|
tip = show yr ++ "-" ++ pad2 mo ++ "-" ++ pad2 da
|
|
++ if wc > 0 then ": " ++ commaInt wc ++ " words" else ""
|
|
in rectTag H.! A.class_ (H.stringValue (heatClass wc))
|
|
H.! customAttr "x" (show x)
|
|
H.! customAttr "y" (show y)
|
|
H.! customAttr "width" (show cellSz)
|
|
H.! customAttr "height" (show cellSz)
|
|
H.! customAttr "rx" "2"
|
|
$ titleTag (txt tip)
|
|
|
|
legendW = 5 * step - gap
|
|
legendCell i =
|
|
rectTag H.! A.class_ (H.stringValue ("hm" ++ show i))
|
|
H.! customAttr "x" (show (i * step))
|
|
H.! customAttr "y" "0"
|
|
H.! customAttr "width" (show cellSz)
|
|
H.! customAttr "height" (show cellSz)
|
|
H.! customAttr "rx" "2"
|
|
$ mempty
|
|
|
|
legendSvg =
|
|
svgTag H.! customAttr "width" (show legendW)
|
|
H.! customAttr "height" (show cellSz)
|
|
H.! customAttr "viewBox" ("0 0 " ++ show legendW ++ " " ++ show cellSz)
|
|
H.! customAttr "style" "display:inline;vertical-align:middle"
|
|
$ mapM_ legendCell [0 .. 4 :: Int]
|
|
|
|
in H.figure H.! A.class_ "stats-heatmap" $ do
|
|
svgTag H.! customAttr "width" (show svgW)
|
|
H.! customAttr "height" (show svgH)
|
|
H.! customAttr "viewBox" ("0 0 " ++ show svgW ++ " " ++ show svgH)
|
|
H.! A.class_ "heatmap-svg"
|
|
H.! customAttr "role" "img"
|
|
H.! customAttr "aria-label" "52-week writing activity heatmap"
|
|
$ do
|
|
mapM_ monthLabel allDays
|
|
mapM_ dayCell allDays
|
|
H.figcaption H.! A.class_ "heatmap-legend" $ do
|
|
"Less\xA0"
|
|
legendSvg
|
|
"\xA0More"
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Stats page sections
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
renderMonthlyVolume :: Map.Map Day Int -> H.Html
|
|
renderMonthlyVolume wordsByDay =
|
|
section "volume" "Monthly volume" $
|
|
let byMonth = Map.fromListWith (+)
|
|
[ ((y, m), wc)
|
|
| (day, wc) <- Map.toList wordsByDay
|
|
, let (y, m, _) = toGregorian day
|
|
]
|
|
in if Map.null byMonth
|
|
then H.p (H.em "No dated content yet.")
|
|
else
|
|
let maxWC = max 1 $ maximum $ Map.elems byMonth
|
|
bar (y, m) =
|
|
let wc = fromMaybe 0 (Map.lookup (y, m) byMonth)
|
|
pct = if wc == 0 then 0 else max 2 (wc * 100 `div` maxWC)
|
|
lbl = shortMonth m ++ " \x2019" ++ drop 2 (show y)
|
|
in H.div H.! A.class_ "build-bar-row" $ do
|
|
H.span H.! A.class_ "build-bar-label" $ txt lbl
|
|
H.span H.! A.class_ "build-bar-wrap" $
|
|
H.span H.! A.class_ "build-bar"
|
|
H.! A.style (H.stringValue ("width:" ++ show pct ++ "%"))
|
|
$ mempty
|
|
H.span H.! A.class_ "build-bar-count" $
|
|
if wc > 0 then txt (commaInt wc) else mempty
|
|
in H.div H.! A.class_ "build-bars" $
|
|
mapM_ bar (Map.keys byMonth)
|
|
|
|
renderCorpus :: [TypeRow] -> [PageInfo] -> H.Html
|
|
renderCorpus typeRows allPIs =
|
|
section "corpus" "Corpus" $ do
|
|
dl [ ("Total words", txt (commaInt totalWords))
|
|
, ("Total pages", txt (commaInt (length allPIs)))
|
|
, ("Total reading time", txt (rtStr totalWords))
|
|
, ("Average length", txt (commaInt avgWC ++ " words"))
|
|
, ("Median length", txt (commaInt medWC ++ " words"))
|
|
]
|
|
table ["Type", "Pages", "Words", "Reading time"]
|
|
(map row typeRows)
|
|
(Just [ "Total"
|
|
, txt (commaInt (sum (map trCount typeRows)))
|
|
, txt (commaInt totalWords)
|
|
, txt (rtStr totalWords)
|
|
])
|
|
where
|
|
hasSomeWC = filter (\p -> piWC p > 0) allPIs
|
|
totalWords = sum (map trWords typeRows)
|
|
avgWC = if null hasSomeWC then 0 else totalWords `div` length hasSomeWC
|
|
medWC = median (map piWC hasSomeWC)
|
|
row r = [ txt (trLabel r)
|
|
, txt (commaInt (trCount r))
|
|
, txt (commaInt (trWords r))
|
|
, txt (rtStr (trWords r))
|
|
]
|
|
|
|
renderNotable :: [PageInfo] -> H.Html
|
|
renderNotable allPIs =
|
|
section "notable" "Notable" $ do
|
|
H.p (H.strong "Longest")
|
|
pageList (take 5 (sortBy (comparing (Down . piWC)) hasSomeWC))
|
|
H.p (H.strong "Shortest")
|
|
pageList (take 5 (sortBy (comparing piWC) hasSomeWC))
|
|
where
|
|
hasSomeWC = filter (\p -> piWC p > 50) allPIs
|
|
pageList ps = H.ol H.! A.class_ "build-page-list" $
|
|
mapM_ (\p -> H.li $ do
|
|
pageLink (piUrl p) (piTitle p)
|
|
txt (" \x2014 " ++ commaInt (piWC p) ++ " words")
|
|
) ps
|
|
|
|
-- | Renamed/aliased to 'renderTagsSection' below — kept as a name for
|
|
-- legacy call sites until they are migrated. Defining it as the same
|
|
-- function (instead of an independent copy) prevents the two from
|
|
-- drifting silently.
|
|
renderStatsTags :: [(String, Int)] -> Int -> H.Html
|
|
renderStatsTags = renderTagsSection
|
|
|
|
statsTOC :: H.Html
|
|
statsTOC = H.ol $ mapM_ item entries
|
|
where
|
|
item (i, t) =
|
|
H.li $ H.a H.! A.href (H.stringValue ("#" ++ i))
|
|
H.! customAttr "data-target" i
|
|
$ txt t
|
|
entries = [ ("activity", "Writing activity")
|
|
, ("volume", "Monthly volume")
|
|
, ("corpus", "Corpus")
|
|
, ("notable", "Notable")
|
|
, ("tags", "Tags")
|
|
]
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- IO: output directory walk
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
-- | Recursively walk a directory, returning @(file, size)@ tuples for every
|
|
-- regular file beneath it.
|
|
--
|
|
-- Symlinks (both files and directories) are skipped, so a stray
|
|
-- @_site\/a -> _site@ doesn't trigger an infinite loop.
|
|
walkDir :: FilePath -> IO [(FilePath, Integer)]
|
|
walkDir dir = do
|
|
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
|
|
fmap concat $ forM entries $ \e -> do
|
|
let path = dir </> e
|
|
isLink <- pathIsSymbolicLink path
|
|
`catch` (\(_ :: IOException) -> return False)
|
|
if isLink
|
|
then return []
|
|
else do
|
|
isDir <- doesDirectoryExist path
|
|
if isDir
|
|
then walkDir path
|
|
else do
|
|
sz <- getFileSize path
|
|
`catch` (\(_ :: IOException) -> return 0)
|
|
return [(path, sz)]
|
|
|
|
displayExt :: FilePath -> String
|
|
displayExt path = case takeExtension path of
|
|
".html" -> ".html"
|
|
".css" -> ".css"
|
|
".js" -> ".js"
|
|
".woff2" -> ".woff2"
|
|
".svg" -> ".svg"
|
|
".mp3" -> ".mp3"
|
|
".pdf" -> ".pdf"
|
|
".json" -> ".json"
|
|
".xml" -> ".xml"
|
|
".ico" -> ".ico"
|
|
".png" -> "image"
|
|
".jpg" -> "image"
|
|
".jpeg" -> "image"
|
|
".webp" -> "image"
|
|
_ -> "other"
|
|
|
|
getOutputStats :: IO (Map.Map String (Int, Integer), Int, Integer)
|
|
getOutputStats = do
|
|
files <- walkDir "_site"
|
|
let grouped = foldr (\(path, sz) acc ->
|
|
Map.insertWith (\(c1,s1) (c2,s2) -> (c1+c2, s1+s2))
|
|
(displayExt path)
|
|
(1, sz) acc)
|
|
Map.empty files
|
|
return (grouped, length files, sum (map snd files))
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- IO: lines of code
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
countLinesDir :: FilePath -> String -> (FilePath -> Bool) -> IO (Int, Int)
|
|
countLinesDir dir ext skipPred = do
|
|
entries <- listDirectory dir `catch` (\(_ :: IOException) -> return [])
|
|
let files = filter (\e -> takeExtension e == ext && not (skipPred e)) entries
|
|
-- Use strict text IO so the file handle is released as soon as the
|
|
-- contents are read; the prior 'readFile' chained lazy IO under
|
|
-- 'forM', leaving every handle open until the loop forced 'lines'.
|
|
ls <- fmap sum $ forM files $ \e -> do
|
|
content <- TIO.readFile (dir </> e)
|
|
`catch` (\(_ :: IOException) -> return T.empty)
|
|
return (length (T.lines content))
|
|
return (length files, ls)
|
|
|
|
getLocStats :: IO (Int, Int, Int, Int, Int, Int)
|
|
-- (hsFiles, hsLines, cssFiles, cssLines, jsFiles, jsLines)
|
|
getLocStats = do
|
|
(hf, hl) <- countLinesDir "build" ".hs" (const False)
|
|
(cf, cl) <- countLinesDir "static/css" ".css" (const False)
|
|
(jf, jl) <- countLinesDir "static/js" ".js" (".min.js" `isSuffixOf`)
|
|
return (hf, hl, cf, cl, jf, jl)
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- IO: git stats
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
gitRun :: [String] -> IO String
|
|
gitRun args = do
|
|
(ec, out, _) <- readProcessWithExitCode "git" args ""
|
|
return $ if ec == ExitSuccess then out else ""
|
|
|
|
getGitStats :: IO (Int, String)
|
|
getGitStats = do
|
|
countOut <- gitRun ["rev-list", "--count", "HEAD"]
|
|
firstOut <- gitRun ["log", "--format=%ad", "--date=short", "--reverse"]
|
|
let commits = fromMaybe 0 (readMaybe (filter (/= '\n') countOut) :: Maybe Int)
|
|
firstDate = case lines firstOut of { (d:_) -> d; _ -> "\x2014" }
|
|
return (commits, firstDate)
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- HTML rendering: build page sections
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
renderContent :: [TypeRow] -> H.Html
|
|
renderContent rows =
|
|
section "content" "Content" $
|
|
table ["Type", "Count", "Words", "Reading time"]
|
|
(map row rows)
|
|
(Just [ "Total"
|
|
, txt (commaInt totalCount)
|
|
, txt (commaInt totalWords)
|
|
, txt (rtStr totalWords)
|
|
])
|
|
where
|
|
totalCount = sum (map trCount rows)
|
|
totalWords = sum (map trWords rows)
|
|
row r = [ txt (trLabel r)
|
|
, txt (commaInt (trCount r))
|
|
, txt (commaInt (trWords r))
|
|
, txt (rtStr (trWords r))
|
|
]
|
|
|
|
renderPages :: [PageInfo]
|
|
-> Maybe (String, String, String)
|
|
-> Maybe (String, String, String)
|
|
-> H.Html
|
|
renderPages allPIs mOldest mNewest =
|
|
section "pages" "Pages" $ do
|
|
dl $
|
|
[ ("Total pages", txt (commaInt (length allPIs)))
|
|
, ("Average length", txt (commaInt avgWC ++ " words"))
|
|
] ++
|
|
maybe [] (\(d,t,u) -> [("Oldest content", datedLink d t u)]) mOldest ++
|
|
maybe [] (\(d,t,u) -> [("Newest content", datedLink d t u)]) mNewest
|
|
H.p (H.strong "Longest")
|
|
pageList (take 3 (sortBy (comparing (Down . piWC)) hasSomeWC))
|
|
H.p (H.strong "Shortest")
|
|
pageList (take 3 (sortBy (comparing piWC) hasSomeWC))
|
|
where
|
|
hasSomeWC = filter (\p -> piWC p > 0) allPIs
|
|
avgWC = if null hasSomeWC then 0
|
|
else sum (map piWC hasSomeWC) `div` length hasSomeWC
|
|
datedLink d t u = do
|
|
txt (d ++ " \x2014 ")
|
|
pageLink u t
|
|
pageList ps = H.ol H.! A.class_ "build-page-list" $
|
|
mapM_ (\p -> H.li $ do
|
|
pageLink (piUrl p) (piTitle p)
|
|
txt (" \x2014 " ++ commaInt (piWC p) ++ " words")
|
|
) ps
|
|
|
|
renderDistribution :: [Int] -> H.Html
|
|
renderDistribution wcs =
|
|
section "distribution" "Word-length distribution" $
|
|
H.div H.! A.class_ "build-bars" $ mapM_ bar buckets
|
|
where
|
|
bucketOf w
|
|
| w < 500 = 0
|
|
| w < 1000 = 1
|
|
| w < 2000 = 2
|
|
| w < 5000 = 3
|
|
| otherwise = 4
|
|
labels :: [H.Html]
|
|
labels = [ "< 500"
|
|
, "500 \x2013 1k"
|
|
, "1k \x2013 2k"
|
|
, "2k \x2013 5k"
|
|
, "\x2265 5k"
|
|
]
|
|
counts = foldr (\w acc -> Map.insertWith (+) (bucketOf w) (1 :: Int) acc)
|
|
(Map.fromList [(i, 0 :: Int) | i <- [0 .. 4]]) wcs
|
|
buckets = [(labels !! i, fromMaybe 0 (Map.lookup i counts)) | i <- [0 .. 4]]
|
|
maxCount = max 1 (maximum (map snd buckets))
|
|
bar (lbl, n) =
|
|
let pct = n * 100 `div` maxCount
|
|
in H.div H.! A.class_ "build-bar-row" $ do
|
|
H.span H.! A.class_ "build-bar-label" $ lbl
|
|
H.span H.! A.class_ "build-bar-wrap" $
|
|
H.span H.! A.class_ "build-bar"
|
|
H.! A.style (H.stringValue ("width:" ++ show pct ++ "%"))
|
|
$ mempty
|
|
H.span H.! A.class_ "build-bar-count" $ txt (show n)
|
|
|
|
renderTagsSection :: [(String, Int)] -> Int -> H.Html
|
|
renderTagsSection topTags uniqueCount =
|
|
section "tags" "Tags" $ do
|
|
dl [("Unique tags", txt (commaInt uniqueCount))]
|
|
table ["Tag", "Items"] (map row topTags) Nothing
|
|
where
|
|
row (t, n) = [link ("/" ++ t ++ "/") t, txt (show n)]
|
|
|
|
renderLinks :: Maybe (String, Int, String) -> Int -> Int -> H.Html
|
|
renderLinks mMostLinked orphanCount total =
|
|
section "links" "Links" $
|
|
dl
|
|
[ case mMostLinked of
|
|
Nothing -> ("Most-linked page", "\x2014")
|
|
Just (u, n, t) ->
|
|
( "Most-linked page"
|
|
, do pageLink u t
|
|
txt (" (" ++ show n ++ " inbound links)")
|
|
)
|
|
, ( "Orphan pages"
|
|
, txt (commaInt orphanCount
|
|
++ " of " ++ commaInt total
|
|
++ " (" ++ pctStr orphanCount total ++ ")")
|
|
)
|
|
]
|
|
|
|
renderEpistemic :: Int -> Int -> Int -> Int -> Int -> H.Html
|
|
renderEpistemic total ws wc wi we =
|
|
section "epistemic" "Epistemic coverage" $
|
|
table
|
|
["Field", "Set", "Coverage"]
|
|
[ row "Status" ws
|
|
, row "Confidence" wc
|
|
, row "Importance" wi
|
|
, row "Evidence" we
|
|
]
|
|
Nothing
|
|
where
|
|
row label n = [ txt label
|
|
, txt (show n ++ " / " ++ show total)
|
|
, txt (pctStr n total)
|
|
]
|
|
|
|
renderOutput :: Map.Map String (Int, Integer) -> Int -> Integer -> H.Html
|
|
renderOutput grouped totalFiles totalSize =
|
|
section "output" "Output" $
|
|
table
|
|
["Type", "Files", "Size"]
|
|
(map row (sortBy (comparing (Down . snd . snd)) (Map.toList grouped)))
|
|
(Just [ "Total"
|
|
, txt (commaInt totalFiles)
|
|
, txt (formatBytes totalSize)
|
|
])
|
|
where
|
|
row (ext, (n, sz)) = [txt ext, txt (commaInt n), txt (formatBytes sz)]
|
|
|
|
renderRepository :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> String -> H.Html
|
|
renderRepository hf hl cf cl jf jl commits firstDate =
|
|
section "repository" "Repository" $
|
|
dl
|
|
[ ("Haskell", txt (commaInt hl ++ " lines across " ++ show hf ++ " files"))
|
|
, ("CSS", txt (commaInt cl ++ " lines across " ++ show cf ++ " files"))
|
|
, ("JavaScript", txt (commaInt jl ++ " lines across " ++ show jf ++ " files (excl. minified)"))
|
|
, ("Total git commits", txt (commaInt commits))
|
|
, ("Repository started", txt firstDate)
|
|
]
|
|
|
|
renderBuild :: String -> String -> H.Html
|
|
renderBuild ts dur =
|
|
section "build" "Build" $
|
|
dl
|
|
[ ("Generated", txt ts)
|
|
, ("Last build duration", txt dur)
|
|
]
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Static TOC (matches the nine h2 sections above)
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
pageTOC :: H.Html
|
|
pageTOC = H.ol $ mapM_ item sections
|
|
where
|
|
item (id_, title) =
|
|
H.li $ H.a H.! A.href (H.stringValue ("#" ++ id_))
|
|
H.! customAttr "data-target" id_
|
|
$ txt title
|
|
sections =
|
|
[ ("content", "Content")
|
|
, ("pages", "Pages")
|
|
, ("distribution", "Word-length distribution")
|
|
, ("tags", "Tags")
|
|
, ("links", "Links")
|
|
, ("epistemic", "Epistemic coverage")
|
|
, ("output", "Output")
|
|
, ("repository", "Repository")
|
|
, ("build", "Build")
|
|
]
|
|
|
|
-- ---------------------------------------------------------------------------
|
|
-- Rules
|
|
-- ---------------------------------------------------------------------------
|
|
|
|
statsRules :: Tags -> Rules ()
|
|
statsRules tags = do
|
|
|
|
-- -------------------------------------------------------------------------
|
|
-- Build telemetry page (/build/)
|
|
-- -------------------------------------------------------------------------
|
|
create ["build/index.html"] $ do
|
|
route idRoute
|
|
compile $ do
|
|
-- ----------------------------------------------------------------
|
|
-- Load all content items
|
|
-- ----------------------------------------------------------------
|
|
essays <- loadAll (P.essayPattern .&&. hasNoVersion)
|
|
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
|
|
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
|
|
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
|
|
comps <- loadAll ("content/music/*/index.md" .&&. hasNoVersion)
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Word counts
|
|
-- ----------------------------------------------------------------
|
|
essayWCs <- mapM loadWC essays
|
|
postWCs <- mapM loadWC posts
|
|
poemWCs <- mapM loadWC poems
|
|
fictionWCs <- mapM loadWC fiction
|
|
compWCs <- mapM loadWC comps
|
|
|
|
let allWCs = essayWCs ++ postWCs ++ poemWCs ++ fictionWCs ++ compWCs
|
|
rows =
|
|
[ TypeRow "Essays" (length essays) (sum essayWCs)
|
|
, TypeRow "Blog posts" (length posts) (sum postWCs)
|
|
, TypeRow "Poems" (length poems) (sum poemWCs)
|
|
, TypeRow "Fiction" (length fiction) (sum fictionWCs)
|
|
, TypeRow "Compositions" (length comps) (sum compWCs)
|
|
]
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Per-page info (title + URL + word count)
|
|
-- ----------------------------------------------------------------
|
|
allItems <- return (essays ++ posts ++ poems ++ fiction ++ comps)
|
|
allPIs <- catMaybes <$> mapM loadPI allItems
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Dates (essays + posts only)
|
|
-- ----------------------------------------------------------------
|
|
let getDateMeta item = do
|
|
meta <- getMetadata (itemIdentifier item)
|
|
mRoute <- getRoute (itemIdentifier item)
|
|
let d = fromMaybe "" (lookupString "date" meta)
|
|
t = fromMaybe "(untitled)" (lookupString "title" meta)
|
|
u = maybe "#" (\r -> "/" ++ r) mRoute
|
|
return (d, t, u)
|
|
essayDates <- mapM getDateMeta essays
|
|
postDates <- mapM getDateMeta posts
|
|
let allDates = filter (\(d,_,_) -> not (null d)) (essayDates ++ postDates)
|
|
sortedDates = sortBy (comparing (\(d,_,_) -> d)) allDates
|
|
oldestDate = listToMaybe sortedDates
|
|
newestDate = listToMaybe (reverse sortedDates)
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Tags
|
|
-- ----------------------------------------------------------------
|
|
let tagFreqs = map (\(t, ids) -> (t, length ids)) (tagsMap tags)
|
|
topTags = take 15 (sortBy (comparing (Down . snd)) tagFreqs)
|
|
uniqueTags = length tagFreqs
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Backlinks: most-linked page + orphan count
|
|
-- ----------------------------------------------------------------
|
|
blItem <- load (fromFilePath "data/backlinks.json") :: Compiler (Item String)
|
|
let rawBL = itemBody blItem
|
|
mBLVal = Aeson.decodeStrict (TE.encodeUtf8 (T.pack rawBL)) :: Maybe Aeson.Value
|
|
blPairs = case mBLVal of
|
|
Just (Aeson.Object km) ->
|
|
[ (T.unpack (AK.toText k),
|
|
case v of Aeson.Array arr -> V.length arr; _ -> 0)
|
|
| (k, v) <- KM.toList km ]
|
|
_ -> []
|
|
blSet = Set.fromList (map fst blPairs)
|
|
orphanCount = length
|
|
[ p | p <- allPIs
|
|
, not (Set.member (normUrl (piUrl p)) blSet) ]
|
|
mostLinked = listToMaybe (sortBy (comparing (Down . snd)) blPairs)
|
|
mostLinkedInfo = mostLinked >>= \(url, ct) ->
|
|
let mTitle = piTitle <$> find (\p -> normUrl (piUrl p) == url) allPIs
|
|
in Just (url, ct, fromMaybe url mTitle)
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Epistemic coverage (essays + posts)
|
|
-- ----------------------------------------------------------------
|
|
essayMetas <- mapM (getMetadata . itemIdentifier) essays
|
|
postMetas <- mapM (getMetadata . itemIdentifier) posts
|
|
let epMetas = essayMetas ++ postMetas
|
|
epTotal = length epMetas
|
|
ep f = length (filter (isJust . f) epMetas)
|
|
withStatus = ep (lookupString "status")
|
|
withConf = ep (lookupString "confidence")
|
|
withImp = ep (lookupString "importance")
|
|
withEv = ep (lookupString "evidence")
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Output directory stats
|
|
-- ----------------------------------------------------------------
|
|
(outputGrouped, totalFiles, totalSize) <-
|
|
unsafeCompiler getOutputStats
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Lines of code + git stats
|
|
-- ----------------------------------------------------------------
|
|
(hf, hl, cf, cl, jf, jl) <- unsafeCompiler getLocStats
|
|
(commits, firstDate) <- unsafeCompiler getGitStats
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Build timestamp + last build duration
|
|
-- ----------------------------------------------------------------
|
|
buildTimestamp <- unsafeCompiler $
|
|
formatTime defaultTimeLocale "%Y-%m-%d %H:%M UTC" <$> getCurrentTime
|
|
lastBuildDur <- unsafeCompiler $
|
|
(readFile "data/last-build-seconds.txt" >>= \s ->
|
|
let secs = fromMaybe 0 (readMaybe (filter (/= '\n') s) :: Maybe Int)
|
|
in return (show secs ++ "s"))
|
|
`catch` (\(_ :: IOException) -> return "\x2014")
|
|
|
|
-- ----------------------------------------------------------------
|
|
-- Assemble page
|
|
-- ----------------------------------------------------------------
|
|
let htmlContent :: H.Html
|
|
htmlContent = do
|
|
renderContent rows
|
|
renderPages allPIs oldestDate newestDate
|
|
renderDistribution allWCs
|
|
renderTagsSection topTags uniqueTags
|
|
renderLinks mostLinkedInfo orphanCount (length allPIs)
|
|
renderEpistemic epTotal withStatus withConf withImp withEv
|
|
renderOutput outputGrouped totalFiles totalSize
|
|
renderRepository hf hl cf cl jf jl commits firstDate
|
|
renderBuild buildTimestamp lastBuildDur
|
|
contentString = renderHtml htmlContent
|
|
plainText = stripHtmlTags contentString
|
|
wc = length (words plainText)
|
|
rt = readingTime plainText
|
|
ctx = constField "toc" (renderHtml pageTOC)
|
|
<> constField "word-count" (show wc)
|
|
<> constField "reading-time" (show rt)
|
|
<> constField "title" "Build Telemetry"
|
|
<> constField "abstract" "Per-build corpus statistics, tag distribution, \
|
|
\link analysis, epistemic coverage, output metrics, \
|
|
\repository overview, and build timing."
|
|
<> constField "build" "true"
|
|
<> authorLinksField
|
|
<> siteCtx
|
|
|
|
makeItem contentString
|
|
>>= loadAndApplyTemplate "templates/essay.html" ctx
|
|
>>= loadAndApplyTemplate "templates/default.html" ctx
|
|
>>= relativizeUrls
|
|
|
|
-- -------------------------------------------------------------------------
|
|
-- Writing statistics page (/stats/)
|
|
-- -------------------------------------------------------------------------
|
|
create ["stats/index.html"] $ do
|
|
route idRoute
|
|
compile $ do
|
|
essays <- loadAll (P.essayPattern .&&. hasNoVersion)
|
|
posts <- loadAll ("content/blog/*.md" .&&. hasNoVersion)
|
|
poems <- loadAll ("content/poetry/*.md" .&&. hasNoVersion)
|
|
fiction <- loadAll ("content/fiction/*.md" .&&. hasNoVersion)
|
|
comps <- loadAll ("content/music/*/index.md" .&&. hasNoVersion)
|
|
|
|
essayWCs <- mapM loadWC essays
|
|
postWCs <- mapM loadWC posts
|
|
poemWCs <- mapM loadWC poems
|
|
fictionWCs <- mapM loadWC fiction
|
|
compWCs <- mapM loadWC comps
|
|
|
|
let allItems = essays ++ posts ++ poems ++ fiction ++ comps
|
|
typeRows =
|
|
[ TypeRow "Essays" (length essays) (sum essayWCs)
|
|
, TypeRow "Blog posts" (length posts) (sum postWCs)
|
|
, TypeRow "Poems" (length poems) (sum poemWCs)
|
|
, TypeRow "Fiction" (length fiction) (sum fictionWCs)
|
|
, TypeRow "Compositions" (length comps) (sum compWCs)
|
|
]
|
|
|
|
allPIs <- catMaybes <$> mapM loadPI allItems
|
|
|
|
-- Build wordsByDay: for each item with a parseable `date`, map that
|
|
-- day to the item's word count (summing if multiple items share a date).
|
|
datePairs <- fmap catMaybes $ forM allItems $ \item -> do
|
|
meta <- getMetadata (itemIdentifier item)
|
|
wc <- loadWC item
|
|
return $ case lookupString "date" meta >>= parseDay of
|
|
Nothing -> Nothing
|
|
Just d -> Just (d, wc)
|
|
let wordsByDay = Map.fromListWith (+) datePairs
|
|
|
|
let tagFreqs = map (\(t, ids) -> (t, length ids)) (tagsMap tags)
|
|
topTags = take 15 (sortBy (comparing (Down . snd)) tagFreqs)
|
|
uniqueTags = length tagFreqs
|
|
|
|
today <- unsafeCompiler (utctDay <$> getCurrentTime)
|
|
|
|
let htmlContent :: H.Html
|
|
htmlContent = do
|
|
section "activity" "Writing activity" (renderHeatmap wordsByDay today)
|
|
renderMonthlyVolume wordsByDay
|
|
renderCorpus typeRows allPIs
|
|
renderNotable allPIs
|
|
renderStatsTags topTags uniqueTags
|
|
contentString = renderHtml htmlContent
|
|
plainText = stripHtmlTags contentString
|
|
wc = length (words plainText)
|
|
rt = readingTime plainText
|
|
ctx = constField "toc" (renderHtml statsTOC)
|
|
<> constField "word-count" (show wc)
|
|
<> constField "reading-time" (show rt)
|
|
<> constField "title" "Writing Statistics"
|
|
<> constField "abstract" "Writing activity, corpus breakdown, \
|
|
\and tag distribution — computed at build time."
|
|
<> constField "build" "true"
|
|
<> authorLinksField
|
|
<> siteCtx
|
|
|
|
makeItem contentString
|
|
>>= loadAndApplyTemplate "templates/essay.html" ctx
|
|
>>= loadAndApplyTemplate "templates/default.html" ctx
|
|
>>= relativizeUrls
|