Professional content refactor
This commit is contained in:
parent
e969461ca3
commit
913a374fb2
|
|
@ -54,6 +54,13 @@ data/semantic-meta.json
|
|||
# IGNORE.txt is for the local build and need not be synced.
|
||||
IGNORE.txt
|
||||
|
||||
# CV/résumé build pipeline (YAML → Jinja → xelatex). The canonical PDFs
|
||||
# live under static/ and ship with the site; the pipeline itself is
|
||||
# kept locally for regeneration but not version-controlled here.
|
||||
yaml-source/
|
||||
# Handover bundle (archived locally for reference; not part of the site).
|
||||
levineuwirth_handover.zip
|
||||
|
||||
# Model files for client-side semantic search (~22 MB binary artifacts).
|
||||
# Download with: make download-model
|
||||
static/models/
|
||||
|
|
|
|||
24
Makefile
24
Makefile
|
|
@ -1,4 +1,4 @@
|
|||
.PHONY: build deploy sign download-model convert-images pdf-thumbs watch clean dev
|
||||
.PHONY: build deploy sign download-model convert-images pdf-thumbs pdfs watch clean dev
|
||||
|
||||
# Source .env for GITHUB_TOKEN and GITHUB_REPO if it exists.
|
||||
# .env format: KEY=value (one per line, no `export` prefix, no quotes needed).
|
||||
|
|
@ -59,6 +59,28 @@ pdf-thumbs:
|
|||
echo "pdf-thumbs: pdftoppm not found — install poppler (skipping)"; \
|
||||
fi
|
||||
|
||||
# Rebuild the CV + website résumé from yaml-source/ and refresh static/.
|
||||
# Standalone helper — NOT a dependency of `build` or `deploy`. Run manually
|
||||
# after editing a YAML under yaml-source/data/. The site build copies
|
||||
# static/*.pdf through unchanged, so a subsequent `make build` picks them up.
|
||||
#
|
||||
# The ATS variant (yaml-source/output/resume_ats.pdf) is intentionally not
|
||||
# copied to static/ — it's a submission artifact, not a website asset. To
|
||||
# regenerate it too, run `make -C yaml-source ats` directly.
|
||||
#
|
||||
# Silently skipped on hosts without the pipeline (e.g., the VPS): yaml-source/
|
||||
# is gitignored, so it's absent on a fresh clone, and that's the expected
|
||||
# state wherever the LaTeX toolchain isn't installed.
|
||||
pdfs:
|
||||
@if [ ! -d yaml-source ]; then \
|
||||
echo "pdfs: yaml-source/ not present — skipping (pipeline is local-only)"; \
|
||||
exit 0; \
|
||||
fi
|
||||
@$(MAKE) -C yaml-source all
|
||||
@cp yaml-source/output/cv.pdf static/cv.pdf
|
||||
@cp yaml-source/output/resume.pdf static/resume.pdf
|
||||
@echo "pdfs: static/cv.pdf and static/resume.pdf refreshed."
|
||||
|
||||
deploy: clean build sign
|
||||
@test -n "$(VPS_USER)" || (echo "deploy: VPS_USER not set in .env" >&2; exit 1)
|
||||
@test -n "$(VPS_HOST)" || (echo "deploy: VPS_HOST not set in .env" >&2; exit 1)
|
||||
|
|
|
|||
36
WRITING.md
36
WRITING.md
|
|
@ -869,8 +869,8 @@ Selecting any text (≥ 2 characters) shows a context-aware toolbar after 450 ms
|
|||
|
||||
| Context | Buttons |
|
||||
|---------|---------|
|
||||
| Prose (multi-word) | BibTeX · Copy · DuckDuckGo · Here · Wikipedia |
|
||||
| Prose (single word) | BibTeX · Copy · Define · DuckDuckGo · Here · Wikipedia |
|
||||
| Prose (multi-word) | Annotate · BibTeX · Copy · DuckDuckGo · Here · \[Translate\] · Wikipedia |
|
||||
| Prose (single word) | Annotate · BibTeX · Copy · Define · DuckDuckGo · Here · \[Translate\] · Wikipedia |
|
||||
| Math | Copy · nLab · OEIS · Wolfram |
|
||||
| Code (known language) | Copy · \<MDN / Hoogle / Docs…\> |
|
||||
| Code (unknown) | Copy |
|
||||
|
|
@ -878,6 +878,38 @@ Selecting any text (≥ 2 characters) shows a context-aware toolbar after 450 ms
|
|||
**BibTeX** generates a `@online{...}` BibLaTeX entry with the selected text in
|
||||
`note={\enquote{...}}` and copies it to the clipboard. **Define** opens English
|
||||
Wiktionary. **Here** opens the Pagefind search page pre-filled with the selection.
|
||||
**Translate** appears only for selections inside a non-English `[lang]` subtree
|
||||
(see below) and opens DeepL with the source lang pre-set and the target as English.
|
||||
|
||||
---
|
||||
|
||||
## Non-English passages
|
||||
|
||||
Wrap non-English text in a Pandoc fenced div with a `lang` attribute. The
|
||||
primary subtag is a BCP-47 code (`es`, `fr`, `la`, `de`, `zh`, …):
|
||||
|
||||
```markdown
|
||||
::: {lang="es"}
|
||||
> *El universo (que otros llaman la Biblioteca) se compone de un número
|
||||
> indefinido, y tal vez infinito, de galerías hexagonales…*
|
||||
:::
|
||||
```
|
||||
|
||||
Pandoc emits `<div lang="es">…</div>`. For inline passages inside an
|
||||
otherwise-English paragraph, use the span form:
|
||||
|
||||
```markdown
|
||||
He opened with a cheerful [bonjour, mon ami]{lang="fr"} and kept going.
|
||||
```
|
||||
|
||||
which produces `<span lang="fr">…</span>`.
|
||||
|
||||
The page root is `<html lang="en">`, so any subtree with a different primary
|
||||
lang subtag activates the **Translate** button in the selection popup —
|
||||
clicking it opens DeepL with the detected source language and English as the
|
||||
target. Languages DeepL does not support (e.g. Latin) fall back to DeepL's
|
||||
auto-detect. Matching the page root (`lang="en"`) does nothing — there is no
|
||||
point translating English into English.
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -66,9 +66,12 @@ fictionPattern = "content/fiction/*.md"
|
|||
musicPattern :: Pattern
|
||||
musicPattern = "content/music/*/index.md"
|
||||
|
||||
-- | Top-level standalone pages (about, colophon, current, gpg, …).
|
||||
-- | Top-level standalone pages (about, colophon, current, gpg, …) and
|
||||
-- the curated routing pages under @content/cv/@ (which render with the
|
||||
-- same @templates/page.html@ pipeline and need the same backlink and
|
||||
-- content-indexing treatment).
|
||||
standalonePagesPattern :: Pattern
|
||||
standalonePagesPattern = "content/*.md"
|
||||
standalonePagesPattern = "content/*.md" .||. "content/cv/*.md"
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- Aggregations
|
||||
|
|
|
|||
|
|
@ -280,6 +280,24 @@ rules = do
|
|||
>>= loadAndApplyTemplate "templates/default.html" pageCtx
|
||||
>>= relativizeUrls
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- CV routing pages (content/cv/*.md → /cv/<slug>/).
|
||||
-- These are narrative overlays pointing into the library; they render
|
||||
-- with the same page.html pipeline as top-level standalone pages, but
|
||||
-- route to directory-style URLs (/cv/projects/ rather than /cv/projects.html)
|
||||
-- so nginx serves them via index-file resolution and the URLs stay stable
|
||||
-- if the underlying files are later reorganized into co-located directories.
|
||||
-- ---------------------------------------------------------------------------
|
||||
match "content/cv/*.md" $ do
|
||||
route $ customRoute $ \ident ->
|
||||
let fname = takeFileName (toFilePath ident)
|
||||
slug = takeWhile (/= '.') fname
|
||||
in "cv/" ++ slug ++ "/index.html"
|
||||
compile $ pageCompiler
|
||||
>>= loadAndApplyTemplate "templates/page.html" pageCtx
|
||||
>>= loadAndApplyTemplate "templates/default.html" pageCtx
|
||||
>>= relativizeUrls
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- Essays — flat (content/essays/foo.md → essays/foo.html) and
|
||||
-- directory-based (content/essays/slug/index.md → essays/slug/index.html).
|
||||
|
|
|
|||
|
|
@ -1,32 +1,54 @@
|
|||
---
|
||||
title: About Levi Neuwirth
|
||||
title: Levi Neuwirth — Vita
|
||||
tags: meta
|
||||
---
|
||||
|
||||
For a less formal, more detailed introduction to who I am, see [[Me]]. This page serves as a professional summary of my background.
|
||||
For a less formal, more detailed introduction to who I am, see [[Me]].
|
||||
|
||||
## Documents
|
||||
These are probably what you're looking for. A summary of the key points follows!
|
||||
|
||||
- **[Curriculum Vitae (PDF)](/cv.pdf)**
|
||||
- **[Resume (PDF)](/resume.pdf)**
|
||||
|
||||
## Education
|
||||
|
||||
**Brown University**, S.cB in Mathematics, Computer Science (May 2026)
|
||||
: advised by D. Ellis Hershkowitz, John F. Hughes, and Rashid Zia
|
||||
- **Technical University of Denmark (DTU)** — MSc in Computer Science and Engineering. September 2026 – expected 2028; PhD studies planned to follow.
|
||||
- **Brown University** — Sc.B. in Computer Science and Mathematics. August 2022 – May 2026
|
||||
- **DIS Copenhagen / Københavns Universitet** — Semester abroad. Fall 2024
|
||||
|
||||
**DIS Copenhagen, Københavns Universitet** Semester Abroad (Fall 2024)
|
||||
## Research
|
||||
|
||||
### Research Interests
|
||||
ARTIFICIAL INTELLIGENCE, COMPUTER VISION, COMPUTER SYSTEMS, CRYPTOGRAPHY, MACHINE LEARNING, NETWORKING, NUMBER THEORY, REINFORCEMENT LEARNING, SECURITY
|
||||
### Published / In Submission
|
||||
|
||||
## Publications & Preprints
|
||||
- **Shu L, Neuwirth L†, Wang X†, Zheng H†.** *Beyond Comorbidity Indices: An Order-Invariant ICD-10-CM Embedding for Readmission and Mortality Prediction.* Under review at *JAMA Network Open*, 2026. [Preprint](/essays/beyond-comorbidity-indices/) · [Calculator](https://levineuwirth.github.io/icd_embeddings/) · [Code](https://github.com/levineuwirth/icd_embeddings)
|
||||
- **Neuwirth L.** *Where Does SIMD Help Post-Quantum Cryptography? A Micro-Architectural Study of ML-KEM on x86 AVX2.* Technical report, Brown University Department of Computer Science, April 2026. [Report](/essays/where-does-simd-help-post-quantum-cryptography/) · [Artifact](https://git.levineuwirth.org/neuwirth/where-simd-helps)
|
||||
|
||||
Coming soon
|
||||
### In Preparation / In Progress
|
||||
|
||||
## Languages
|
||||
- English (native)
|
||||
- Spanish (C1)
|
||||
- Danish (B1)
|
||||
- German (A2, currently focused on learning)
|
||||
- Chinese (A2)
|
||||
- French (A1)
|
||||
- **[NeuroPose](/essays/neuropose/) clinical-implications manuscript.** In preparation; target submission 2026–2027.
|
||||
- **SIMD / PQC Phase 2 & Phase 3.** Hardware performance counters (PAPI), RAPL energy, and cross-ISA ports (ARM NEON/SVE, RISC-V V).
|
||||
- **Semantic-embeddings citation project.** Early-stage work with [NeuroAI](https://neuroai.health), preprint expected summer 2026.
|
||||
- **Magic: The Gathering reinforcement learning project.** Early-stage work through Brown's HPC, expected late 2026.
|
||||
|
||||
### Presentations
|
||||
|
||||
- **Early Detection of Neurological Disorders through Video-Captured Kinematic Analysis.** Ma J, Arms S, Kaneira L, Lall M, Chen K, Cabral W, Man D, Neuwirth L, Shu L. Poster, Brown / Rhode Island Hospital Neurology Summer UTRA Symposium, August 2025.
|
||||
- **"Order-Invariant ICD-10-CM Embedding for Readmission and Mortality Prediction: Toward Multimodal Generative Patient Models"** Shu L, Neuwirth L†, Wang X†, Zheng H†. IEEE/ACM Conference on Connected Health: Applications, Systems and Engineering Technologies (CHASE), August 2026, in review.
|
||||
|
||||
†Equal-contribution undergraduate authors.
|
||||
|
||||
## Experience
|
||||
See [resume (PDF)](/resume.pdf).
|
||||
|
||||
- **xAI** Summer 2025, remote. Contributed to training of `grok-code-fast-1`. Integrated LLMs into autonomous agent frameworks spanning 20 tools across thousands of production workflows and codebases; diagnosed and resolved 50+ agentic failures, improving tool-execution rates by >15%, reducing fatal tool-usage errors by >40%, and reducing API usage by >20%. Contributed to safety systems, reducing hallucination rates by >50% in controlled agent-behavior tests.
|
||||
- **NeuroAI** Present. [neuroai.health](https://neuroai.health). Early-stage venture of academics and clinicians integrating deep learning, reinforcement learning, and generative AI into clinical and research workflows; leading research-engineering across model development, deployment infrastructure, and system design.
|
||||
- **Shu Laboratory, Brown Department of Neurology** — *Undergraduate Researcher and Technical Lead.* October 2023 – Present. Technical lead on [NeuroPose](/essays/neuropose/); co-lead developer on the [ICD-10-CM embedding model](/essays/beyond-comorbidity-indices/).
|
||||
- **Independent Research Contracting** — *Anthropic, Mistral, OpenAI.* 2025 – Present. Expert reasoning contributions in code and mathematics for agentic workflows, agentic task design and evaluation, AI safety, and red-teaming.
|
||||
|
||||
## Projects
|
||||
|
||||
For a complete index of engineering artifacts — kernels, networking, cryptography, deployed ML — see [/cv/projects/](/cv/projects/).
|
||||
## Contact
|
||||
|
||||
[ln@levineuwirth.org](mailto:ln@levineuwirth.org)
|
||||
[ln@levineuwirth.org](mailto:ln@levineuwirth.org) · [ORCID 0009-0002-0162-3587](https://orcid.org/0009-0002-0162-3587) · [GitHub](https://github.com/levineuwirth) · [Forgejo](https://git.levineuwirth.org/neuwirth)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
title: Projects
|
||||
tags: meta
|
||||
---
|
||||
|
||||
Index of engineering artifacts. Systems depth is the primary axis of this page; machine-learning and deployed artifacts follow.
|
||||
|
||||
## Low-Level & Systems
|
||||
|
||||
- **[Weenix](/essays/weenix/)** — Unix-like kernel in ~7,000 lines of C. Virtual memory, VFS, system calls, threading, device drivers, interrupt handlers, and file systems; custom linker support for running userspace x86-64 ELF binaries. Originally a project from Brown CS 169 (Operating Systems with Lab), extended with further features like pipes and userspace preemption. January – May 2025.
|
||||
- **[Networking Stack from Scratch](/essays/networking-stack/)** — TCP/IP, RIP, UDP, and DNS in Go, supporting file transmission of up to 1 GB across networks of 8 virtual machines. Extended with a fully RFC-compliant SSH implementation (2,000+ additional lines) supporting sustained sessions of arbitrary length. October 2024 – July 2025.
|
||||
- **[Where Does SIMD Help Post-Quantum Cryptography?](/essays/where-does-simd-help-post-quantum-cryptography/)** — Hand-written AVX2 assembly for ML-KEM / Kyber. 35×–56× speedup over compiler-optimized C for core NTT arithmetic; 5.4×–7.1× end-to-end KEM speedup. Full statistical-analysis pipeline (Mann-Whitney U, Cliff's δ, bootstrapped CIs) on Brown's OSCAR HPC cluster. Phase 1 report and reproducible artifact public; Phase 2 (PAPI, RAPL) and Phase 3 (ARM NEON/SVE, RISC-V V) in progress. [Artifact](https://git.levineuwirth.org/neuwirth/where-simd-helps)
|
||||
|
||||
## Machine Learning & Deployed
|
||||
|
||||
- **[ICD-10-CM outcome calculator](https://levineuwirth.github.io/icd_embeddings/)** — public, read-only calculator for the permutation-invariant Deep Sets model underlying the paper currently under review at *JAMA Network Open*. Takes a diagnosis-code set; returns 30-day readmission and postdischarge mortality predictions with Integrated-Gradients attribution. [Preprint](/essays/beyond-comorbidity-indices/) · [Code](https://github.com/levineuwirth/icd_embeddings)
|
||||
- **[NeuroPose](/essays/neuropose/)** — 3D pose-estimation and kinematic-analysis system for neurological-recovery research in Liqi Shu's laboratory at Brown Neurology. Python/TensorFlow inference pipeline, MATLAB-based statistical post-processing, Rust backend with HTML/JS frontends. 20,000+ lines across four externally-funded sub-projects since 2023.
|
||||
|
|
@ -24,4 +24,4 @@ The second is a starkly different picture: the human, who only has a vague idea
|
|||
|
||||
These two represent diverging definitions of *intelligence*, both for the models and for their users, or, if you prefer, their collaborators. The first is a definition of intelligence that depends both on what one has the capacity to specify and what one has the capacity to see through. The latter depends wholly on what one has the capacity to see through, and places even more emphasis on this metric than the first, for the amount of recalibration and prompt adjustment necessary to build a specification continuously throughout the duration of a task is always greater than paying the upfront cost of developing a strong specification from the onset. [We the programmers have known this for years](https://en.wikipedia.org/wiki/Hofstadter%27s_law). The first future is chiefly preferable, and the second, which seems to be the unfortunate reality we are racing towards, is not only a realization of the worst effect that AI could have on our cognition, but may also unnecessarily constrain the breadth of intelligence that AGI can achieve.
|
||||
|
||||
## What does "Autonomy" really mean?
|
||||
## Compression *is* Intelligence
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
---
|
||||
title: "Networking Stack from Scratch"
|
||||
date: 2026-04-21
|
||||
abstract: >
|
||||
TCP/IP, RIP, UDP, and DNS implementations in Go, supporting file transmission of up to 1 GB across networks of up to 8 virtual machines. Extended with a fully RFC-compliant SSH implementation (2,000+ additional lines) supporting sustained sessions of arbitrary length.
|
||||
tags:
|
||||
- tech
|
||||
authors:
|
||||
- "Levi Neuwirth | /me.html"
|
||||
status: "Draft"
|
||||
confidence: 85
|
||||
importance: 3
|
||||
scope: personal
|
||||
novelty: conventional
|
||||
practicality: moderate
|
||||
confidence-history:
|
||||
---
|
||||
|
||||
A fuller write-up follows. In the meantime, see the [projects index](/cv/projects/).
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
---
|
||||
title: "NeuroPose"
|
||||
date: 2026-04-21
|
||||
abstract: >
|
||||
3D pose-estimation and kinematic-analysis system for neurological-recovery research, developed in Liqi Shu's laboratory at the Brown University Department of Neurology. Python/TensorFlow inference, MATLAB-based statistical post-processing, Rust backend with HTML/JS frontends. Four externally-funded sub-projects since 2023; clinical-implications manuscript in preparation.
|
||||
tags:
|
||||
- research
|
||||
- research/machine-learning
|
||||
authors:
|
||||
- "Levi Neuwirth | /me.html"
|
||||
- "Liqi Shu"
|
||||
affiliation:
|
||||
- "Department of Neurology, Warren Alpert Medical School, Brown University"
|
||||
status: "Draft"
|
||||
confidence: 75
|
||||
importance: 3
|
||||
evidence: 4
|
||||
scope: broad
|
||||
novelty: innovative
|
||||
practicality: high
|
||||
confidence-history:
|
||||
---
|
||||
|
||||
A fuller write-up follows with the clinical-implications manuscript. In the meantime, see the [projects index](/cv/projects/).
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
---
|
||||
title: "Weenix"
|
||||
date: 2026-04-21
|
||||
abstract: >
|
||||
Full Unix-like kernel in ~7,000 lines of C, written for Brown CS 169 (Operating Systems with Lab): virtual memory, VFS, system calls, threading, device drivers and interrupt handlers, and file systems. Custom linker support for running userspace x86-64 ELF binaries.
|
||||
tags:
|
||||
- tech
|
||||
- tech/C
|
||||
authors:
|
||||
- "Levi Neuwirth | /me.html"
|
||||
status: "Draft"
|
||||
confidence: 85
|
||||
importance: 1
|
||||
scope: local
|
||||
novelty: moderate
|
||||
practicality: low
|
||||
confidence-history:
|
||||
---
|
||||
|
||||
A fuller write-up follows. In the meantime, see the [projects index](/cv/projects/).
|
||||
|
|
@ -14,7 +14,7 @@ This website is *not* an academic homepage, nor a blog, nor a portfolio — thou
|
|||
:::
|
||||
|
||||
<div class="hp-pro-row">
|
||||
<a href="/about.html">About</a><span class="hp-sep" aria-hidden="true">·</span><a href="/cv.pdf">CV</a><span class="hp-sep" aria-hidden="true">·</span><a href="mailto:ln@levineuwirth.org">Email</a><span class="hp-sep" aria-hidden="true">·</span><a href="https://git.levineuwirth.org/neuwirth">Forgejo</a><span class="hp-sep" aria-hidden="true">·</span><a href="https://github.com/levineuwirth">GitHub</a><span class="hp-sep" aria-hidden="true">·</span><a href="/gpg.html">GPG</a><span class="hp-sep" aria-hidden="true">·</span><a href="https://orcid.org/0009-0002-0162-3587">ORCID</a>
|
||||
<a href="/cv.pdf">CV</a><span class="hp-sep" aria-hidden="true">·</span><a href="mailto:ln@levineuwirth.org">Email</a><span class="hp-sep" aria-hidden="true">·</span><a href="https://git.levineuwirth.org/neuwirth">Forgejo</a><span class="hp-sep" aria-hidden="true">·</span><a href="https://github.com/levineuwirth">GitHub</a><span class="hp-sep" aria-hidden="true">·</span><a href="/gpg.html">GPG</a><span class="hp-sep" aria-hidden="true">·</span><a href="https://orcid.org/0009-0002-0162-3587">ORCID</a><span class="hp-sep" aria-hidden="true">·</span><a href="/about.html">Vita</a>
|
||||
</div>
|
||||
|
||||
<div class="hp-curiosity-row">
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
title: Who is Levi Neuwirth?
|
||||
date: 2026-03-16
|
||||
abstract: An extensive introduction to who I am, what defines me, what I do, tools I use, etc. For a more concise, professionally formatted synopsis, see the Biography page linked from the site index.
|
||||
abstract: An extensive introduction to who I am, what defines me, what I do, tools I use, etc. For a more concise, professionally formatted synopsis, see the [Vita](/about.html) page linked from the site index.
|
||||
tags: [meta]
|
||||
status: "Draft"
|
||||
confidence: 90
|
||||
|
|
|
|||
|
|
@ -16,8 +16,6 @@ In sequent toil all forwards do contend.</p>
|
|||
<figcaption><a href="/poetry/sonnet-60.html">Sonnet 60</a> — William Shakespeare</figcaption>
|
||||
</figure>
|
||||
|
||||
[Some more text coming here.]
|
||||
|
||||
<div id="weeks-grid-wrapper"></div>
|
||||
|
||||
|
||||
|
|
@ -40,7 +38,7 @@ Yes and no, if you ask me. I believe in the power of transparency and honesty. I
|
|||
Time passes regardless of our wishes, our intentions, our apprehensions. That is perhaps the most beautifully profound fact of our existence, and one that therefore I view with gratitude.
|
||||
:::
|
||||
|
||||
MANY ARTISTS AND THINKERS ALIKE HAVE, OVER CENTURIES, TRIED [TO]{.smallcaps} ENCAPSULATE THIS BEAUTIFUL PROFUNDITY. [*Vanitas*](https://en.wikipedia.org/wiki/Vanitas)^[Spanish Wikipedia has a far more detailed and preferable entry concerning *Vanitas*. For those who speak Spanish, [see here](https://es.wikipedia.org/wiki/Vanitas)] I do not think that my words can do this subject terrible justice, so rather than trying to encapsulate the potent nature of the subject myself, I will only speak here about what it means to me, interspersing my remarks with art of various forms for us to appreciate.
|
||||
MANY ARTISTS AND THINKERS ALIKE HAVE, OVER CENTURIES, TRIED [TO]{.smallcaps} ENCAPSULATE THIS BEAUTIFUL PROFUNDITY THROUGH [*Vanitas*](https://en.wikipedia.org/wiki/Vanitas).^[Spanish Wikipedia has a far more detailed and preferable entry concerning *Vanitas*. For those who speak Spanish, [see here](https://es.wikipedia.org/wiki/Vanitas).] I do not think that my words can do this subject terrible justice, so rather than trying to encapsulate the potent nature of the subject myself, I will only speak here about what it means to me, interspersing my remarks with art of various forms for us to appreciate.
|
||||
|
||||
### My Creative Works
|
||||
|
||||
|
|
|
|||
154
migrate_html.md
154
migrate_html.md
|
|
@ -1,154 +0,0 @@
|
|||
# Migration Plan: Refactoring `Stats.hs` HTML Generation
|
||||
|
||||
This document outlines a comprehensive migration plan for refactoring `build/Stats.hs` from manual string concatenation to a type-safe HTML combinator library, specifically `blaze-html`.
|
||||
|
||||
## Current Architecture and Issues
|
||||
|
||||
Currently, `build/Stats.hs` generates the HTML for the `/build/` and `/stats/` telemetry pages by manually concatenating raw strings (e.g., `"<div class=\"build-bar-row\">" ++ ...`).
|
||||
|
||||
This approach has several drawbacks:
|
||||
1. **Security (XSS):** It is trivial to introduce Cross-Site Scripting (XSS) vulnerabilities if dynamic content (like post titles) is not manually escaped before being interpolated into the HTML string. The audit report specifically flagged the `link` function for this.
|
||||
2. **Correctness:** It is easy to produce malformed HTML (e.g., missing closing tags, improperly nested elements, unescaped attributes) because the compiler cannot verify the structure of the string.
|
||||
3. **Maintainability:** Complex HTML structures (like the 52-week activity heatmap) become difficult to read, modify, and debug when buried within string interpolation logic.
|
||||
4. **Elegance:** It goes against the functional paradigm of building type-safe abstractions.
|
||||
|
||||
## Proposed Solution: `blaze-html`
|
||||
|
||||
`blaze-html` is a fast, mature, type-safe HTML combinator library for Haskell. It allows you to construct HTML documents using native Haskell functions and operators. By ensuring text and attribute values are escaped by default, it substantially reduces XSS risk. Furthermore, it improves structural correctness and reduces malformed markup by constructing HTML through typed combinators instead of ad hoc string concatenation.
|
||||
|
||||
**Scope:** This migration covers `build/Stats.hs` only. The separate `Site.hs` JSON-string-concat issue from the audit report is a distinct fix and is not addressed here.
|
||||
|
||||
For SVG generation (the heatmap), we will **not** add `blaze-svg` as a dependency. It is not currently in `cabal.project.freeze` and adding it would risk the dependency-resolution instability the audit already flagged. Instead, SVG elements will be emitted via blaze-html's custom-element facility (`Text.Blaze.Internal.customParent` / `customAttribute`), or via a small local helper module. This achieves type-safe SVG emission without a new dependency.
|
||||
|
||||
### 1. Dependency Updates
|
||||
|
||||
`blaze-html 0.9.2.0` is already pinned in `cabal.project.freeze` as a transitive dependency of Hakyll/Pandoc. The only required change is to declare it explicitly in `levineuwirth.cabal`.
|
||||
|
||||
* **Modify `levineuwirth.cabal`:** Add `blaze-html >= 0.9 && < 0.10` to the `build-depends` section of the `site` executable.
|
||||
* **No freeze update required.** The package is already resolved; no `cabal freeze` run is needed.
|
||||
|
||||
### 2. Module Imports
|
||||
|
||||
In `build/Stats.hs`, import the core `blaze-html` modules:
|
||||
|
||||
```haskell
|
||||
import qualified Text.Blaze.Html5 as H
|
||||
import qualified Text.Blaze.Html5.Attributes as A
|
||||
import Text.Blaze.Html.Renderer.String (renderHtml)
|
||||
```
|
||||
|
||||
For SVG custom elements (heatmap), use blaze-html's internal custom-element facility:
|
||||
|
||||
```haskell
|
||||
import qualified Text.Blaze.Internal as BI
|
||||
```
|
||||
|
||||
Hakyll's `makeItem` takes a `String`, so `renderHtml :: Html -> String` is the correct renderer. Use it and stop there — the stats page is a few dozen KB at most and performance is not a concern.
|
||||
|
||||
### 3. Refactoring Strategy
|
||||
|
||||
The refactoring process should be approached incrementally, function by function. **Crucially, intermediate functions must return `H.Html`, with rendering to `String` occurring only at the absolute outer boundary.**
|
||||
|
||||
#### Phase 1: URL Sanitization and Core Helpers
|
||||
|
||||
While `blaze-html` escapes text and attributes, it **does not validate URLs**. An attacker could still inject `javascript:alert(1)` into an `href` attribute. We must introduce URL validation alongside our typed HTML helpers.
|
||||
|
||||
* **URL Validation:**
|
||||
|
||||
`isSafeUrl` is defense-in-depth: in current code every URL is produced by Hakyll's `getRoute` or constructed as a `/tag/` string, so there is no live XSS surface. Nevertheless, include it to prevent regressions.
|
||||
|
||||
The naive prefix check in string-land fails on `JavaScript:` (case), `\tjavascript:` (leading whitespace), and `data:text/html` attacks. Use a case-insensitive, stripped allowlist instead:
|
||||
|
||||
```haskell
|
||||
import Data.Char (isSpace, toLower)
|
||||
|
||||
isSafeUrl :: String -> Bool
|
||||
isSafeUrl u =
|
||||
let norm = map toLower (dropWhile isSpace u)
|
||||
in any (`isPrefixOf` norm) ["/", "https://", "mailto:", "#"]
|
||||
|
||||
safeHref :: String -> H.AttributeValue
|
||||
safeHref u
|
||||
| isSafeUrl u = H.stringValue u
|
||||
| otherwise = H.stringValue "#"
|
||||
```
|
||||
|
||||
Note: `http://` is intentionally excluded (mixed-content over HTTPS).
|
||||
|
||||
* **`link`:**
|
||||
* *New:*
|
||||
```haskell
|
||||
link :: String -> String -> H.Html
|
||||
link url title = H.a H.! A.href (safeHref url) $ H.toHtml title
|
||||
```
|
||||
|
||||
* **`section`:**
|
||||
* *New:*
|
||||
```haskell
|
||||
section :: String -> String -> H.Html -> H.Html
|
||||
section id_ title body = do
|
||||
H.h2 H.! A.id (H.stringValue id_) $ H.toHtml title
|
||||
body
|
||||
```
|
||||
|
||||
* **`table` and `dl`:**
|
||||
These will utilize monadic `do` notation or `mapM_` over lists to generate rows and cells, returning `H.Html` natively.
|
||||
|
||||
* **Static TOC builders (`statsTOC`, `pageTOC`):** These also emit string-concat HTML and must be migrated here alongside the other primitives, not left for later.
|
||||
|
||||
#### Phase 2: Structural Components
|
||||
|
||||
Tackle the larger layout functions once the basic primitives are type-safe.
|
||||
|
||||
* **`renderContent`, `renderPages`, `renderDistribution`, `renderTagsSection`, `renderLinks`, `renderEpistemic`, `renderOutput`, `renderRepository`, `renderBuild`, `renderCorpus`, `renderNotable`, `renderMonthlyVolume`, `renderStatsTags`:**
|
||||
All of these return `String` today and must be updated to return `H.Html`. They will compose the newly typed helper functions (`section`, `table`, `dl`).
|
||||
*Example logic for a table row:*
|
||||
```haskell
|
||||
H.tr $ mapM_ (H.td . H.toHtml) cells
|
||||
```
|
||||
|
||||
#### Phase 2.5: Lift the Heatmap's Inline `<style>`
|
||||
|
||||
The current heatmap (`renderHeatmap`) ships a `<style>` block embedded inside the SVG (`Stats.hs:207–211`). Migrate those rules to `static/css/` where the rest of the heatmap CSS variables (`--hm-0` … `--hm-4`) live. This is the right moment to do it — don't carry the inline style into the typed version.
|
||||
|
||||
#### Phase 3: The Heatmap (`renderHeatmap`)
|
||||
|
||||
The heatmap generation involves nested SVG elements, CSS classes, and `<title>` tooltips.
|
||||
|
||||
* **Separation of Concerns:** Separate the data calculation from the rendering. Keep date, color, and layout calculations in pure data functions, and have the rendering functions handle strictly the HTML/SVG emission.
|
||||
* **SVG via custom elements:** Use blaze-html's `Text.Blaze.Internal.customParent` and `customAttribute` to construct SVG elements type-safely, replacing `"<rect class=\"" ++ ...` with typed combinators — no `blaze-svg` dependency required. Alternatively, define a minimal local `Svg` helper module (10–15 lines) that wraps the most-used SVG tags (`svg`, `rect`, `text_`, `figure`) before this phase begins.
|
||||
|
||||
#### Phase 4: Integration with Hakyll
|
||||
|
||||
Finally, update the top-level Hakyll rules that consume these generated structures. This is the only place `renderHtml` should be called.
|
||||
|
||||
* **`statsRules`:**
|
||||
* The `content` variable will now represent a single, large `H.Html` monad.
|
||||
* Call `renderHtml` exactly once to produce a `String`, then pass it to `makeItem`. The `stripHtmlTags`-based word-count pipeline operates on that rendered string and is unaffected.
|
||||
* The static TOC strings (`pageTOC`, `statsTOC`) are also rendered via `renderHtml` before being passed to `constField`.
|
||||
* *Example:*
|
||||
```haskell
|
||||
let htmlContent = do
|
||||
renderContent rows
|
||||
renderPages allPIs oldestDate newestDate
|
||||
-- ...
|
||||
contentString = renderHtml htmlContent
|
||||
plainText = stripHtmlTags contentString
|
||||
```
|
||||
|
||||
#### Phase 5: Testing and Auditing
|
||||
|
||||
* **Auditing:** During migration, thoroughly search for and eliminate any remaining raw HTML helpers, pre-escaped content, or `unsafe` rendering patterns.
|
||||
* **Testing:** Add specific tests for escaping behavior to ensure security goals are met:
|
||||
* Title containing `<script>alert(1)</script>` renders escaped.
|
||||
* Attributes with quotes are escaped correctly.
|
||||
* Dangerous URLs (e.g., `javascript:...`) are rejected or rewritten by `isSafeUrl`/`safeHref`.
|
||||
* Golden/snapshot tests to ensure generated HTML still contains the expected structure.
|
||||
|
||||
### Summary of Benefits
|
||||
|
||||
Completing this migration will:
|
||||
* **Substantially reduce XSS risk:** Text and attribute values will be escaped by default, and dangerous URLs will be validated and neutralized.
|
||||
* **Improve structural correctness:** Using typed combinators prevents malformed markup and enforces balanced tags.
|
||||
* **Improve composability:** Returning `H.Html` from all helper functions avoids "half-rendered" strings and double-escaping issues.
|
||||
* **Improve readability and testability:** Complex UI components like SVG heatmaps will be declarative, and pure data processing will be decoupled from rendering.
|
||||
Binary file not shown.
|
|
@ -14,9 +14,11 @@
|
|||
12. PubMed — NCBI esummary, title + authors + journal
|
||||
|
||||
Production nginx CSP must add to connect-src:
|
||||
https://en.wikipedia.org https://api.crossref.org
|
||||
https://*.wikipedia.org https://api.crossref.org
|
||||
https://api.github.com https://openlibrary.org
|
||||
https://api.biorxiv.org https://www.youtube.com
|
||||
(The wildcard covers per-language Wikipedias: en, es, fr, simple,
|
||||
zh-yue, be-tarask, … — the popup picks the host from the link URL.)
|
||||
|
||||
Production nginx must also reverse-proxy three CORS-broken upstreams
|
||||
(immutable metadata — long cache TTL is safe). See nginx/popup-proxy.conf.
|
||||
|
|
@ -460,14 +462,21 @@
|
|||
var PROVIDERS = [
|
||||
/* Wikipedia — MediaWiki action API, full lead section, text-only.
|
||||
Uses .popup-extract rather than .popup-abstract; the parser
|
||||
signals this by returning `extract` instead of `abstract`. */
|
||||
signals this by returning `extract` instead of `abstract`.
|
||||
|
||||
The API host matches the link's own subdomain, so es.wikipedia.org
|
||||
links fetch the Spanish extract, de.wikipedia.org fetches German,
|
||||
etc. Bare wikipedia.org and www. fall through to en. */
|
||||
{
|
||||
name: 'wikipedia', label: 'Wikipedia',
|
||||
match: /wikipedia\.org\/wiki\/([^#?]+)/,
|
||||
fetchType: 'json',
|
||||
bodyLimit: 600,
|
||||
url: function (ctx) {
|
||||
return 'https://en.wikipedia.org/w/api.php'
|
||||
var hostMatch = ctx.href.match(/\/\/([a-z0-9-]+)\.wikipedia\.org\//i);
|
||||
var sub = hostMatch ? hostMatch[1].toLowerCase() : 'en';
|
||||
if (sub === 'www') sub = 'en';
|
||||
return 'https://' + sub + '.wikipedia.org/w/api.php'
|
||||
+ '?action=query&prop=extracts&exintro=1&format=json&redirects=1'
|
||||
+ '&titles=' + encodeURIComponent(decodeURIComponent(ctx.match[1]))
|
||||
+ '&origin=*';
|
||||
|
|
|
|||
|
|
@ -7,6 +7,12 @@
|
|||
math → Copy · nLab · OEIS · Wolfram
|
||||
prose (multi-word) → Annotate · BibTeX · Copy · DuckDuckGo · Here · Wikipedia
|
||||
prose (one word) → Annotate · BibTeX · Copy · Define · DuckDuckGo · Here · Wikipedia
|
||||
|
||||
When the selection sits inside a [lang] subtree whose primary subtag
differs from the document root (e.g. the ::: {lang="es"} blocks in
library.md), a Translate button that opens DeepL is inserted in
alphabetical position. Languages DeepL does not support fall back to
DeepL's auto-detect.
|
||||
*/
|
||||
(function () {
|
||||
'use strict';
|
||||
|
|
@ -23,6 +29,16 @@
|
|||
Label: short button text. url: base search URL (query appended).
|
||||
------------------------------------------------------------------ */
|
||||
|
||||
/* DeepL-supported source language codes (primary subtags, lowercase).
|
||||
Selections whose resolved lang is not in this set are sent with
|
||||
source='auto' so DeepL can attempt its own detection. */
|
||||
var DEEPL_SOURCES = {
|
||||
ar: 1, bg: 1, cs: 1, da: 1, de: 1, el: 1, en: 1, es: 1, et: 1,
|
||||
fi: 1, fr: 1, he: 1, hu: 1, id: 1, it: 1, ja: 1, ko: 1, lt: 1,
|
||||
lv: 1, nb: 1, nl: 1, no: 1, pl: 1, pt: 1, ro: 1, ru: 1, sk: 1,
|
||||
sl: 1, sv: 1, th: 1, tr: 1, uk: 1, vi: 1, zh: 1,
|
||||
};
|
||||
|
||||
var DOC_PROVIDERS = {
|
||||
'javascript': { label: 'MDN', url: 'https://developer.mozilla.org/en-US/search?q=' },
|
||||
'typescript': { label: 'MDN', url: 'https://developer.mozilla.org/en-US/search?q=' },
|
||||
|
|
@ -113,6 +129,29 @@
|
|||
return 'prose';
|
||||
}
|
||||
|
||||
/* Returns the primary subtag of the nearest [lang] ancestor of the
|
||||
selection (e.g. "es-MX" → "es"), or null when that lang matches the
|
||||
document root — in which case the text is in the page's default
|
||||
language and no Translate button is warranted. */
|
||||
function getSelectionLang(sel) {
|
||||
if (!sel.rangeCount) return null;
|
||||
var node = sel.getRangeAt(0).commonAncestorContainer;
|
||||
var el = (node.nodeType === Node.TEXT_NODE) ? node.parentElement : node;
|
||||
if (!el) return null;
|
||||
|
||||
var langEl = el.closest('[lang]');
|
||||
if (!langEl) return null;
|
||||
|
||||
var lang = (langEl.getAttribute('lang') || '').toLowerCase().split('-')[0];
|
||||
if (!lang) return null;
|
||||
|
||||
var rootLang = (document.documentElement.getAttribute('lang') || 'en')
|
||||
.toLowerCase().split('-')[0];
|
||||
if (lang === rootLang) return null;
|
||||
|
||||
return lang;
|
||||
}
|
||||
|
||||
/* Returns the Prism language identifier for the code block containing
|
||||
the current selection, or null if the language is not annotated. */
|
||||
function getCodeLanguage(sel) {
|
||||
|
|
@ -144,8 +183,9 @@
|
|||
var context = getContext(sel);
|
||||
var oneWord = isSingleWord(text);
|
||||
var codeLang = (context === 'code') ? getCodeLanguage(sel) : null;
|
||||
var selLang = (context === 'prose') ? getSelectionLang(sel) : null;
|
||||
|
||||
popup.innerHTML = buildHTML(context, oneWord, codeLang);
|
||||
popup.innerHTML = buildHTML(context, oneWord, codeLang, selLang);
|
||||
popup.style.visibility = 'hidden';
|
||||
popup.classList.add('is-visible');
|
||||
|
||||
|
|
@ -198,7 +238,7 @@
|
|||
HTML builder — context-aware button sets
|
||||
------------------------------------------------------------------ */
|
||||
|
||||
function buildHTML(context, oneWord, codeLang) {
|
||||
function buildHTML(context, oneWord, codeLang, selLang) {
|
||||
if (context === 'code') {
|
||||
var provider = codeLang ? DOC_PROVIDERS[codeLang] : null;
|
||||
return btn('copy', 'Copy')
|
||||
|
|
@ -213,13 +253,14 @@
|
|||
+ btn('wolfram', 'Wolfram');
|
||||
}
|
||||
|
||||
/* Prose: Annotate · BibTeX · Copy · [Define] · DuckDuckGo · Here · Wikipedia */
|
||||
/* Prose: Annotate · BibTeX · Copy · [Define] · DuckDuckGo · Here · [Translate] · Wikipedia */
|
||||
return btn('annotate', 'Annotate')
|
||||
+ btn('cite', 'BibTeX')
|
||||
+ btn('copy', 'Copy')
|
||||
+ (oneWord ? btn('define', 'Define') : '')
|
||||
+ btn('search', 'DuckDuckGo')
|
||||
+ btn('here', 'Here')
|
||||
+ (selLang ? translateBtn(selLang) : '')
|
||||
+ btn('wikipedia', 'Wikipedia');
|
||||
}
|
||||
|
||||
|
|
@ -238,6 +279,13 @@
|
|||
+ provider.label + '</button>';
|
||||
}
|
||||
|
||||
/* Translate button carries the resolved source lang so dispatch can pass
|
||||
it to DeepL (or fall through to auto-detect). */
|
||||
function translateBtn(lang) {
|
||||
return '<button class="selection-popup-btn" data-action="translate"'
|
||||
+ ' data-lang="' + lang + '">Translate</button>';
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------
|
||||
Action bindings
|
||||
|
|
@ -366,6 +414,14 @@
|
|||
/* Site search via Pagefind — opens search page with query pre-filled. */
|
||||
window.open('/search.html?q=' + q, '_blank', 'noopener,noreferrer');
|
||||
|
||||
} else if (action === 'translate') {
|
||||
/* DeepL web translator. Hash-routed source/target/text triple;
|
||||
'auto' is used when the resolved lang is not one DeepL supports. */
|
||||
var src = el.getAttribute('data-lang') || '';
|
||||
var srcCode = DEEPL_SOURCES[src] ? src : 'auto';
|
||||
window.open('https://www.deepl.com/translator#' + srcCode + '/en/' + q,
|
||||
'_blank', 'noopener,noreferrer');
|
||||
|
||||
} else if (action === 'annotate') {
|
||||
showAnnotatePicker(text, rect);
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -7,7 +7,6 @@
|
|||
<a href="/">Home</a>
|
||||
<a href="/library.html">Library</a>
|
||||
<a href="/me.html">Me</a>
|
||||
<a href="/current.html">Current</a>
|
||||
<a href="/new.html">New</a>
|
||||
<a href="/links.html">Links</a>
|
||||
<a href="/search.html">Search</a>
|
||||
|
|
|
|||
Loading…
Reference in New Issue