/* semantic-search.js — Client-side semantic search using paragraph embeddings.
 *
 * At build time, tools/embed.py produces:
 *   /data/semantic-index.bin   raw Float32Array (N_paragraphs × 384 dims)
 *   /data/semantic-meta.json   [{url, title, heading, excerpt}, ...]
 *
 * At query time, transformers.js embeds the user's query with all-MiniLM-L6-v2
 * (same model used at build time) and ranks paragraphs by cosine similarity.
 * All computation is client-side; no server required.
 *
 * Model: Xenova/all-MiniLM-L6-v2 (~22 MB quantized, cached by browser after first load)
 * Model files served from /models/all-MiniLM-L6-v2/ (same-origin; run tools/download-model.sh)
 * Index format: raw little-endian Float32, shape [N, 384], unit-normalized
 *
 * CSP: requires cdn.jsdelivr.net in script-src (transformers.js library).
 *      connect-src stays 'self' — model weights are served same-origin.
 */
(function () {
    'use strict';

    /* Nothing to do outside a browser (e.g. when the file is merely parsed
       or loaded by tooling). */
    if (typeof document === 'undefined') return;

    const MODEL      = 'all-MiniLM-L6-v2';   /* local name, no Xenova/ prefix */
    const MODEL_PATH = '/models/';           /* served same-origin */
    const DIM        = 384;
    const TOP_K      = 8;
    const CDN        = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2';

    let extractor    = null;   /* feature-extraction pipeline, loaded lazily on first search */
    let vectors      = null;   /* Float32Array, shape [N, DIM] */
    let meta         = null;   /* [{url, title, heading, excerpt}] */
    let indexReady   = false;
    let indexPromise = null;   /* in-flight index load, shared by concurrent searches */
    let modelPromise = null;   /* in-flight model load, shared by concurrent searches */
    let searchSeq    = 0;      /* token of the most recent search; older ones are discarded */

    const queryEl   = document.getElementById('semantic-query');
    const statusEl  = document.getElementById('semantic-status');
    const resultsEl = document.getElementById('semantic-results');

    if (!queryEl) return;   /* not on the search page */

    /* ------------------------------------------------------------------
       Index loading — fetch once, lazily
       ------------------------------------------------------------------ */

    /**
     * Fetch the embedding matrix and its metadata, populating `vectors` and
     * `meta`. Concurrent callers share one in-flight request; a failed load is
     * forgotten so that a later search can retry.
     *
     * @returns {Promise<void>} resolves once the index is usable; rejects if
     *   either file is missing or the two files do not describe the same rows.
     */
    function loadIndex() {
        if (indexReady) return Promise.resolve();
        if (indexPromise) return indexPromise;

        indexPromise = Promise.all([
            fetch('/data/semantic-index.bin').then(function (r) {
                if (!r.ok) throw new Error('semantic-index.bin not found');
                return r.arrayBuffer();
            }),
            fetch('/data/semantic-meta.json').then(function (r) {
                if (!r.ok) throw new Error('semantic-meta.json not found');
                return r.json();
            }),
        ]).then(function (results) {
            const loadedVectors = new Float32Array(results[0]);
            const loadedMeta    = results[1];
            /* Every metadata row needs DIM floats; a shorter matrix would be
               read past its end and yield NaN scores. */
            if (!Array.isArray(loadedMeta) ||
                loadedVectors.length < loadedMeta.length * DIM) {
                throw new Error('semantic index is smaller than its metadata');
            }
            vectors    = loadedVectors;
            meta       = loadedMeta;
            indexReady = true;
        }).catch(function (err) {
            indexPromise = null;   /* allow a retry on the next search */
            throw err;
        });

        return indexPromise;
    }

    /* ------------------------------------------------------------------
       Model loading — dynamic import from CDN, lazy
       ------------------------------------------------------------------ */

    /**
     * Import transformers.js from the CDN and build the feature-extraction
     * pipeline from the self-hosted model files. Concurrent callers share one
     * in-flight load; a failed load is forgotten so a later search can retry.
     *
     * @returns {Promise<Function>} resolves with the pipeline function.
     */
    function loadModel() {
        if (extractor) return Promise.resolve(extractor);
        setStatus('Loading model…');
        if (modelPromise) return modelPromise;

        modelPromise = import(CDN).then(function (mod) {
            /* Point transformers.js at our self-hosted model files. */
            mod.env.localModelPath    = MODEL_PATH;
            mod.env.allowRemoteModels = false;
            return mod.pipeline('feature-extraction', MODEL, { quantized: true });
        }).then(function (pipe) {
            extractor = pipe;
            return extractor;
        }).catch(function (err) {
            modelPromise = null;   /* allow a retry on the next search */
            throw err;
        });

        return modelPromise;
    }

    /* ------------------------------------------------------------------
       Search
       ------------------------------------------------------------------ */

    /**
     * Score every indexed paragraph against the query embedding.
     *
     * @param {Float32Array} queryVec query embedding; the caller requests a
     *   normalized vector, so the dot product equals cosine similarity.
     * @returns {Float32Array} one score per entry of `meta`.
     */
    function cosineSims(queryVec) {
        const count  = meta.length;
        const scores = new Float32Array(count);
        for (let i = 0; i < count; i++) {
            const off = i * DIM;
            let dot = 0;
            for (let d = 0; d < DIM; d++) dot += queryVec[d] * vectors[off + d];
            scores[i] = dot;
        }
        return scores;
    }

    /**
     * Pick the TOP_K best-scoring paragraphs.
     *
     * @param {Float32Array} scores one score per entry of `meta`.
     * @returns {Array<{idx: number, score: number}>} best matches, highest first.
     */
    function topK(scores) {
        const indices = Array.from({ length: meta.length }, function (_, i) { return i; });
        indices.sort(function (a, b) { return scores[b] - scores[a]; });
        return indices.slice(0, TOP_K).map(function (i) {
            return { idx: i, score: scores[i] };
        });
    }

    /**
     * Embed `query`, rank the index against it and render the best matches.
     * Each call takes a fresh token; when a newer call (or a cleared input)
     * has superseded it, its late results and status messages are dropped.
     *
     * @param {string} query raw user input; surrounding whitespace is ignored.
     */
    function runSearch(query) {
        const text = query.trim();
        const seq  = ++searchSeq;   /* also invalidates any search still in flight */
        if (!text) { clearResults(); return; }

        setStatus('Searching…');

        /* Remember that the index itself failed so the generic handler below
           does not replace the more helpful message. */
        let indexFailed = false;
        const indexLoad = loadIndex().catch(function (err) {
            indexFailed = true;
            if (seq === searchSeq) {
                setStatus('Semantic index not available — run make build first.');
            }
            throw err;
        });
        const modelLoad = loadModel();

        Promise.all([indexLoad, modelLoad]).then(function (results) {
            const pipe = results[1];
            return pipe(text, { pooling: 'mean', normalize: true });
        }).then(function (output) {
            if (seq !== searchSeq) return;   /* superseded while embedding */
            const queryVec = output.data;    /* Float32Array, length 384 */
            const hits     = topK(cosineSims(queryVec));
            renderResults(hits);
            setStatus(hits.length ? '' : 'No results found.');
        }).catch(function (err) {
            console.error('semantic-search:', err);
            if (seq !== searchSeq || indexFailed) return;
            setStatus('Search error — see console for details.');
        });
    }

    /* ------------------------------------------------------------------
       Rendering
       ------------------------------------------------------------------ */

    /**
     * Replace the results panel with an ordered list of hits: a link to the
     * page, the section heading when it differs from the page title, and the
     * paragraph excerpt.
     *
     * NOTE(review): the original markup strings were lost before review; the
     * ol/li/a/span/p structure is reconstructed from what remained. Confirm
     * element and class names against the site stylesheet.
     *
     * @param {Array<{idx: number, score: number}>} hits output of topK().
     */
    function renderResults(hits) {
        if (!hits.length) { clearResults(); return; }
        if (!resultsEl) return;

        let html = '<ol>';
        for (let i = 0; i < hits.length; i++) {
            const m = meta[hits[i].idx];
            const sameHeading = m.heading === m.title;
            html += '<li>'
                  + '<a href="' + esc(m.url) + '">' + esc(m.title) + '</a>';
            if (!sameHeading) {
                html += '<span> § ' + esc(m.heading) + '</span>';
            }
            html += '<p>' + esc(m.excerpt) + '</p>'
                  + '</li>';
        }
        html += '</ol>';
        resultsEl.innerHTML = html;
    }

    /** Empty the results panel. */
    function clearResults() {
        if (resultsEl) resultsEl.innerHTML = '';
    }

    /**
     * Show a status line under the search box.
     * @param {string} msg text to display; '' clears it.
     */
    function setStatus(msg) {
        if (statusEl) statusEl.textContent = msg;
    }

    /**
     * Escape text for insertion into HTML content or a double-quoted attribute.
     * The ampersand is replaced first so the entities produced by the later
     * replacements are not escaped a second time.
     *
     * @param {*} s value to escape; coerced with String().
     * @returns {string} HTML-safe text.
     */
    function esc(s) {
        return String(s)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');
    }

    /* ------------------------------------------------------------------
       Tab switching — persists choice in localStorage
       ------------------------------------------------------------------ */

    const STORAGE_KEY = 'search-tab';

    /**
     * Show the tab and panel whose data attribute equals `target`, hide the
     * others, and remember the choice.
     *
     * @param {string} target value of data-tab / data-panel to activate.
     */
    function activateTab(target) {
        document.querySelectorAll('.search-tab').forEach(function (b) {
            const active = b.dataset.tab === target;
            b.classList.toggle('is-active', active);
            b.setAttribute('aria-selected', active ? 'true' : 'false');
        });
        document.querySelectorAll('.search-panel').forEach(function (p) {
            p.classList.toggle('is-active', p.dataset.panel === target);
        });
        /* Best effort: storage access can throw; the tab still switches. */
        try { localStorage.setItem(STORAGE_KEY, target); } catch (e) {}
    }

    document.querySelectorAll('.search-tab').forEach(function (btn) {
        btn.addEventListener('click', function () { activateTab(btn.dataset.tab); });
    });

    /* Restore last-used tab (falls back to keyword if unset or unrecognised) */
    let saved = null;
    try { saved = localStorage.getItem(STORAGE_KEY); } catch (e) {}
    if (saved === 'semantic') activateTab('semantic');

    /* ------------------------------------------------------------------
       Input handling — debounced, 400 ms
       ------------------------------------------------------------------ */

    let debounceTimer = null;
    queryEl.addEventListener('input', function () {
        clearTimeout(debounceTimer);
        const q = queryEl.value.trim();
        if (!q) {
            searchSeq++;   /* drop any search still in flight */
            clearResults();
            setStatus('');
            return;
        }
        debounceTimer = setTimeout(function () { runSearch(q); }, 400);
    });

    /* Pre-fill from ?q= on load — mirror keyword search behaviour */
    const params  = new URLSearchParams(window.location.search);
    const initial = params.get('q');
    if (initial) {
        queryEl.value = initial;
        runSearch(initial);
    }
}());