# archive.conf — `X-Robots-Tag: noindex, noarchive` for the link archive.
#
# Place at /etc/nginx/snippets/archive.conf and `include` it inside the
# levineuwirth.org server { } block, *after* security-headers.conf:
#
#   server {
#       server_name levineuwirth.org;
#       root /var/www/levineuwirth.org;
#       ...
#       include snippets/security-headers.conf;
#       include snippets/static-assets.conf;
#       include snippets/popup-proxy.conf;
#       include snippets/archive.conf;
#   }
#
# Why a response header set in a location block rather than robots.txt:
# a URL blocked by robots.txt can still appear in results when it is
# linked externally, and a noindex directive only works if the crawler
# is allowed to fetch the resource and read it. Wrapper pages carry the
# meta tag in their HTML, and the HTML snapshots have the same meta tag
# injected at fetch time. But raw PDFs cannot carry meta directives, and
# a robots.txt Disallow on /archive/ would prevent crawlers from reading
# the wrapper meta in the first place. The header form is the right
# control for the whole tree: crawlers honour it for any resource, HTML
# or PDF.
#
# `^~` makes this prefix match take priority over any regex location
# that might otherwise match the same path.
location ^~ /archive/ {
    # nginx inherits add_header directives from the enclosing context
    # ONLY when the current context declares no add_header directives of
    # its own — see nginx.org/en/docs/http/ngx_http_headers_module.html.
    # Declaring any header inside this location would therefore silently
    # drop the baseline security headers for the /archive/ subtree, so
    # security-headers.conf is re-included here to keep HSTS, CSP,
    # X-Frame-Options, etc. intact for archive pages and raw artifacts.
    include snippets/security-headers.conf;

    # `always` so the header is emitted on every response, including
    # 4xx/5xx. Without it, add_header applies only to status codes 200,
    # 201, 204, 206, 301, 302, 303, 304, 307 and 308, so an error page
    # under /archive/ would be served without the noindex directive.
    add_header X-Robots-Tag "noindex, noarchive" always;

    # Hand off to the same static-file fallback as the rest of the site:
    # the exact file, then a directory index, then a `.html` sibling,
    # and finally a 404.
    try_files $uri $uri/index.html $uri.html =404;
}