libresilient/plugins/dnslink-fetch/index.js

241 wiersze
10 KiB
JavaScript

/* ========================================================================= *\
|* === HTTP(S) fetch() from alternative endpoints === *|
\* ========================================================================= */
/**
* this plugin does not implement any push method
*
* NOTICE: this plugin uses Promise.any()
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/any
* the polyfill is implemented in LibResilient's service-worker.js
*/
// no polluting of the global namespace please
(function(LRPC){
// this never changes
//
// the name of this plugin; it is used as the key under which the plugin
// constructor is registered in LRPC, as the `name` field of the returned plugin
// structure, as the first argument of LR.log() calls, and as the value of the
// X-LibResilient-Method header set on responses
const pluginName = "dnslink-fetch"
LRPC.set(pluginName, (LR, init={})=>{
/*
 * plugin configuration
 *
 * built-in defaults are declared first; anything passed in `init` overrides
 * the default stored under the same key
 */
const defaultConfig = {
    // number of endpoints contacted simultaneously for a single request
    //
    // a higher number raises the odds that at least one request succeeds,
    // at the cost of more bandwidth and other resources;
    //
    // 3 seems to be a reasonable default
    concurrency: 3,

    // DNS-over-HTTPS JSON API provider
    //
    // the default is the Hostux DoH provider:
    // - https://dns.hostux.net/en/
    //
    // other known DoH JSON API providers:
    // - 'https://cloudflare-dns.com/dns-query'
    // - 'https://mozilla.cloudflare-dns.com/dns-query'
    // - 'https://dns.google/resolve'
    dohProvider: 'https://dns.hostux.net/dns-query',

    // should the EDNS Client Subnet be masked from authoritative DNS servers for privacy?
    // - https://en.wikipedia.org/wiki/EDNS_Client_Subnet
    // - https://developers.google.com/speed/public-dns/docs/doh/json#supported_parameters
    ecsMasked: true
};

// effective configuration: defaults first, then the overrides from init
let config = {
    ...defaultConfig,
    ...init
};

// reality check: without a non-empty string as dohProvider nothing can be resolved
const dohProviderUsable = (typeof config.dohProvider === "string") && (config.dohProvider.length > 0);
if (!dohProviderUsable) {
    // log the problem and abort plugin construction
    const configError = new Error("dohProvider not confgured");
    console.error(configError);
    throw configError;
}
/**
 * Retrieve the list of alternative endpoints for a domain from DNSLink.
 *
 * Asks the configured DNS-over-HTTPS JSON API for the TXT records of
 * `_dnslink.<domain>` and keeps only the records of the form
 * `dnslink=/http/...` or `dnslink=/https/...`.
 *
 * @param {string} domain - domain whose `_dnslink` label is queried
 * @returns {Promise<string[]>} array of endpoints, each in the form of
 *          scheme://example.org[/optional/path]
 * @throws {Error} when the response is not a JSON object, when the DNS status
 *         is missing or non-zero, when there is no Answer array, when the
 *         Answer contains no TXT records, or when no TXT record defines an
 *         http or https endpoint
 */
let resolveEndpoints = async (domain) => {

    // pretty self-explanatory:
    // DoH provider, _dnslink label in the domain, TXT type, pretty please
    let query = `${config.dohProvider}?name=_dnslink.${domain}&type=TXT`;

    // do we want to mask the EDNS Client Subnet?
    //
    // this protects user privacy somewhat by telling the DoH provider not to disclose
    // the subnet from which the DNS request came to authoritative nameservers
    if (config.ecsMasked) {
        query += '&edns_client_subnet=0.0.0.0/0';
    }

    // make the query, get the response
    const reply = await fetch(query, {
        headers: {
            'accept': 'application/json',
        }
    });
    const response = await reply.json();

    // we need an object here
    //
    // `typeof null` is also 'object', so null has to be ruled out explicitly;
    // otherwise the `in` checks below would fail with a TypeError
    if (response === null || typeof response !== 'object') {
        throw new Error('Response is not a valid JSON');
    }

    // only Status == 0 is acceptable
    // https://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml#dns-parameters-6
    //
    // the comparison is deliberately loose, so a status serialized as "0" is accepted too
    if (!('Status' in response) || response.Status != 0) {
        throw new Error(`DNS request failure, status code: ${response.Status}`);
    }

    // we also do need the Answer section please, and it has to be an array
    if (!Array.isArray(response.Answer)) {
        throw new Error(`DNS response did not contain a valid Answer section`);
    }

    // only get TXT records (type 16), and extract the data from them
    // (loose comparison again, for the same reason as with Status)
    const txtRecords = response
        .Answer
        .filter(r => r.type == 16)
        .map(r => r.data);

    // did we get anything of value? anything at all?
    if (txtRecords.length < 1) {
        throw new Error(`Answer section of the DNS response did not contain any TXT records`);
    }

    // filter by 'dnslink="/https?/', morph into scheme://...
    // we can't rely on the data not to be wrapped in quotation marks, so we need to correct for that too
    const re = /^"?dnslink=\/(https?)\/([^"]+)"?$/;
    const endpoints = txtRecords
        .filter(r => re.test(r))
        .map(r => r.replace(re, "$1:\/\/$2"));

    // do we have anything to work with?
    if (endpoints.length < 1) {
        throw new Error(`No TXT record contained http or https endpoint definition`);
    }

    // in case we need some debugging
    LR.log(pluginName, '+-- alternative endpoints from DNSLink:\n - ', endpoints.join('\n - '));

    // this should be what we're looking for - an array of URLs
    return endpoints;
};
/**
 * Get content using regular HTTP(S) fetch() from alternative endpoints
 * retrieved via DNSLink.
 *
 * The domain part of `url` is used to resolve the endpoints; the remainder of
 * the URL is appended to each selected endpoint. At most `config.concurrency`
 * endpoints (picked at random when more are available) are requested
 * simultaneously, and the first one answering with a status below 400 wins.
 *
 * @param {string} url - URL of the resource on the original domain
 * @param {object} [init={}] - fetch() options; the object is copied, never modified
 * @returns {Promise<Response>} a new Response with the body, status and headers
 *          of the winning response, plus the X-LibResilient-* headers
 * @throws rejects when the endpoints cannot be resolved, or when none of the
 *         contacted endpoints returned a response with a status below 400
 *         (an AggregateError when the native Promise.any() is in use)
 */
let fetchContentFromAlternativeEndpoints = async (url, init={}) => {

    // remove the https://original.domain/ bit to get the relative path
    // TODO: this assumes that URLs we handle are always relative to the root
    // TODO: of the original domain, this needs to be documented
    const urlData = url.replace(/https?:\/\//, '').split('/');
    const domain = urlData.shift();
    const path = urlData.join('/');
    LR.log(pluginName, '+-- fetching:\n',
        ` - domain: ${domain}\n`,
        ` - path: ${path}\n`
    );

    // we really want to make fetch happen, Regina!
    // TODO: this change should *probably* be handled on the Service Worker level
    //
    // work on a copy, so that the object owned by the caller is left untouched
    const fetchInit = { ...init, cache: 'reload' };

    // freshly resolved list of endpoints; it is ours, so it is fine to splice it below
    const sourceEndpoints = await resolveEndpoints(domain);

    let selectedEndpoints;
    if (sourceEndpoints.length <= config.concurrency) {
        // if we have fewer than the configured concurrency or just as many, use all of them
        selectedEndpoints = sourceEndpoints;
    } else {
        // otherwise get `config.concurrency` endpoints at random
        selectedEndpoints = [];
        while (selectedEndpoints.length < config.concurrency) {
            const pick = Math.floor(Math.random() * sourceEndpoints.length);
            selectedEndpoints.push(sourceEndpoints.splice(pick, 1)[0]);
        }
    }

    // add the rest of the path to each endpoint; trailing slashes of the endpoint
    // are dropped first, so that we do not end up with `//` in the middle of the URL
    const useEndpoints = selectedEndpoints.map(
        (endpoint) => `${endpoint.replace(/\/+$/, '')}/${path}`
    );

    // debug log
    LR.log(pluginName, `+-- fetching from alternative endpoints:\n - ${useEndpoints.join('\n - ')}`);

    // fetch() fulfills also for 4xx and 5xx responses, so the status has to be
    // checked per endpoint, *before* Promise.any() picks a winner; otherwise a single
    // quickly-failing endpoint would beat endpoints that are able to serve the content
    const response = await Promise.any(
        useEndpoints.map(async (endpointUrl) => {
            const candidate = await fetch(endpointUrl, fetchInit);
            // 4xx? 5xx? that's a paddlin'
            if (candidate.status >= 400) {
                throw new Error('HTTP Error: ' + candidate.status + ' ' + candidate.statusText);
            }
            return candidate;
        })
    );

    // all good, it seems
    LR.log(pluginName, "fetched:", response.url);

    // we need to create a new Response object
    // with all the headers added explicitly,
    // since response.headers is immutable
    //
    // NOTE(review): `url` is not a standard member of the Response init
    // dictionary, so it is presumably ignored by the constructor - confirm
    const responseInit = {
        status: response.status,
        statusText: response.statusText,
        headers: {},
        url: url
    };
    response.headers.forEach((val, header) => {
        responseInit.headers[header] = val;
    });

    // add the X-LibResilient-* headers to the mix
    responseInit.headers['X-LibResilient-Method'] = pluginName;

    // we will not have it most of the time, due to CORS rules:
    // https://developer.mozilla.org/en-US/docs/Glossary/CORS-safelisted_response_header
    responseInit.headers['X-LibResilient-ETag'] = response.headers.get('ETag');
    if (responseInit.headers['X-LibResilient-ETag'] === null) {
        // far from perfect, but what are we going to do, eh?
        //
        // NOTE(review): if Last-Modified is not readable either, the value stays
        // null, which presumably ends up as the literal string "null" in the
        // header - confirm that consumers of X-LibResilient-ETag cope with that
        responseInit.headers['X-LibResilient-ETag'] = response.headers.get('last-modified');
    }

    // return the new response, using the Blob from the original one
    const blob = await response.blob();
    return new Response(blob, responseInit);
};
// return the plugin data structure
return {
name: pluginName,
description: 'HTTP(S) fetch() using alternative endpoints retrieved via DNSLink',
version: 'COMMIT_UNKNOWN',
fetch: fetchContentFromAlternativeEndpoints
}
})
// done with not polluting the global namespace
})(LibResilientPluginConstructors)