// libresilient/service-worker.js

/*
 * LibResilient Service Worker.
 *
 * Default strategy:
 *    1. Try to load from main website.
 *    2. If loading fails, load from LibResilient.
 *    3. If loading is too slow, load from LibResilient.
 *    4. If loaded content doesn't match authenticated versions, fall back to
 *       LibResilient.
 */


/*
 * Promise.any() polyfill, installed only when the runtime lacks a native one
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/any
 *
 * resolves with the value of the first input that fulfils;
 * rejects only once every input has rejected (or when there are no inputs at all),
 * with an AggregateError-like error carrying all the rejection reasons in `.errors`
 *
 * promises - any iterable of promises and/or plain values
 *
 * TODO: remove once Promise.any() is implemented broadly
 */
if (typeof Promise.any === 'undefined') {
    Promise.any = async (promises) => {
        // Array.from() accepts any iterable (Set, generator, ...), not only arrays
        const inverted = Array.from(promises, (p) => {
            // swap fulfilment and rejection, so that we can use Promise.all() below:
            // a fulfilled input becomes a rejection, a rejected input becomes a value
            return Promise.resolve(p).then(
                (value) => Promise.reject(value),
                (error) => error
            )
        })
        // Promise.all() is the polar opposite of Promise.any():
        // it rejects as soon as there is a first rejection (here: the first fulfilled input),
        // otherwise it resolves to an array of results (here: all the rejection reasons)
        return Promise.all(inverted).then(
            // every single input rejected -- swap back into a rejection
            (errors) => {
                const message = 'All promises were rejected'
                if (typeof AggregateError === 'function') {
                    throw new AggregateError(errors, message)
                }
                // no AggregateError in this runtime; build a look-alike
                const failure = new Error(message)
                failure.name = 'AggregateError'
                failure.errors = errors
                throw failure
            },
            // one of the inputs fulfilled -- swap back into a value
            (value) => value
        )
    };
}

// make sure the LibResilientPlugins array exists;
// an array that is already in place is left untouched
if (Array.isArray(self.LibResilientPlugins) === false) {
    self.LibResilientPlugins = []
}

// initialize the LibResilientConfig object, unless one is already in place
//
// these are the sane defaults,
// which can then be modified via config.js
if (self.LibResilientConfig === null || typeof self.LibResilientConfig !== 'object') {
    self.LibResilientConfig = {

        // time (in ms) a plugin gets before it is considered unresponsive
        // and we move on
        defaultPluginTimeout: 10000,

        // plugins settings namespace
        //
        // the keys decide which plugins get loaded,
        // and their order is the order in which plugins are tried when retrieving content
        // assumption: plugin path = ./plugins/<plugin-name>.js
        //
        // this relies on JavaScript preserving the insertion order for properties
        // https://stackoverflow.com/a/5525820
        plugins: {
            'fetch': {},
            'cache': {},
            'alt-fetch': {},
            'gun-ipfs': {}
        },

        // components that should have their debug output displayed;
        // anything not listed in this array of strings stays silent
        //
        // by default: the service worker and all plugins enabled above
        // (so, all components that are used)
        loggedComponents: [
            'service-worker',
            'fetch',
            'cache',
            'alt-fetch',
            'gun-ipfs'
        ]
    }
}


/**
 * internal logging facility
 *
 * component - name of the component the message is about;
 *             messages from components missing from the
 *             LibResilientConfig.loggedComponents array are dropped
 * items     - all remaining arguments, handed over to console.debug()
 */
self.log = function(component, ...items) {
    // components that are not explicitly listed stay silent
    if (!self.LibResilientConfig.loggedComponents.includes(component)) {
        return
    }
    console.debug(`LibResilient [COMMIT_UNKNOWN, ${component}] ::`, ...items)
}


// load the config and the plugins
//
// everything in a try-catch block
// so that we get an informative message if there's an error
//
// NOTE: `plugins`, `i` and `plugin` stay declared with `var` on purpose,
// so that the script-global bindings this block has always created remain in place
try {

    // get the config
    self.importScripts("./config.js")

    // only now load the plugins (config.js could have changed the defaults)
    var plugins = Object.keys(self.LibResilientConfig.plugins)
    // the list can grow while we are looping over it (dependencies get appended below),
    // hence the index-based loop that re-reads plugins.length on every pass
    for (var i=0; i<plugins.length; i++) {
        // load a plugin
        self.importScripts(`./plugins/${plugins[i]}.js`)
        // check if it loaded properly: it should now be present in LibResilientPlugins
        var plugin = LibResilientPlugins.find(p=>p.name===plugins[i])
        if (plugin === undefined) {
            throw new Error(`Plugin not found: ${plugins[i]} (available plugins: ${LibResilientPlugins.map(p=>p.name).join(', ')})`)
        }
        // make sure that the indirect flag is set if needed
        if (self.LibResilientConfig.plugins[plugin.name].indirect===true) {
            plugin.indirect=true
            self.log('service-worker', `Loaded plugin: ${plugin.name} (indirect)`)
        } else {
            self.log('service-worker', `Loaded plugin: ${plugin.name}`)
        }
        // make sure plugins used by the just-loaded plugin are also added to the list
        // but don't load a plugin twice
        if (typeof plugin.uses === 'object' && plugin.uses !== null) {
            // locally declared loop variable -- no stray global gets created
            for (const dependency of Object.keys(plugin.uses)) {
                if (plugins.includes(dependency)) {
                    // already on the list, nothing to do
                    continue
                }
                // okay, this plugin has not been added to the plugins list yet
                // let's do that; it will get loaded in one of the next passes of the outer loop
                plugins.push(dependency)
                // but also, let's make sure that the config for it is available for use;
                // settings from plugin.uses take precedence over what was configured so far
                const dependencyConfig = {
                    ...self.LibResilientConfig.plugins[dependency],
                    ...plugin.uses[dependency]
                }
                // set the indirect flag,
                // since we only have this plugin here to facilitate use by other plugins
                dependencyConfig.indirect = true
                // set the config
                self.LibResilientConfig.plugins[dependency] = dependencyConfig
            }
        }
    }

    // inform
    self.log('service-worker', `DEBUG: Strategy in use: ${LibResilientPlugins.filter(p=>(!p.indirect)).map(p=>p.name).join(', ')}`)

} catch(e) {
    // we only get a cryptic "Error while registering a service worker"
    // unless we explicitly print the errors out in the console
    console.error(e)
    throw e
}

/**
 * fetch counter per clientId
 *
 * maps a clientId to the number of fetches currently in flight for that client
 *
 * we need to keep track of active fetches per clientId
 * so that we can inform a given clientId when we're completely done
 */
self.activeFetches = {}

/**
 * decrement the active fetches counter of a client
 * and inform that client once all of its fetches are done
 *
 * clientId - id of the client whose counter is to be decremented
 */
let decrementActiveFetches = (clientId) => {
    // decrement the fetch counter for the client
    self.activeFetches[clientId]--
    self.log('service-worker', '+-- activeFetches[' + clientId + ']:', self.activeFetches[clientId])
    if (self.activeFetches[clientId] !== 0) {
        // fetches still in flight, nothing to report yet
        return
    }
    self.log('service-worker', 'All fetches done!')
    // inform the client
    // client has to be smart enough to know if that is just temporary
    // (and new fetches will fire in a moment, because a CSS file just
    //  got fetched) or not
    clients.get(clientId)
        .then((client)=>{
            // clients.get() resolves to undefined when there is no such client (any more)
            if (!client) {
                self.log('service-worker', 'no client to send the all-fetched message to:', clientId)
                return
            }
            client.postMessage({
                allFetched: true
            })
            self.log('service-worker', 'all-fetched message sent.')
        })
        // nobody waits for this Promise, so handle failures right here
        .catch((err)=>{
            self.log('service-worker', 'sending the all-fetched message failed:', err)
        })
}

/*
 * builds a Promise that settles once a given time has passed:
 * either resolving or rejecting, the latter optionally with a specific error message
 *
 * time             - the timeout (in ms); also the value used when resolving,
 *                    and when rejecting without an error message
 * timeout_resolves - true to resolve() upon hitting the timeout, false to reject() (default: false (reject))
 * error_message    - optional message; when set, the rejection is an Error carrying it (default: false (no error message))
 */
let promiseTimeout = (time, timeout_resolves=false, error_message=false) => {
    // what happens once the timer fires
    const settle = (resolve, reject) => {
        if (timeout_resolves) {
            resolve(time)
            return
        }
        // a proper Error if we have a message, the bare timeout value otherwise
        reject(error_message ? new Error(error_message) : time)
    }
    return new Promise((resolve, reject) => {
        setTimeout(() => settle(resolve, reject), time)
    })
};


/* ========================================================================= *\
|* === LibResilientResourceInfo                                              === *|
\* ========================================================================= */


/**
 * LibResilient resource info class
 *
 * keeps the values as long as the service worker is running,
 * and communicates all changes to the relevant client via postMessage()
 *
 * clients are responsible for saving and keeping the values across
 * service worker restarts, if that's required
 */
let LibResilientResourceInfo = class {

    // actual values of the fields
    // only modified internally; this whole object is what gets sent to the client
    values = {
        url: '', // read only after initialization
        clientId: null,
        fetchError: null,
        method: null,
        state: null, // can be "error", "success", "running"
        serviceWorker: 'COMMIT_UNKNOWN' // this will be replaced by commit sha in CI/CD; read-only
    }
    // the Client to message; stays null until (and unless) it gets resolved in the constructor
    client = null;

    /**
     * constructor
     * needed to set the URL and clientId
     *
     * url      - URL of the resource this info is about
     * clientId - id of the requesting client; may be empty
     */
    constructor(url, clientId) {
        // set it
        this.values.url = url
        this.values.clientId = clientId
        // we might not have a non-empty clientId if it's a cross-origin fetch
        if (clientId) {
            // get the client from Client API based on clientId
            clients.get(clientId)
                .then((client)=>{
                    // clients.get() resolves to undefined when there is no such client;
                    // in that case we simply stay client-less
                    if (!client) {
                        return
                    }
                    // set the client
                    this.client = client
                    // send the current state to the client
                    this.client.postMessage(this.values);
                })
                // nobody waits for this Promise, so handle failures right here
                .catch((err)=>{
                    self.log('service-worker', 'unable to message client ' + clientId + ':', err)
                })
        }
    }

    /**
     * update this.values and immediately postMessage() to the relevant client
     *
     * only the read-write fields (fetchError, method, state) are taken into account,
     * and the client is only messaged if any of them actually changed
     *
     * data - an object with items to set in this.values
     */
    update(data) {
        // the properties that are read-write
        const writable = ['fetchError', 'method', 'state']
        // debug
        let msg = 'Updated LibResilientResourceInfo for: ' + this.values.url
        // was there a change? if not, no need to postMessage
        let changed = false
        for (const key of Object.keys(data)) {
            if (!writable.includes(key)) {
                continue
            }
            msg += '\n+-- ' + key + ': ' + data[key]
            if (this.values[key] !== data[key]) {
                msg += ' (changed!)'
                changed = true
            }
            this.values[key] = data[key]
        }
        self.log('service-worker', msg)
        // send the message to the client
        if (this.client && changed) {
            this.client.postMessage(this.values);
        }
    }

    /**
     * fetchError property
     */
    get fetchError() {
        return this.values.fetchError
    }

    /**
     * method property
     */
    get method() {
        return this.values.method
    }

    /**
     * state property
     */
    get state() {
        return this.values.state
    }

    /**
     * serviceWorker property (read-only)
     */
    get serviceWorker() {
        return this.values.serviceWorker
    }

    /**
     * url property (read-only)
     */
    get url() {
        return this.values.url
    }

    /**
     * clientId property (read-only)
     */
    get clientId() {
        return this.values.clientId
    }
}

/* ========================================================================= *\
|* === Main Brain of LibResilient                                            === *|
\* ========================================================================= */

/**
 * run a plugin's fetch() method
 * while handling all the auxiliary stuff like saving info in reqInfo
 *
 * always returns a Promise: being an async function, anything thrown synchronously
 * in here (say, by a misbehaving plugin) ends up as a rejection,
 * so that callers can handle all failures in their catch() handlers
 *
 * the returned Promise settles the way plugin.fetch() does, unless
 * LibResilientConfig.defaultPluginTimeout ms pass first -- then it rejects with a timeout Error
 *
 * plugin    - the plugin to use
 * url       - string containing the URL to fetch
 * reqInfo   - instance of LibResilientResourceInfo
 */
let libresilientFetch = async (plugin, url, reqInfo) => {

    // status of the plugin
    reqInfo.update({
        method: plugin.name,
        state: "running"
    })

    // log stuff
    self.log('service-worker', "LibResilient Service Worker handling URL:", url,
                '\n+-- using method(s):', plugin.name)

    // race the plugin(s) vs. a timeout
    return Promise.race([
        plugin.fetch(url),
        promiseTimeout(
            self.LibResilientConfig.defaultPluginTimeout,
            false,
            `LibResilient request using ${plugin.name} timed out after ${self.LibResilientConfig.defaultPluginTimeout}ms.`
        )
    ])
}


/**
 * calling a libresilient plugin function on the first plugin that implements it
 *
 * plugins are checked in the order they appear in LibResilientPlugins;
 * the method is invoked with `this` set to null
 *
 * call - method name to call
 * args - array of arguments that will be passed to it
 *
 * returns whatever the plugin method returned,
 * or undefined if no plugin implements the method
 */
let callOnLibResilientPlugin = (call, args) => {
    // find the first plugin implementing the method
    const implementer = LibResilientPlugins.find((candidate) => typeof candidate[call] === 'function')
    if (implementer === undefined) {
        self.log('service-worker', 'No plugin implements ' + call + '()')
        return undefined
    }
    self.log('service-worker', 'Calling plugin ' + implementer.name + '.' + call + '()')
    // call it
    return implementer[call].apply(null, args)
}

/**
 * Cycles through all the plugins, in the order they got registered,
 * and returns a Promise resolving to a Response in case any of the plugins
 * was able to get the resource
 *
 * the Promise rejects (with the error of the last plugin tried)
 * if none of the plugins was able to get the resource
 *
 * request    - the request being handled; only its `url` property is read here
 * clientId   - string containing the clientId of the requesting client
 * useStashed - use stashed resources; if false, only pull resources from live sources
 * doStash    - stash resources once fetched successfully; if false, do not stash pulled resources automagically
 * stashedResponse - a response that was already served from a stashing plugin; set by the
 *                   background re-fetch started below, so that the freshly fetched response
 *                   can be compared against it and the client informed if they differ
 */
let getResourceThroughLibResilient = (request, clientId, useStashed=true, doStash=true, stashedResponse=null) => {

    // clean the URL, removing any fragment identifier (a bare trailing "#" included)
    const url = request.url.replace(/#.*$/, '')

    // set-up reqInfo for the fetch event
    const reqInfo = new LibResilientResourceInfo(url, clientId)

    // fetch counter; make sure we are incrementing a number
    // even if nobody initialized the counter for this client
    if (typeof self.activeFetches[clientId] !== 'number') {
        self.activeFetches[clientId] = 0
    }
    self.activeFetches[clientId]++

    // the counter has to be decremented exactly once per call,
    // no matter whether we bail out via the success handler or the final catch()
    let fetchCounted = true
    const finishFetch = () => {
        if (fetchCounted) {
            fetchCounted = false
            decrementActiveFetches(clientId)
        }
    }

    // only plugins meant to be used directly;
    // also filter out stash plugins if need be
    const pluginsToRun = LibResilientPlugins.filter((plugin)=>{
        return ( (!plugin.indirect) && (useStashed || typeof plugin.stash !== 'function') )
    })

    // the first plugin is started right away (synchronously, from within the executor);
    // anything thrown in here -- including the lack of any usable plugin --
    // becomes a rejection handled by the chain below
    const firstAttempt = new Promise((resolve)=>{
        if (pluginsToRun.length === 0) {
            throw new Error(`No LibResilient plugins available to handle: ${url}`)
        }
        resolve(libresilientFetch(pluginsToRun[0], url, reqInfo))
    })

    /**
     * this uses Array.reduce() to chain the Promises generated by the plugins,
     * with the Promise of the first plugin (firstAttempt) as the initial value
     *
     * see: https://css-tricks.com/why-using-reduce-to-sequentially-resolve-promises-works/
     *
     * each subsequent plugin only gets to run in the catch() handler
     * of the attempt before it, that is: only if all plugins before it failed
     *
     * we are slice()-ing the list so that the first plugin is only run once
     *
     * ref:
     * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/Reduce
     */
    return pluginsToRun
        .slice(1)
        .reduce(
        (prevPromise, currentPlugin)=>{
            return prevPromise.catch((error)=>{
                self.log('service-worker', "LibResilient plugin error for:", url,
                            '\n+-- method : ' + reqInfo.method,
                            '\n+-- error  : ' + String(error))
                // save info in reqInfo -- status of the previous method
                reqInfo.update({
                    state: "error",
                    fetchError: String(error)
                })
                return libresilientFetch(currentPlugin, url, reqInfo)
            })
        },
        firstAttempt
    )
    .then((response)=>{
        // we got a successful response
        finishFetch()

        // record the success
        reqInfo.update({state:"success"})

        // get the plugin that was used to fetch content
        const usedPlugin = LibResilientPlugins.find(p=>p.name===reqInfo.method)

        // if it's a stashing plugin...
        if (usedPlugin && typeof usedPlugin.stash === 'function') {
            // we obviously do not want to stash
            self.log('service-worker', 'Not stashing, since resource is already retrieved by a stashing plugin:', url);
            // since we got the data from a stashing plugin,
            // let's run the rest of plugins in the background to check if we can get a fresher resource
            // and stash it in cache for later use
            self.log('service-worker', 'starting background no-stashed fetch for:', url);
            // event.waitUntil?
            // https://stackoverflow.com/questions/37902441/what-does-event-waituntil-do-in-service-worker-and-why-is-it-needed/37906330#37906330
            getResourceThroughLibResilient(request, clientId, false, true, response.clone())
                // nobody waits for the background fetch, and it failing is perfectly
                // normal (say, when offline) -- so handle the failure right here
                .catch((err)=>{
                    self.log('service-worker', 'background no-stashed fetch failed for:', url,
                                '\n+-- error  : ' + String(err))
                })
            // return the response so that stuff can keep happening
            return response
        }

        // otherwise, let's see if we already had a stashed version that differs
        if (stashedResponse && stashedResponse.headers) {
            const stashedMethod = stashedResponse.headers.get('X-LibResilient-Method')
            const fetchedMethod = response.headers.get('X-LibResilient-Method')
            const stashedETag = stashedResponse.headers.get('X-LibResilient-ETag')
            const fetchedETag = response.headers.get('X-LibResilient-ETag')
            // this is where we check if the response from whatever plugin we got it from
            // is newer than what we've stashed
            self.log('service-worker', 'checking freshness of stashed version of:', url,
                        '\n+-- stashed from   :', stashedMethod,
                        '\n+-- fetched using  :', fetchedMethod,
                        '\n+-- stashed X-LibResilient-ETag   :', stashedETag,
                        '\n+-- fetched X-LibResilient-ETag   :', fetchedETag)
            // if the method does not match, or if it matches but the ETag doesn't
            // we have a different response
            // which means *probably* fresher content
            if ( (stashedMethod !== fetchedMethod) || (stashedETag !== fetchedETag) ) {
                // inform!
                self.log('service-worker', 'fetched version method or ETag differs from stashed for:', url)
                clients.get(reqInfo.clientId)
                    .then((client)=>{
                        // clients.get() resolves to undefined when there is no such client (any more)
                        if (!client) {
                            return
                        }
                        client.postMessage({
                            url: url,
                            fetchedDiffers: true
                        })
                    })
                    // nobody waits for this Promise, so handle failures right here
                    .catch((err)=>{
                        self.log('service-worker', 'informing the client about differing content failed:', err)
                    })
            }
        }

        // do we want to stash?
        if (doStash) {
            // find the first stashing plugin
            const stasher = LibResilientPlugins.find(p=>typeof p.stash === 'function')
            if (stasher !== undefined) {

                // ok, now we're in business
                let hdrs = '\n+-- headers:'
                response.headers.forEach((v, k)=>{
                    hdrs += `\n    +-- ${k} : ${v}`
                })
                self.log(
                        'service-worker',
                        `stashing a successful fetch of: ${url}`,
                        `\n+-- fetched using  : ${response.headers.get('X-LibResilient-Method')}`,
                        `\n+-- stashing using : ${stasher.name}`,
                        hdrs
                    )

                // working on clone()'ed response so that the original one is not touched
                //
                // a failed stashing must not break the flow: we do have a perfectly good
                // response, so we log the problem and hand the response over anyway
                return new Promise((resolve)=>{
                        resolve(stasher.stash(response.clone(), url))
                    })
                    .catch((err)=>{
                        self.log('service-worker', 'stashing failed for:', url,
                                    '\n+-- error  : ' + String(err))
                    })
                    // original response is what the caller needs
                    .then(()=>response)
            }
        }

        // if we're here it means we went through the whole list of plugins
        // and found not a single stashing plugin
        // or we don't want to stash the resources in the first place
        // that's fine, but let's make sure the response goes forth
        return response
    })
    // a final catch... in case all plugins fail
    .catch((err)=>{
        self.log('service-worker', "LibResilient also failed completely: ", err,
                    '\n+-- URL    : ' + url)

        // cleanup
        reqInfo.update({
            state: "error",
            fetchError: String(err)
        })
        // this is very naïve and should in fact be handled
        // inside the relevant plugin, probably
        // TODO: is this even needed?
        reqInfo.update({method: null})
        finishFetch()
        // rethrow
        throw err
    })
}

/* ========================================================================= *\
|* === Setting up the event handlers                                     === *|
\* ========================================================================= */

// service worker lifecycle: "install"
self.addEventListener('install', () => {
    // TODO: Might we want to have a local cache?
    // TODO: should we do some plugin initialization here?
    //
    // "COMMIT_UNKNOWN" in the message will be replaced with commit ID
    const installedMsg = "0. Installed LibResilient Service Worker (commit: COMMIT_UNKNOWN)."
    self.log('service-worker', installedMsg);
});

// service worker lifecycle: "activate"
self.addEventListener('activate', () => {
    // TODO: should we do some plugin initialization here?
    const activatedMsg = "1. Activated LibResilient Service Worker (commit: COMMIT_UNKNOWN)."
    self.log('service-worker', activatedMsg);
});

// "fetch" event handler
//
// same-origin GET requests are handled by the plugins (via getResourceThroughLibResilient()),
// everything else is passed through to a regular fetch()
self.addEventListener('fetch', event => {

    // if event.resultingClientId is available, we need to use this
    // otherwise event.clientId is what we want
    // ref. https://developer.mozilla.org/en-US/docs/Web/API/FetchEvent/resultingClientId
    var clientId = event.clientId
    if (event.resultingClientId) {
        clientId = event.resultingClientId
        // yeah, we seem to have to send the client their clientId
        // because there is no way to get that client-side
        // and we need that for sane messaging later
        //
        // so let's also send the plugin list, why not
        //
        // *sigh* JS is great *sigh*
        clients
            .get(clientId)
            .then((client)=>{
                // clients.get() resolves to undefined when there is no such client
                if (!client) {
                    self.log('service-worker', 'client not available, not sending the clientId to:', clientId)
                    return
                }
                client.postMessage({
                    clientId: clientId,
                    plugins: LibResilientPlugins.filter(p=>(!p.indirect)).map((p)=>{return p.name}),
                    serviceWorker: 'COMMIT_UNKNOWN'
                })
            })
            // nobody waits for this Promise, so handle failures right here
            .catch((err)=>{
                self.log('service-worker', 'sending the clientId to the client failed:', err)
            })
    }

    // counter!
    if (typeof self.activeFetches[clientId] !== "number") {
        self.activeFetches[clientId] = 0
    }

    // info
    self.log('service-worker', "Fetching!",
                "\n+-- url              :", event.request.url,
                "\n+-- clientId         :", event.clientId,
                "\n+-- resultingClientId:", event.resultingClientId,
                "\n    +-- activeFetches[" + clientId + "]:", self.activeFetches[clientId]
               )

    // External requests go through a regular fetch()
    //
    // origins are compared after parsing the URL: a plain string-prefix check would
    // also treat look-alike hosts (https://our.site.evil.example for https://our.site)
    // as our own
    if (new URL(event.request.url).origin !== self.location.origin) {
        return void event.respondWith(fetch(event.request));
    }

    // Non-GET requests go through a regular fetch()
    if (event.request.method !== 'GET') {
        return void event.respondWith(fetch(event.request));
    }

    // GET requests to our own origin get handled by the plugins
    return void event.respondWith(getResourceThroughLibResilient(event.request, clientId))
});


/**
 * "message" event handler
 *
 * lets clients call stash(), unstash(), and publish() on the first plugin implementing them;
 * the value found under the respective key of the message is used as the arguments list
 *
 * assumptions to be considered:
 * every message contains clientId (so that we know where to respond if/when we need to)
 */
self.addEventListener('message', (event) => {

    const data = event.data

    // messages without an object payload carry nothing we could act upon
    // (and Object.keys() below would throw on null or undefined)
    if (typeof data !== 'object' || data === null) {
        self.log('service-worker', 'Message received, but it carries no usable data; ignoring.');
        return
    }

    // inform
    let msg = 'Message received!'
    for (const k of Object.keys(data)) {
        msg += '\n+-- key: ' + k + " :: val: " + data[k]
    }
    self.log('service-worker', msg);

    /*
     * supporting stash(), unstash(), and publish() only
     */
    for (const call of ['stash', 'unstash', 'publish']) {
        if (data[call]) {
            callOnLibResilientPlugin(call, data[call])
        }
    }
});