From b3fc8a1283bc1b295eb326d58ffa5c7a756b4f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Fri, 5 Jul 2024 11:06:41 +0200 Subject: [PATCH 01/15] feat(cache): use vary headers to compare cached response with request headers. --- lib/interceptor/cache.js | 94 +++++++++++++++++++++++++++++++++++----- package.json | 3 +- test/cache.js | 71 ++++++++++++++++++++++++++++-- 3 files changed, 153 insertions(+), 15 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index f5397f6..1e32637 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -23,6 +23,9 @@ class CacheHandler extends DecoratorHandler { } onHeaders(statusCode, rawHeaders, resume, statusMessage, headers = parseHeaders(rawHeaders)) { + // console.log('onHeaders, headers:') + // console.log(headers) + if (statusCode !== 307) { return this.#handler.onHeaders(statusCode, rawHeaders, resume, statusMessage, headers) } @@ -58,7 +61,7 @@ class CacheHandler extends DecoratorHandler { statusMessage, rawHeaders, rawTrailers: null, - body: [], + body: [], // Why is the body emptied? When we cache it again it won't have a body. }, size: (rawHeaders?.reduce((xs, x) => xs + x.length, 0) ?? 0) + @@ -84,15 +87,38 @@ class CacheHandler extends DecoratorHandler { } } return this.#handler.onData(chunk) + /* + Is 'this.#handler.onData' the previous dispatcher in the chain, e.g. 'redirect'? + And in 'redirect.onData(chunk)' it once again calls 'this.#handler.onData(chunk)'. + Would that be 'responseVerify.onData(chunk)'? + */ } - onComplete(rawTrailers) { + onComplete(rawTrailers, opts) { + console.log('onComplete, value: ' + this.#value) + console.log('onComplete, opts:') + console.log(opts) + if (this.#value) { this.#value.data.rawTrailers = rawTrailers this.#value.size += rawTrailers?.reduce((xs, x) => xs + x.length, 0) ?? 0 + + console.log('OnComplete, cache store is being set to: ') + console.log([this.#key, this.#value.data, { ttl: this.#value.ttl, size: this.#value.size }]) + + /* + Why are we setting the cache with the same data as the entry we fetched earlier + from the very same cache? + + We have the request data in the `opts` variable, but where is the response data that we need to cache? + Is the response cached somewhere else? + + We have the headers we need from the request. But we need the response data to know the vary-header + and we also need it to store the response. + */ this.#store.set(this.#key, this.#value.data, { ttl: this.#value.ttl, size: this.#value.size }) } - return this.#handler.onComplete(rawTrailers) + return this.#handler.onComplete(rawTrailers, opts) } } @@ -105,6 +131,9 @@ class CacheStore { } set(key, value, opts) { + console.log('setting cache with values:') + console.log({ key, value, opts }) + this.cache.set(key, value, opts) } @@ -115,12 +144,46 @@ class CacheStore { function makeKey(opts) { // NOTE: Ignores headers... - return `${opts.origin}:${opts.method}:${opts.path}` + // return `${opts.origin}:${opts.method}:${opts.path}` + return `${opts.method}:${opts.path}` +} + +function varyHeadersMatchRequest(varyHeaders, requestHeaders) { + // const headersToString = [] + // for(const header of cachedRawHeaders){ + // headersToString.push(header.toString()) + // } + + // const varyHeaders = headersToString.reduce((acc, cur, index, arr) => { + // if (index % 2 === 0) { + // acc[cur] = arr[index + 1]; + // } + // return acc; + // }, {}); + + // Early return if `varyHeaders` is null/undefined or an empty object + if (!varyHeaders || Object.keys(varyHeaders).length === 0) { + return true + } + const varyKeys = Object.keys(varyHeaders) + // All vary headers must match request headers, return true/false. + return varyKeys.every((varyKey) => varyHeaders[varyKey] === requestHeaders[varyKey]) +} + +function findEntryByHeaders(entries, requestHeaders) { + return entries.find((entry) => varyHeadersMatchRequest(entry, requestHeaders)) } const DEFAULT_CACHE_STORE = new CacheStore({ maxSize: 128 * 1024, maxEntrySize: 1024 }) export default (opts) => (dispatch) => (opts, handler) => { + console.log('cache dispatcher:') + console.log(dispatch) + console.log('opts:') + console.log(opts) + console.log('handler:') + console.log(handler) + if (!opts.cache || opts.upgrade) { return dispatch(opts, handler) } @@ -156,15 +219,24 @@ export default (opts) => (dispatch) => (opts, handler) => { } let key = makeKey(opts) - let value = store.get(key) + console.log('getting key: ' + key) + let entries = store.get(key) - if (value == null && opts.method === 'HEAD') { + console.log('Found entries in cache: ') + console.log(entries) + + // if key with method:'HEAD' didn't yield results, retry with method:'GET' + if (entries.length === 0 && opts.method === 'HEAD') { key = makeKey({ ...opts, method: 'GET' }) - value = store.get(key) + entries = store.get(key) + // value = {data: {headers: {vary: {origin: "www.google.com"}}} } - if (value) { - const { statusCode, statusMessage, rawHeaders, rawTrailers, body } = value + // Find an entry that matches the request, if any + const entry = findEntryByHeaders(entries, opts) + + if (entry) { + const { statusCode, statusMessage, rawHeaders, rawTrailers, body } = entry const ac = new AbortController() const signal = ac.signal @@ -186,9 +258,9 @@ export default (opts) => (dispatch) => (opts, handler) => { // TODO (fix): back pressure... } } - handler.onComplete(rawTrailers) + handler.onComplete(rawTrailers, opts) } else { - handler.onComplete([]) + handler.onComplete([], opts) } } catch (err) { handler.onError(err) diff --git a/package.json b/package.json index 26ac3e0..9d7df45 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,8 @@ "prepare": "husky", "prepublishOnly": "pinst --disable", "postpublish": "pinst --enable", - "test": "tap test" + "test": "tap test", + "taprun": "tap run" }, "lint-staged": { "*.{js,jsx,md,ts}": [ diff --git a/test/cache.js b/test/cache.js index 4a8fdc4..cb5635e 100644 --- a/test/cache.js +++ b/test/cache.js @@ -1,24 +1,89 @@ import { test } from 'tap' +// import { LRUCache } from 'lru-cache' import { createServer } from 'node:http' import undici from 'undici' import { interceptors } from '../lib/index.js' -test('cache request', (t) => { +// test('cache request', (t) => { +// t.plan(1) +// const server = createServer((req, res) => { +// res.end('asd') +// }) + +// t.teardown(server.close.bind(server)) +// server.listen(0, async () => { +// const { body } = await undici.request(`http://0.0.0.0:${server.address().port}`, { +// dispatcher: new undici.Agent().compose(interceptors.cache()), +// cache: true, +// }) +// let str = '' +// for await (const chunk of body) { +// str += chunk +// } +// t.equal(str, 'asd') +// }) +// }) + +// class CacheStore { +// constructor({ maxSize = 1024 * 1024 }) { +// this.maxSize = maxSize +// this.cache = new LRUCache({ maxSize }) +// } + +// set(key, value, opts) { +// this.cache.set(key, value, opts) +// } + +// get(key) { +// return this.cache.get(key) +// } +// } + +// Error: "invalid size value (must be positive integer). When maxSize or maxEntrySize is used, sizeCalculation or size must be set." +// +// function exampleCache(){ +// const options = { +// max: 500, +// maxSize: 5000, +// sizeCalculation: (value, key) => { +// return 1 +// }, +// } +// const cache = new CacheStore(options) +// cache.set('GET:/', {data: 'dataFromCache', vary: {'origin': 'http://0.0.0.0:54758', 'Accept-Encoding': 'Application/json'}}, {}) +// cache.set('GET:/foobar', {data: 'dataFromCache'}, {}) +// cache.set('POST:/foo', {data: 'dataFromCache', vary: {'host': '0.0.0.0:54758'}}, {}) +// cache.set('GET:/', {data: { +// headers: [ +// 'Vary': {'origin': 'http://0.0.0.0:54758', 'Accept-Encoding': 'Application/json'} +// ], +// }}) + +// return cache +// } + +test('cache request, vary:host, populated cache', (t) => { t.plan(1) const server = createServer((req, res) => { + res.writeHead(307, { Vary: 'Host' }) res.end('asd') }) t.teardown(server.close.bind(server)) + + // const cache = exampleCache() server.listen(0, async () => { - const { body } = await undici.request(`http://0.0.0.0:${server.address().port}`, { + const response = await undici.request(`http://0.0.0.0:${server.address().port}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), cache: true, }) let str = '' - for await (const chunk of body) { + for await (const chunk of response.body) { str += chunk } + + console.log('response: ') + console.log(response) t.equal(str, 'asd') }) }) From 7fa3d01292da77efe91492de7c4cf28a566d760f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Tue, 9 Jul 2024 09:51:20 +0200 Subject: [PATCH 02/15] feat(cache): debugging testing 307 redirect. --- lib/interceptor/cache.js | 217 ++++++++++++++++++++++++------------ lib/interceptor/proxy.js | 3 + lib/interceptor/redirect.js | 3 + test/cache.js | 55 ++++++++- 4 files changed, 206 insertions(+), 72 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 1e32637..43540e1 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -17,14 +17,17 @@ class CacheHandler extends DecoratorHandler { } onConnect(abort) { + console.log('onConnect abort') + console.log(abort) + this.#value = null return this.#handler.onConnect(abort) } onHeaders(statusCode, rawHeaders, resume, statusMessage, headers = parseHeaders(rawHeaders)) { - // console.log('onHeaders, headers:') - // console.log(headers) + console.log('onHeaders') + console.log({ statusCode, rawHeaders, resume, statusMessage, headers }) if (statusCode !== 307) { return this.#handler.onHeaders(statusCode, rawHeaders, resume, statusMessage, headers) @@ -36,6 +39,22 @@ class CacheHandler extends DecoratorHandler { const contentLength = headers['content-length'] ? Number(headers['content-length']) : Infinity const maxEntrySize = this.#store.maxEntrySize ?? Infinity + console.log({ cacheControl, contentLength, maxEntrySize }) + + console.log('onHeaders if statement match:') + + console.log( + contentLength < maxEntrySize && + cacheControl && + cacheControl.public && + !cacheControl.private && + !cacheControl['no-store'] && + !cacheControl['no-cache'] && + !cacheControl['must-understand'] && + !cacheControl['must-revalidate'] && + !cacheControl['proxy-revalidate'], + ) + if ( contentLength < maxEntrySize && cacheControl && @@ -54,6 +73,8 @@ class CacheHandler extends DecoratorHandler { ? 31556952 // 1 year : Number(maxAge) + console.log({ ttl, maxAge, cacheControl, contentLength, maxEntrySize }) + if (ttl > 0) { this.#value = { data: { @@ -61,7 +82,7 @@ class CacheHandler extends DecoratorHandler { statusMessage, rawHeaders, rawTrailers: null, - body: [], // Why is the body emptied? When we cache it again it won't have a body. + body: [], }, size: (rawHeaders?.reduce((xs, x) => xs + x.length, 0) ?? 0) + @@ -70,8 +91,13 @@ class CacheHandler extends DecoratorHandler { ttl: ttl * 1e3, } } + + console.log({ thisvalue: this.#value }) } + console.log('onHeaders, finish:') + console.log({ statusCode, rawHeaders, resume, statusMessage, headers }) + return this.#handler.onHeaders(statusCode, rawHeaders, resume, statusMessage, headers) } @@ -87,41 +113,48 @@ class CacheHandler extends DecoratorHandler { } } return this.#handler.onData(chunk) - /* - Is 'this.#handler.onData' the previous dispatcher in the chain, e.g. 'redirect'? - And in 'redirect.onData(chunk)' it once again calls 'this.#handler.onData(chunk)'. - Would that be 'responseVerify.onData(chunk)'? - */ } - onComplete(rawTrailers, opts) { - console.log('onComplete, value: ' + this.#value) - console.log('onComplete, opts:') - console.log(opts) - + onComplete(rawTrailers) { + console.log('onComplete this:') + console.log({ thisvalue: this.#value }) + console.log({ thisstore: this.#store }) // CacheStore{} + console.log({ thishandler: this.#handler }) // RequestHandler{} + console.log({ thishandlervalue: this.#handler.value }) + console.log({ this: this }) if (this.#value) { this.#value.data.rawTrailers = rawTrailers this.#value.size += rawTrailers?.reduce((xs, x) => xs + x.length, 0) ?? 0 - console.log('OnComplete, cache store is being set to: ') - console.log([this.#key, this.#value.data, { ttl: this.#value.ttl, size: this.#value.size }]) + const opts = this.#handler.opts + const entries = this.#handler.entries + console.log('onComplete this:') + console.log({ opts, entries }) + + const reqHeaders = this.#handler.opts + const resHeaders = parseHeaders(this.#value.data.rawHeaders) - /* - Why are we setting the cache with the same data as the entry we fetched earlier - from the very same cache? + const vary = formatVaryData(resHeaders, reqHeaders) - We have the request data in the `opts` variable, but where is the response data that we need to cache? - Is the response cached somewhere else? + console.log({ vary }) - We have the headers we need from the request. But we need the response data to know the vary-header - and we also need it to store the response. - */ - this.#store.set(this.#key, this.#value.data, { ttl: this.#value.ttl, size: this.#value.size }) + this.#value.vary = vary + + console.log({ entries }) + + this.#store.set(this.#key, entries.push(this.#value)) } - return this.#handler.onComplete(rawTrailers, opts) + return this.#handler.onComplete(rawTrailers) } } +function formatVaryData(resHeaders, reqHeaders) { + return resHeaders.vary + ?.split(',') + .map((key) => key.trim().toLowerCase()) + .map((key) => [key, reqHeaders[key]]) +} + // TODO (fix): Async filesystem cache. class CacheStore { constructor({ maxSize = 1024 * 1024, maxEntrySize = 128 * 1024 }) { @@ -131,9 +164,6 @@ class CacheStore { } set(key, value, opts) { - console.log('setting cache with values:') - console.log({ key, value, opts }) - this.cache.set(key, value, opts) } @@ -142,36 +172,29 @@ class CacheStore { } } -function makeKey(opts) { - // NOTE: Ignores headers... - // return `${opts.origin}:${opts.method}:${opts.path}` - return `${opts.method}:${opts.path}` -} - -function varyHeadersMatchRequest(varyHeaders, requestHeaders) { - // const headersToString = [] - // for(const header of cachedRawHeaders){ - // headersToString.push(header.toString()) - // } - - // const varyHeaders = headersToString.reduce((acc, cur, index, arr) => { - // if (index % 2 === 0) { - // acc[cur] = arr[index + 1]; - // } - // return acc; - // }, {}); - - // Early return if `varyHeaders` is null/undefined or an empty object - if (!varyHeaders || Object.keys(varyHeaders).length === 0) { - return true - } - const varyKeys = Object.keys(varyHeaders) - // All vary headers must match request headers, return true/false. - return varyKeys.every((varyKey) => varyHeaders[varyKey] === requestHeaders[varyKey]) -} - -function findEntryByHeaders(entries, requestHeaders) { - return entries.find((entry) => varyHeadersMatchRequest(entry, requestHeaders)) +function findEntryByHeaders(entries, reqHeaders) { + // Sort entries by number of vary headers in descending order, because + // we want to compare the most complex response to the request first. + entries.sort((a, b) => { + const lengthA = a.vary ? a.vary.length : 0 + const lengthB = b.vary ? b.vary.length : 0 + return lengthB - lengthA + }) + + console.log('Sort entries') + console.log({ entries }) + + console.log('reqHeaders') + console.log({ reqHeaders }) + + return entries?.find( + (entry) => + entry.vary?.every(([key, val]) => { + console.log(`reqHeaders[${key}] === ${val}`) + console.log({ reqHeadersval: reqHeaders[key] }) + return reqHeaders[key] === val + }) ?? true, + ) } const DEFAULT_CACHE_STORE = new CacheStore({ maxSize: 128 * 1024, maxEntrySize: 1024 }) @@ -218,23 +241,77 @@ export default (opts) => (dispatch) => (opts, handler) => { throw new Error(`Cache store not provided.`) } - let key = makeKey(opts) + let key = `${opts.method}:${opts.path}` console.log('getting key: ' + key) let entries = store.get(key) - console.log('Found entries in cache: ') - console.log(entries) - - // if key with method:'HEAD' didn't yield results, retry with method:'GET' - if (entries.length === 0 && opts.method === 'HEAD') { - key = makeKey({ ...opts, method: 'GET' }) + if (Array.isArray(entries) && entries.length === 0 && opts.method === 'HEAD') { + key = `GET:${opts.path}` entries = store.get(key) - // value = {data: {headers: {vary: {origin: "www.google.com"}}} } + // testing + const rawHeaders = [ + Buffer.from('Content-Type'), + Buffer.from('application/json'), + Buffer.from('Content-Length'), + Buffer.from('10'), + Buffer.from('Cache-Control'), + Buffer.from('public'), + ] + // // cannot get the cache to work inside the test, so I hardcode the entries here + entries = [ + { + statusCode: 200, + statusMessage: '', + rawHeaders, + rawTrailers: ['Hello'], + body: ['asd1'], + vary: [ + ['Accept', 'application/xml'], + ['User-Agent', 'Mozilla/5.0'], + ], + }, + { + statusCode: 200, + statusMessage: '', + rawHeaders, + rawTrailers: ['Hello'], + body: ['asd2'], + vary: [ + ['Accept', 'application/txt'], + ['User-Agent', 'Chrome'], + ['origin2', 'www.google.com/images'], + ], + }, + // { + // statusCode: 200, statusMessage: 'last', rawHeaders, rawTrailers: ['Hello'], body: ['asd3'], + // vary: null }, + { + statusCode: 200, + statusMessage: 'first', + rawHeaders, + rawTrailers: ['Hello'], + body: ['asd4'], + vary: [ + ['Accept', 'application/json'], + ['User-Agent', 'Mozilla/5.0'], + ['host2', 'www.google.com'], + ['origin2', 'www.google.com/images'], + ], + }, + ] + + // *testing + // Find an entry that matches the request, if any const entry = findEntryByHeaders(entries, opts) + console.log('Entry found:') + console.log({ entry }) + + // handler.value.vary = 'foobar' + if (entry) { const { statusCode, statusMessage, rawHeaders, rawTrailers, body } = entry const ac = new AbortController() @@ -258,9 +335,9 @@ export default (opts) => (dispatch) => (opts, handler) => { // TODO (fix): back pressure... } } - handler.onComplete(rawTrailers, opts) + handler.onComplete(rawTrailers) } else { - handler.onComplete([], opts) + handler.onComplete([]) } } catch (err) { handler.onError(err) @@ -268,6 +345,8 @@ export default (opts) => (dispatch) => (opts, handler) => { return true } else { - return dispatch(opts, new CacheHandler({ handler, store, key: makeKey(opts) })) + // handler.opts = opts + // handler.entries = entries + return dispatch(opts, new CacheHandler({ handler, store, key })) } } diff --git a/lib/interceptor/proxy.js b/lib/interceptor/proxy.js index 1dd95bb..93a7fc3 100644 --- a/lib/interceptor/proxy.js +++ b/lib/interceptor/proxy.js @@ -14,6 +14,7 @@ class Handler extends DecoratorHandler { } onUpgrade(statusCode, rawHeaders, socket) { + console.log('Proxy onUpgrade') return this.#handler.onUpgrade( statusCode, reduceHeaders( @@ -34,6 +35,7 @@ class Handler extends DecoratorHandler { } onHeaders(statusCode, rawHeaders, resume, statusMessage) { + console.log('Proxy onHeaders') return this.#handler.onHeaders( statusCode, reduceHeaders( @@ -164,6 +166,7 @@ function printIp(address, port) { } export default (opts) => (dispatch) => (opts, handler) => { + console.log('Proxy default dispatch') if (!opts.proxy) { return dispatch(opts, handler) } diff --git a/lib/interceptor/redirect.js b/lib/interceptor/redirect.js index 40a5282..6ccf298 100644 --- a/lib/interceptor/redirect.js +++ b/lib/interceptor/redirect.js @@ -36,6 +36,7 @@ class Handler extends DecoratorHandler { } onConnect(abort) { + console.log('Redirect onConnect') if (this.#aborted) { abort(this.#reason) } else { @@ -48,6 +49,7 @@ class Handler extends DecoratorHandler { } onHeaders(statusCode, rawHeaders, resume, statusText, headers = parseHeaders(rawHeaders)) { + console.log('Redirect onHeaders') if (redirectableStatusCodes.indexOf(statusCode) === -1) { assert(!this.#headersSent) this.#headersSent = true @@ -109,6 +111,7 @@ class Handler extends DecoratorHandler { } onData(chunk) { + console.log('Redirect onData') if (this.#location) { /* https://tools.ietf.org/html/rfc7231#section-6.4 diff --git a/test/cache.js b/test/cache.js index cb5635e..cf812df 100644 --- a/test/cache.js +++ b/test/cache.js @@ -62,16 +62,63 @@ import { interceptors } from '../lib/index.js' // return cache // } -test('cache request, vary:host, populated cache', (t) => { +// test('cache request, found a matching entry in cache', (t) => { +// t.plan(1) +// const server = createServer((req, res) => { +// res.writeHead(200, { Vary: 'Host, Origin, user-agent' }) +// res.end('asd') +// }) + +// t.teardown(server.close.bind(server)) + +// // const cache = exampleCache() +// server.listen(0, async () => { +// const response = await undici.request(`http://0.0.0.0:${server.address().port}`, { +// dispatcher: new undici.Agent().compose( +// interceptors.responseError(), +// interceptors.requestBodyFactory(), +// interceptors.log(), +// interceptors.dns(), +// interceptors.lookup(), +// interceptors.requestId(), +// interceptors.responseRetry(), +// interceptors.responseVerify(), +// interceptors.redirect(), +// interceptors.cache(), +// interceptors.proxy() +// ), +// cache: true, +// Accept: 'application/txt', +// 'User-Agent': 'Chrome', +// origin2: 'www.google.com/images' +// }) +// let str = '' +// for await (const chunk of response.body) { +// str += chunk +// } + +// console.log('response: ') +// console.log(response) +// t.equal(str, 'asd2') +// }) +// }) + +test('cache request, no matching entry found. Store response in cache', (t) => { t.plan(1) const server = createServer((req, res) => { - res.writeHead(307, { Vary: 'Host' }) + res.writeHead(307, { + Vary: 'Host', + 'Cache-Control': 'public, immutable', + 'Content-Length': 1000, + 'Content-Type': 'text/html', + Connection: 'keep-alive', + Location: 'http://www.blankwebsite.com/', + }) res.end('asd') }) t.teardown(server.close.bind(server)) - // const cache = exampleCache() server.listen(0, async () => { const response = await undici.request(`http://0.0.0.0:${server.address().port}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), @@ -86,4 +133,6 @@ test('cache request, vary:host, populated cache', (t) => { console.log(response) t.equal(str, 'asd') }) + + // Here we need to make another request to check if we get back the previous response but from the cache instead. }) From 8f523d51d56529ebf94925d2d10d9aafd2576fa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Wed, 10 Jul 2024 15:02:05 +0200 Subject: [PATCH 03/15] feat(cache): test and cache implementation for handling vary header. --- lib/interceptor/cache.js | 153 ++++-------------------- package.json | 3 +- test/cache.js | 244 +++++++++++++++++++++------------------ 3 files changed, 156 insertions(+), 244 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 43540e1..7b5e63f 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -1,5 +1,5 @@ import assert from 'node:assert' -import { LRUCache } from 'lru-cache' +// import { LRUCache } from 'lru-cache' import { DecoratorHandler, parseHeaders, parseCacheControl } from '../utils.js' class CacheHandler extends DecoratorHandler { @@ -17,18 +17,12 @@ class CacheHandler extends DecoratorHandler { } onConnect(abort) { - console.log('onConnect abort') - console.log(abort) - this.#value = null return this.#handler.onConnect(abort) } onHeaders(statusCode, rawHeaders, resume, statusMessage, headers = parseHeaders(rawHeaders)) { - console.log('onHeaders') - console.log({ statusCode, rawHeaders, resume, statusMessage, headers }) - if (statusCode !== 307) { return this.#handler.onHeaders(statusCode, rawHeaders, resume, statusMessage, headers) } @@ -39,22 +33,6 @@ class CacheHandler extends DecoratorHandler { const contentLength = headers['content-length'] ? Number(headers['content-length']) : Infinity const maxEntrySize = this.#store.maxEntrySize ?? Infinity - console.log({ cacheControl, contentLength, maxEntrySize }) - - console.log('onHeaders if statement match:') - - console.log( - contentLength < maxEntrySize && - cacheControl && - cacheControl.public && - !cacheControl.private && - !cacheControl['no-store'] && - !cacheControl['no-cache'] && - !cacheControl['must-understand'] && - !cacheControl['must-revalidate'] && - !cacheControl['proxy-revalidate'], - ) - if ( contentLength < maxEntrySize && cacheControl && @@ -73,8 +51,6 @@ class CacheHandler extends DecoratorHandler { ? 31556952 // 1 year : Number(maxAge) - console.log({ ttl, maxAge, cacheControl, contentLength, maxEntrySize }) - if (ttl > 0) { this.#value = { data: { @@ -91,13 +67,8 @@ class CacheHandler extends DecoratorHandler { ttl: ttl * 1e3, } } - - console.log({ thisvalue: this.#value }) } - console.log('onHeaders, finish:') - console.log({ statusCode, rawHeaders, resume, statusMessage, headers }) - return this.#handler.onHeaders(statusCode, rawHeaders, resume, statusMessage, headers) } @@ -116,33 +87,24 @@ class CacheHandler extends DecoratorHandler { } onComplete(rawTrailers) { - console.log('onComplete this:') - console.log({ thisvalue: this.#value }) - console.log({ thisstore: this.#store }) // CacheStore{} - console.log({ thishandler: this.#handler }) // RequestHandler{} - console.log({ thishandlervalue: this.#handler.value }) - console.log({ this: this }) if (this.#value) { this.#value.data.rawTrailers = rawTrailers - this.#value.size += rawTrailers?.reduce((xs, x) => xs + x.length, 0) ?? 0 + this.#value.size = this.#value.size + ? this.#value.size + rawTrailers?.reduce((xs, x) => xs + x.length, 0) + : 0 - const opts = this.#handler.opts const entries = this.#handler.entries - console.log('onComplete this:') - console.log({ opts, entries }) const reqHeaders = this.#handler.opts const resHeaders = parseHeaders(this.#value.data.rawHeaders) - const vary = formatVaryData(resHeaders, reqHeaders) + this.#value.vary = formatVaryData(resHeaders, reqHeaders) - console.log({ vary }) + entries.push(this.#value) - this.#value.vary = vary + sortEntriesByVary(entries) - console.log({ entries }) - - this.#store.set(this.#key, entries.push(this.#value)) + this.#store.set(this.#key, entries) } return this.#handler.onComplete(rawTrailers) } @@ -153,6 +115,7 @@ function formatVaryData(resHeaders, reqHeaders) { ?.split(',') .map((key) => key.trim().toLowerCase()) .map((key) => [key, reqHeaders[key]]) + .filter(([_key, val]) => val) } // TODO (fix): Async filesystem cache. @@ -160,11 +123,11 @@ class CacheStore { constructor({ maxSize = 1024 * 1024, maxEntrySize = 128 * 1024 }) { this.maxSize = maxSize this.maxEntrySize = maxEntrySize - this.cache = new LRUCache({ maxSize }) + this.cache = new Map() } set(key, value, opts) { - this.cache.set(key, value, opts) + this.cache.set(key, value) } get(key) { @@ -172,26 +135,25 @@ class CacheStore { } } -function findEntryByHeaders(entries, reqHeaders) { - // Sort entries by number of vary headers in descending order, because - // we want to compare the most complex response to the request first. +/* + Sort entries by number of vary headers in descending order, because + we need to compare the most complex response to the request first. + A cached response with an empty ´vary´ field will otherwise win every time. +*/ +function sortEntriesByVary(entries) { entries.sort((a, b) => { const lengthA = a.vary ? a.vary.length : 0 const lengthB = b.vary ? b.vary.length : 0 return lengthB - lengthA }) +} - console.log('Sort entries') - console.log({ entries }) - - console.log('reqHeaders') - console.log({ reqHeaders }) +function findEntryByHeaders(entries, reqHeaders) { + sortEntriesByVary(entries) return entries?.find( (entry) => entry.vary?.every(([key, val]) => { - console.log(`reqHeaders[${key}] === ${val}`) - console.log({ reqHeadersval: reqHeaders[key] }) return reqHeaders[key] === val }) ?? true, ) @@ -200,13 +162,6 @@ function findEntryByHeaders(entries, reqHeaders) { const DEFAULT_CACHE_STORE = new CacheStore({ maxSize: 128 * 1024, maxEntrySize: 1024 }) export default (opts) => (dispatch) => (opts, handler) => { - console.log('cache dispatcher:') - console.log(dispatch) - console.log('opts:') - console.log(opts) - console.log('handler:') - console.log(handler) - if (!opts.cache || opts.upgrade) { return dispatch(opts, handler) } @@ -235,6 +190,8 @@ export default (opts) => (dispatch) => (opts, handler) => { // Dump body... opts.body?.on('error', () => {}).resume() + opts.host = opts.host ?? new URL(opts.origin).host + const store = opts.cache === true ? DEFAULT_CACHE_STORE : opts.cache if (!store) { @@ -250,70 +207,10 @@ export default (opts) => (dispatch) => (opts, handler) => { entries = store.get(key) } - // testing - const rawHeaders = [ - Buffer.from('Content-Type'), - Buffer.from('application/json'), - Buffer.from('Content-Length'), - Buffer.from('10'), - Buffer.from('Cache-Control'), - Buffer.from('public'), - ] - // // cannot get the cache to work inside the test, so I hardcode the entries here - entries = [ - { - statusCode: 200, - statusMessage: '', - rawHeaders, - rawTrailers: ['Hello'], - body: ['asd1'], - vary: [ - ['Accept', 'application/xml'], - ['User-Agent', 'Mozilla/5.0'], - ], - }, - { - statusCode: 200, - statusMessage: '', - rawHeaders, - rawTrailers: ['Hello'], - body: ['asd2'], - vary: [ - ['Accept', 'application/txt'], - ['User-Agent', 'Chrome'], - ['origin2', 'www.google.com/images'], - ], - }, - // { - // statusCode: 200, statusMessage: 'last', rawHeaders, rawTrailers: ['Hello'], body: ['asd3'], - // vary: null }, - { - statusCode: 200, - statusMessage: 'first', - rawHeaders, - rawTrailers: ['Hello'], - body: ['asd4'], - vary: [ - ['Accept', 'application/json'], - ['User-Agent', 'Mozilla/5.0'], - ['host2', 'www.google.com'], - ['origin2', 'www.google.com/images'], - ], - }, - ] - - // *testing - - // Find an entry that matches the request, if any const entry = findEntryByHeaders(entries, opts) - console.log('Entry found:') - console.log({ entry }) - - // handler.value.vary = 'foobar' - if (entry) { - const { statusCode, statusMessage, rawHeaders, rawTrailers, body } = entry + const { statusCode, statusMessage, rawHeaders, rawTrailers, body } = entry.data const ac = new AbortController() const signal = ac.signal @@ -345,8 +242,8 @@ export default (opts) => (dispatch) => (opts, handler) => { return true } else { - // handler.opts = opts - // handler.entries = entries + handler.opts = opts + handler.entries = entries return dispatch(opts, new CacheHandler({ handler, store, key })) } } diff --git a/package.json b/package.json index 9d7df45..26ac3e0 100644 --- a/package.json +++ b/package.json @@ -33,8 +33,7 @@ "prepare": "husky", "prepublishOnly": "pinst --disable", "postpublish": "pinst --enable", - "test": "tap test", - "taprun": "tap run" + "test": "tap test" }, "lint-staged": { "*.{js,jsx,md,ts}": [ diff --git a/test/cache.js b/test/cache.js index cf812df..a04d691 100644 --- a/test/cache.js +++ b/test/cache.js @@ -1,138 +1,154 @@ import { test } from 'tap' -// import { LRUCache } from 'lru-cache' import { createServer } from 'node:http' import undici from 'undici' import { interceptors } from '../lib/index.js' -// test('cache request', (t) => { -// t.plan(1) -// const server = createServer((req, res) => { -// res.end('asd') -// }) - -// t.teardown(server.close.bind(server)) -// server.listen(0, async () => { -// const { body } = await undici.request(`http://0.0.0.0:${server.address().port}`, { -// dispatcher: new undici.Agent().compose(interceptors.cache()), -// cache: true, -// }) -// let str = '' -// for await (const chunk of body) { -// str += chunk -// } -// t.equal(str, 'asd') -// }) -// }) - -// class CacheStore { -// constructor({ maxSize = 1024 * 1024 }) { -// this.maxSize = maxSize -// this.cache = new LRUCache({ maxSize }) -// } - -// set(key, value, opts) { -// this.cache.set(key, value, opts) -// } - -// get(key) { -// return this.cache.get(key) -// } -// } - -// Error: "invalid size value (must be positive integer). When maxSize or maxEntrySize is used, sizeCalculation or size must be set." -// -// function exampleCache(){ -// const options = { -// max: 500, -// maxSize: 5000, -// sizeCalculation: (value, key) => { -// return 1 -// }, -// } -// const cache = new CacheStore(options) -// cache.set('GET:/', {data: 'dataFromCache', vary: {'origin': 'http://0.0.0.0:54758', 'Accept-Encoding': 'Application/json'}}, {}) -// cache.set('GET:/foobar', {data: 'dataFromCache'}, {}) -// cache.set('POST:/foo', {data: 'dataFromCache', vary: {'host': '0.0.0.0:54758'}}, {}) -// cache.set('GET:/', {data: { -// headers: [ -// 'Vary': {'origin': 'http://0.0.0.0:54758', 'Accept-Encoding': 'Application/json'} -// ], -// }}) - -// return cache -// } - -// test('cache request, found a matching entry in cache', (t) => { -// t.plan(1) -// const server = createServer((req, res) => { -// res.writeHead(200, { Vary: 'Host, Origin, user-agent' }) -// res.end('asd') -// }) - -// t.teardown(server.close.bind(server)) - -// // const cache = exampleCache() -// server.listen(0, async () => { -// const response = await undici.request(`http://0.0.0.0:${server.address().port}`, { -// dispatcher: new undici.Agent().compose( -// interceptors.responseError(), -// interceptors.requestBodyFactory(), -// interceptors.log(), -// interceptors.dns(), -// interceptors.lookup(), -// interceptors.requestId(), -// interceptors.responseRetry(), -// interceptors.responseVerify(), -// interceptors.redirect(), -// interceptors.cache(), -// interceptors.proxy() -// ), -// cache: true, -// Accept: 'application/txt', -// 'User-Agent': 'Chrome', -// origin2: 'www.google.com/images' -// }) -// let str = '' -// for await (const chunk of response.body) { -// str += chunk -// } - -// console.log('response: ') -// console.log(response) -// t.equal(str, 'asd2') -// }) -// }) - -test('cache request, no matching entry found. Store response in cache', (t) => { - t.plan(1) +// Placeholder until we implement a better LRU Cache +class CacheStore { + constructor() { + this.cache = new Map() + } + + set(key, value) { + this.cache.set(key, value) + } + + get(key) { + return this.cache.get(key) + } +} + +async function exampleCache() { + const cache = new CacheStore() + + const rawHeaders = [ + Buffer.from('Content-Type'), + Buffer.from('application/json'), + Buffer.from('Content-Length'), + Buffer.from('10'), + Buffer.from('Cache-Control'), + Buffer.from('public'), + ] + + const entries = [ + { + data: { + statusCode: 200, + statusMessage: '', + rawHeaders, + rawTrailers: ['Hello'], + body: ['asd1'], + }, + vary: [ + ['Accept', 'application/xml'], + ['User-Agent', 'Mozilla/5.0'], + ], + size: 100, + ttl: 31556952000, + }, + { + data: { + statusCode: 200, + statusMessage: '', + rawHeaders, + rawTrailers: ['Hello'], + body: ['asd2'], + }, + vary: [ + ['Accept', 'application/txt'], + ['User-Agent', 'Chrome'], + ['origin2', 'www.google.com/images'], + ], + size: 100, + ttl: 31556952000, + }, + // { + // statusCode: 200, statusMessage: 'last', rawHeaders, rawTrailers: ['Hello'], body: ['asd3'], + // vary: null }, + { + data: { + statusCode: 200, + statusMessage: 'first', + rawHeaders, + rawTrailers: ['Hello'], + body: ['asd4'], + }, + vary: [ + ['Accept', 'application/json'], + ['User-Agent', 'Mozilla/5.0'], + ['host2', 'www.google.com'], + ['origin2', 'www.google.com/images'], + ], + size: 100, + ttl: 31556952000, + }, + ] + cache.set('GET:/', entries) + return cache +} + +test('cache request, no matching entry found. Store response in cache', async (t) => { + t.plan(4) const server = createServer((req, res) => { res.writeHead(307, { - Vary: 'Host', + Vary: 'Origin2, User-Agent, Accept', 'Cache-Control': 'public, immutable', - 'Content-Length': 1000, + 'Content-Length': 4, 'Content-Type': 'text/html', - Connection: 'keep-alive', - Location: 'http://www.blankwebsite.com/', + Connection: 'close', + Location: 'http://www.google.com/', }) - res.end('asd') + res.end('foob') }) t.teardown(server.close.bind(server)) + const cache = await exampleCache() + + console.log('Cache before first request:') + console.log({ cache: cache.cache }) + + const cacheLength1 = cache.get('GET:/').length + + console.log({ cacheLength1 }) + server.listen(0, async () => { - const response = await undici.request(`http://0.0.0.0:${server.address().port}`, { + const serverPort = server.address().port + // response not found in cache, response should be added to cache. + const response = await undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache: true, + cache, }) let str = '' for await (const chunk of response.body) { str += chunk } + const cacheLength2 = cache.get('GET:/').length + console.log({ cacheLength2 }) + console.log({ str }) + t.equal(str, 'foob') + t.equal(cacheLength2, cacheLength1 + 1) - console.log('response: ') - console.log(response) - t.equal(str, 'asd') - }) + console.log('Cache before second request:') + console.log({ cache: cache.cache }) - // Here we need to make another request to check if we get back the previous response but from the cache instead. + // response found in cache, return cached response. + const response2 = await undici.request(`http://0.0.0.0:${serverPort}`, { + dispatcher: new undici.Agent().compose(interceptors.cache()), + cache, + Accept: 'application/txt', + 'User-Agent': 'Chrome', + origin2: 'www.google.com/images', + }) + let str2 = '' + for await (const chunk of response2.body) { + str2 += chunk + } + + const cacheLength3 = cache.get('GET:/').length + console.log({ cacheLength3 }) + + t.equal(str2, 'asd2') + t.equal(cacheLength3, cacheLength2) + }) }) From 3d40ba2685f164c8b1dad60f11723ceb344cfd48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Thu, 11 Jul 2024 09:17:42 +0200 Subject: [PATCH 04/15] fix(cache): cleanup for readability --- lib/interceptor/cache.js | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 7b5e63f..9e20ee6 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -88,22 +88,19 @@ class CacheHandler extends DecoratorHandler { onComplete(rawTrailers) { if (this.#value) { + // get + const entries = this.#handler.entries + const resHeaders = parseHeaders(this.#value.data.rawHeaders) + const reqHeaders = this.#handler.opts + + // set this.#value.data.rawTrailers = rawTrailers this.#value.size = this.#value.size ? this.#value.size + rawTrailers?.reduce((xs, x) => xs + x.length, 0) : 0 - - const entries = this.#handler.entries - - const reqHeaders = this.#handler.opts - const resHeaders = parseHeaders(this.#value.data.rawHeaders) - this.#value.vary = formatVaryData(resHeaders, reqHeaders) - entries.push(this.#value) - sortEntriesByVary(entries) - this.#store.set(this.#key, entries) } return this.#handler.onComplete(rawTrailers) From 930bdb2f8729c50f61501ef09fe63bbc0a98f886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Tue, 13 Aug 2024 15:57:21 +0200 Subject: [PATCH 05/15] feat(cache): added the built-in nodejs/sqlite as the in-memory cache. --- lib/interceptor/cache.js | 105 ++++++++++++++++++++------- package.json | 1 - test/cache.js | 153 +++++++++++++++++++++++++++++++-------- 3 files changed, 201 insertions(+), 58 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 9e20ee6..7549a5a 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -1,19 +1,21 @@ import assert from 'node:assert' -// import { LRUCache } from 'lru-cache' import { DecoratorHandler, parseHeaders, parseCacheControl } from '../utils.js' +import { DatabaseSync } from 'node:sqlite' // --experimental-sqlite class CacheHandler extends DecoratorHandler { #handler #store #key + #opts #value = null - constructor({ key, handler, store }) { + constructor({ key, handler, store, opts = [] }) { super(handler) this.#key = key this.#handler = handler this.#store = store + this.#opts = opts } onConnect(abort) { @@ -64,11 +66,10 @@ class CacheHandler extends DecoratorHandler { (rawHeaders?.reduce((xs, x) => xs + x.length, 0) ?? 0) + (statusMessage?.length ?? 0) + 64, - ttl: ttl * 1e3, + ttl, // in ms! } } } - return this.#handler.onHeaders(statusCode, rawHeaders, resume, statusMessage, headers) } @@ -88,20 +89,16 @@ class CacheHandler extends DecoratorHandler { onComplete(rawTrailers) { if (this.#value) { - // get - const entries = this.#handler.entries const resHeaders = parseHeaders(this.#value.data.rawHeaders) - const reqHeaders = this.#handler.opts + const reqHeaders = this.#opts - // set this.#value.data.rawTrailers = rawTrailers this.#value.size = this.#value.size ? this.#value.size + rawTrailers?.reduce((xs, x) => xs + x.length, 0) : 0 this.#value.vary = formatVaryData(resHeaders, reqHeaders) - entries.push(this.#value) - sortEntriesByVary(entries) - this.#store.set(this.#key, entries) + + this.#store.set(this.#key, this.#value) } return this.#handler.onComplete(rawTrailers) } @@ -115,20 +112,72 @@ function formatVaryData(resHeaders, reqHeaders) { .filter(([_key, val]) => val) } -// TODO (fix): Async filesystem cache. +// Can we move this class somewhere else, to the util.js file or its own module? class CacheStore { - constructor({ maxSize = 1024 * 1024, maxEntrySize = 128 * 1024 }) { - this.maxSize = maxSize - this.maxEntrySize = maxEntrySize - this.cache = new Map() + constructor() { + this.database = null + this.init() } - set(key, value, opts) { - this.cache.set(key, value) + init() { + this.database = new DatabaseSync('file:memdb1?mode=memory&cache=shared') + + this.database.exec(` + CREATE TABLE IF NOT EXISTS cacheInterceptor( + key TEXT, + data TEXT, + vary TEXT, + size INTEGER, + ttl INTEGER, + insertTime INTEGER + ) STRICT + `) + } + + set(key, entry, opts) { + if (!this.database) { + throw new Error('Database not initialized') + } + + entry.data = JSON.stringify(entry.data) + entry.vary = JSON.stringify(entry.vary) + + const insert = this.database.prepare( + 'INSERT INTO cacheInterceptor (key, data, vary, size, ttl, insertTime) VALUES (?, ?, ?, ?, ?, ?)', + ) + + insert.run(key, entry.data, entry.vary, entry.size, entry.ttl, Date.now()) + + this.purge() } get(key) { - return this.cache.get(key) + if (!this.database) { + throw new Error('Database not initialized') + } + this.purge() + const query = this.database.prepare('SELECT * FROM cacheInterceptor WHERE key = ?') + const rows = query.all(key) + rows.map((i) => { + i.data = JSON.parse(i.data) + i.vary = JSON.parse(i.vary) + return i + }) + + // Just in case purge hasn't finished + const nonExpiredRows = rows.filter((i) => i.insertTime + i.ttl > Date.now()) + + return nonExpiredRows + } + + purge() { + const query = this.database.prepare('DELETE FROM cacheInterceptor WHERE insertTime + ttl < ?') + query.run(Date.now()) + } + + deleteAll() { + const query = this.database.prepare('DELETE FROM cacheInterceptor') + query.run() } } @@ -151,12 +200,12 @@ function findEntryByHeaders(entries, reqHeaders) { return entries?.find( (entry) => entry.vary?.every(([key, val]) => { - return reqHeaders[key] === val + return reqHeaders?.headers[key] === val }) ?? true, ) } -const DEFAULT_CACHE_STORE = new CacheStore({ maxSize: 128 * 1024, maxEntrySize: 1024 }) +const DEFAULT_CACHE_STORE = new CacheStore() export default (opts) => (dispatch) => (opts, handler) => { if (!opts.cache || opts.upgrade) { @@ -189,6 +238,11 @@ export default (opts) => (dispatch) => (opts, handler) => { opts.host = opts.host ?? new URL(opts.origin).host + if (!opts.headers) { + opts.headers = {} + } + + // idea: use DEFAULT_CACHE_STORE by default if 'cache' not specified, since the cache interceptor was already specified to be used. const store = opts.cache === true ? DEFAULT_CACHE_STORE : opts.cache if (!store) { @@ -196,7 +250,7 @@ export default (opts) => (dispatch) => (opts, handler) => { } let key = `${opts.method}:${opts.path}` - console.log('getting key: ' + key) + let entries = store.get(key) if (Array.isArray(entries) && entries.length === 0 && opts.method === 'HEAD') { @@ -219,11 +273,14 @@ export default (opts) => (dispatch) => (opts, handler) => { try { handler.onConnect(abort) signal.throwIfAborted() + handler.onHeaders(statusCode, rawHeaders, resume, statusMessage) signal.throwIfAborted() + if (opts.method !== 'HEAD') { for (const chunk of body) { const ret = handler.onData(chunk) + signal.throwIfAborted() if (ret === false) { // TODO (fix): back pressure... @@ -239,8 +296,6 @@ export default (opts) => (dispatch) => (opts, handler) => { return true } else { - handler.opts = opts - handler.entries = entries - return dispatch(opts, new CacheHandler({ handler, store, key })) + return dispatch(opts, new CacheHandler({ handler, store, key, opts })) } } diff --git a/package.json b/package.json index 26ac3e0..4f52c7e 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,6 @@ "cache-control-parser": "^2.0.6", "cacheable-lookup": "^7.0.0", "http-errors": "^2.0.0", - "lru-cache": "^11.0.0", "undici": "^6.19.5" }, "devDependencies": { diff --git a/test/cache.js b/test/cache.js index a04d691..0c731d7 100644 --- a/test/cache.js +++ b/test/cache.js @@ -2,26 +2,82 @@ import { test } from 'tap' import { createServer } from 'node:http' import undici from 'undici' import { interceptors } from '../lib/index.js' +import { DatabaseSync } from 'node:sqlite' -// Placeholder until we implement a better LRU Cache class CacheStore { constructor() { - this.cache = new Map() + this.database = null + this.init() } - set(key, value) { - this.cache.set(key, value) + init() { + this.database = new DatabaseSync('file:memdb1?mode=memory&cache=shared') + + this.database.exec(` + CREATE TABLE IF NOT EXISTS cacheInterceptor( + key TEXT, + data TEXT, + vary TEXT, + size INTEGER, + ttl INTEGER, + insertTime INTEGER + ) STRICT + `) + } + + set(key, entry) { + if (!this.database) { + throw new Error('Database not initialized') + } + + // Format the entry object + entry.data = JSON.stringify(entry.data) + entry.vary = JSON.stringify(entry.vary) + + const insert = this.database.prepare( + 'INSERT INTO cacheInterceptor (key, data, vary, size, ttl, insertTime) VALUES (?, ?, ?, ?, ?, ?)', + ) + + insert.run(key, entry.data, entry.vary, entry.size, entry.ttl, Date.now()) + + this.purge() } get(key) { - return this.cache.get(key) + if (!this.database) { + throw new Error('Database not initialized') + } + this.purge() + const query = this.database.prepare('SELECT * FROM cacheInterceptor WHERE key = ?') + const rows = query.all(key) + rows.map((i) => { + i.data = JSON.parse(i.data) + i.vary = JSON.parse(i.vary) + return i + }) + + // Just in case purge hasn't finished + const nonExpiredRows = rows.filter((i) => i.insertTime + i.ttl > Date.now()) + + return nonExpiredRows } -} -async function exampleCache() { - const cache = new CacheStore() + purge() { + if (!this.database) { + throw new Error('Database not initialized') + } + const query = this.database.prepare('DELETE FROM cacheInterceptor WHERE insertTime + ttl < ?') + query.run(Date.now()) + } + + deleteAll() { + const query = this.database.prepare('DELETE FROM cacheInterceptor') + query.run() + } +} - const rawHeaders = [ +function exampleEntries() { + const rawHeaders1 = [ Buffer.from('Content-Type'), Buffer.from('application/json'), Buffer.from('Content-Length'), @@ -29,14 +85,26 @@ async function exampleCache() { Buffer.from('Cache-Control'), Buffer.from('public'), ] + const rawHeaders2 = [ + Buffer.from('Accept'), + Buffer.from('application/txt'), + Buffer.from('Content-Length'), + Buffer.from('4'), + Buffer.from('origin2'), + Buffer.from('www.google.com/images'), + Buffer.from('User-Agent'), + Buffer.from('Chrome'), + Buffer.from('Cache-Control'), + Buffer.from('public'), + ] const entries = [ { data: { statusCode: 200, statusMessage: '', - rawHeaders, - rawTrailers: ['Hello'], + rawHeaders: rawHeaders1, + rawTrailers: ['Hello', 'world'], body: ['asd1'], }, vary: [ @@ -50,8 +118,8 @@ async function exampleCache() { data: { statusCode: 200, statusMessage: '', - rawHeaders, - rawTrailers: ['Hello'], + rawHeaders: rawHeaders2, + rawTrailers: ['Hello', 'world'], body: ['asd2'], }, vary: [ @@ -69,7 +137,7 @@ async function exampleCache() { data: { statusCode: 200, statusMessage: 'first', - rawHeaders, + rawHeaders1, rawTrailers: ['Hello'], body: ['asd4'], }, @@ -82,11 +150,28 @@ async function exampleCache() { size: 100, ttl: 31556952000, }, + { + data: { + statusCode: 200, + statusMessage: 'to be purged', + rawHeaders1, + rawTrailers: ['Hello'], + body: ['asd4'], + }, + vary: [ + ['Accept', 'application/json'], + ['User-Agent', 'Mozilla/5.0'], + ['host2', 'www.google.com'], + ['origin2', 'www.google.com/images'], + ], + size: 100, + ttl: 1, + }, ] - cache.set('GET:/', entries) - return cache + return entries } +// This test will not always pass because of different execution times of operations in the in-memory database each time. test('cache request, no matching entry found. Store response in cache', async (t) => { t.plan(4) const server = createServer((req, res) => { @@ -103,42 +188,41 @@ test('cache request, no matching entry found. Store response in cache', async (t t.teardown(server.close.bind(server)) - const cache = await exampleCache() + const cache = new CacheStore() - console.log('Cache before first request:') - console.log({ cache: cache.cache }) + // populate cache + cache.deleteAll() + exampleEntries().forEach((i) => cache.set('GET:/', i)) const cacheLength1 = cache.get('GET:/').length - console.log({ cacheLength1 }) - server.listen(0, async () => { const serverPort = server.address().port // response not found in cache, response should be added to cache. const response = await undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache, + cache: true, }) let str = '' for await (const chunk of response.body) { str += chunk } const cacheLength2 = cache.get('GET:/').length - console.log({ cacheLength2 }) - console.log({ str }) + + // should return the default server response t.equal(str, 'foob') - t.equal(cacheLength2, cacheLength1 + 1) - console.log('Cache before second request:') - console.log({ cache: cache.cache }) + t.equal(cacheLength2, cacheLength1 + 1) // response found in cache, return cached response. const response2 = await undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache, - Accept: 'application/txt', - 'User-Agent': 'Chrome', - origin2: 'www.google.com/images', + headers: { + Accept: 'application/txt', + 'User-Agent': 'Chrome', + origin2: 'www.google.com/images', + }, + cache: true, }) let str2 = '' for await (const chunk of response2.body) { @@ -146,9 +230,14 @@ test('cache request, no matching entry found. Store response in cache', async (t } const cacheLength3 = cache.get('GET:/').length - console.log({ cacheLength3 }) + // should return the body from the cached entry t.equal(str2, 'asd2') + + // cache should still have the same number of entries before + // and after a cached entry was used as a response t.equal(cacheLength3, cacheLength2) + + cache.database.close() }) }) From d1da90f8453db197cd3de6dd32b68ace2563584b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Wed, 14 Aug 2024 14:46:16 +0200 Subject: [PATCH 06/15] feat(cache): give each entry an 'expiry' field instead of 'ttl' and 'insertTime'. Also exported the CacheStore. --- .gitignore | 1 + lib/interceptor/cache.js | 27 ++++++------- test/cache.js | 85 ++++------------------------------------ 3 files changed, 20 insertions(+), 93 deletions(-) diff --git a/.gitignore b/.gitignore index 5ed33aa..095e85c 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ db .nyc* yarn.lock .tap +file:* diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 7549a5a..45433b4 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -66,7 +66,7 @@ class CacheHandler extends DecoratorHandler { (rawHeaders?.reduce((xs, x) => xs + x.length, 0) ?? 0) + (statusMessage?.length ?? 0) + 64, - ttl, // in ms! + expires: Date.now() + ttl, // in ms! } } } @@ -112,8 +112,7 @@ function formatVaryData(resHeaders, reqHeaders) { .filter(([_key, val]) => val) } -// Can we move this class somewhere else, to the util.js file or its own module? -class CacheStore { +export class CacheStore { constructor() { this.database = null this.init() @@ -128,13 +127,12 @@ class CacheStore { data TEXT, vary TEXT, size INTEGER, - ttl INTEGER, - insertTime INTEGER + expires INTEGER ) STRICT `) } - set(key, entry, opts) { + set(key, entry) { if (!this.database) { throw new Error('Database not initialized') } @@ -143,10 +141,10 @@ class CacheStore { entry.vary = JSON.stringify(entry.vary) const insert = this.database.prepare( - 'INSERT INTO cacheInterceptor (key, data, vary, size, ttl, insertTime) VALUES (?, ?, ?, ?, ?, ?)', + 'INSERT INTO cacheInterceptor (key, data, vary, size, expires) VALUES (?, ?, ?, ?, ?)', ) - insert.run(key, entry.data, entry.vary, entry.size, entry.ttl, Date.now()) + insert.run(key, entry.data, entry.vary, entry.size, entry.expires) this.purge() } @@ -156,22 +154,21 @@ class CacheStore { throw new Error('Database not initialized') } this.purge() - const query = this.database.prepare('SELECT * FROM cacheInterceptor WHERE key = ?') - const rows = query.all(key) + const query = this.database.prepare( + 'SELECT * FROM cacheInterceptor WHERE key = ? AND expires > ? ', + ) + const rows = query.all(key, Date.now()) rows.map((i) => { i.data = JSON.parse(i.data) i.vary = JSON.parse(i.vary) return i }) - // Just in case purge hasn't finished - const nonExpiredRows = rows.filter((i) => i.insertTime + i.ttl > Date.now()) - - return nonExpiredRows + return rows } purge() { - const query = this.database.prepare('DELETE FROM cacheInterceptor WHERE insertTime + ttl < ?') + const query = this.database.prepare('DELETE FROM cacheInterceptor WHERE expires < ?') query.run(Date.now()) } diff --git a/test/cache.js b/test/cache.js index 0c731d7..ee0ed64 100644 --- a/test/cache.js +++ b/test/cache.js @@ -2,79 +2,7 @@ import { test } from 'tap' import { createServer } from 'node:http' import undici from 'undici' import { interceptors } from '../lib/index.js' -import { DatabaseSync } from 'node:sqlite' - -class CacheStore { - constructor() { - this.database = null - this.init() - } - - init() { - this.database = new DatabaseSync('file:memdb1?mode=memory&cache=shared') - - this.database.exec(` - CREATE TABLE IF NOT EXISTS cacheInterceptor( - key TEXT, - data TEXT, - vary TEXT, - size INTEGER, - ttl INTEGER, - insertTime INTEGER - ) STRICT - `) - } - - set(key, entry) { - if (!this.database) { - throw new Error('Database not initialized') - } - - // Format the entry object - entry.data = JSON.stringify(entry.data) - entry.vary = JSON.stringify(entry.vary) - - const insert = this.database.prepare( - 'INSERT INTO cacheInterceptor (key, data, vary, size, ttl, insertTime) VALUES (?, ?, ?, ?, ?, ?)', - ) - - insert.run(key, entry.data, entry.vary, entry.size, entry.ttl, Date.now()) - - this.purge() - } - - get(key) { - if (!this.database) { - throw new Error('Database not initialized') - } - this.purge() - const query = this.database.prepare('SELECT * FROM cacheInterceptor WHERE key = ?') - const rows = query.all(key) - rows.map((i) => { - i.data = JSON.parse(i.data) - i.vary = JSON.parse(i.vary) - return i - }) - - // Just in case purge hasn't finished - const nonExpiredRows = rows.filter((i) => i.insertTime + i.ttl > Date.now()) - - return nonExpiredRows - } - - purge() { - if (!this.database) { - throw new Error('Database not initialized') - } - const query = this.database.prepare('DELETE FROM cacheInterceptor WHERE insertTime + ttl < ?') - query.run(Date.now()) - } - - deleteAll() { - const query = this.database.prepare('DELETE FROM cacheInterceptor') - query.run() - } -} +import { CacheStore } from '../lib/interceptor/cache.js' function exampleEntries() { const rawHeaders1 = [ @@ -112,7 +40,7 @@ function exampleEntries() { ['User-Agent', 'Mozilla/5.0'], ], size: 100, - ttl: 31556952000, + expires: Date.now() + 31556952000, }, { data: { @@ -128,7 +56,7 @@ function exampleEntries() { ['origin2', 'www.google.com/images'], ], size: 100, - ttl: 31556952000, + expires: Date.now() + 31556952000, }, // { // statusCode: 200, statusMessage: 'last', rawHeaders, rawTrailers: ['Hello'], body: ['asd3'], @@ -148,7 +76,7 @@ function exampleEntries() { ['origin2', 'www.google.com/images'], ], size: 100, - ttl: 31556952000, + expires: Date.now() + 31556952000, }, { data: { @@ -165,7 +93,7 @@ function exampleEntries() { ['origin2', 'www.google.com/images'], ], size: 100, - ttl: 1, + expires: Date.now(), }, ] return entries @@ -235,7 +163,8 @@ test('cache request, no matching entry found. Store response in cache', async (t t.equal(str2, 'asd2') // cache should still have the same number of entries before - // and after a cached entry was used as a response + // and after a cached entry was used as a response. + // In other words: the CacheHandler should not have added another entry to the cache. t.equal(cacheLength3, cacheLength2) cache.database.close() From b6166078b6a18487e6c5f37d9196ff3ed6ef6e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Wed, 14 Aug 2024 15:01:10 +0200 Subject: [PATCH 07/15] feat(cache): cleanup --- lib/interceptor/cache.js | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 45433b4..c81988a 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -246,14 +246,9 @@ export default (opts) => (dispatch) => (opts, handler) => { throw new Error(`Cache store not provided.`) } - let key = `${opts.method}:${opts.path}` + const key = `${opts.method}:${opts.path}` - let entries = store.get(key) - - if (Array.isArray(entries) && entries.length === 0 && opts.method === 'HEAD') { - key = `GET:${opts.path}` - entries = store.get(key) - } + const entries = (store.get(key) ?? opts.method === 'HEAD') ? store.get(`GET:${opts.path}`) : null const entry = findEntryByHeaders(entries, opts) From b57253e9e985be54496cd113e17383a6e1216346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Thu, 15 Aug 2024 14:51:29 +0200 Subject: [PATCH 08/15] feat(cache): don't cache vary = * --- lib/interceptor/cache.js | 7 +++++ test/cache.js | 65 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index c81988a..24bf411 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -90,12 +90,19 @@ class CacheHandler extends DecoratorHandler { onComplete(rawTrailers) { if (this.#value) { const resHeaders = parseHeaders(this.#value.data.rawHeaders) + + // Early return if vary = *, uncacheable. + if (resHeaders.vary === '*') { + return this.#handler.onComplete(rawTrailers) + } + const reqHeaders = this.#opts this.#value.data.rawTrailers = rawTrailers this.#value.size = this.#value.size ? this.#value.size + rawTrailers?.reduce((xs, x) => xs + x.length, 0) : 0 + this.#value.vary = formatVaryData(resHeaders, reqHeaders) this.#store.set(this.#key, this.#value) diff --git a/test/cache.js b/test/cache.js index ee0ed64..825007d 100644 --- a/test/cache.js +++ b/test/cache.js @@ -99,6 +99,17 @@ function exampleEntries() { return entries } +function dbsetup(populate = true) { + const cache = new CacheStore() + cache.deleteAll() + + if (populate) { + exampleEntries().forEach((i) => cache.set('GET:/', i)) + } + + return cache +} + // This test will not always pass because of different execution times of operations in the in-memory database each time. test('cache request, no matching entry found. Store response in cache', async (t) => { t.plan(4) @@ -116,11 +127,7 @@ test('cache request, no matching entry found. Store response in cache', async (t t.teardown(server.close.bind(server)) - const cache = new CacheStore() - - // populate cache - cache.deleteAll() - exampleEntries().forEach((i) => cache.set('GET:/', i)) + const cache = dbsetup() const cacheLength1 = cache.get('GET:/').length @@ -170,3 +177,51 @@ test('cache request, no matching entry found. Store response in cache', async (t cache.database.close() }) }) + +test('cache request, Vary: * should not be cached', async (t) => { + t.plan(2) + const server = createServer((req, res) => { + res.writeHead(307, { + Vary: '*', + 'Cache-Control': 'public, immutable', + 'Content-Length': 4, + 'Content-Type': 'text/html', + Connection: 'close', + Location: 'http://www.google.com/', + }) + res.end('foob') + }) + + t.teardown(server.close.bind(server)) + + const cache = dbsetup(false) + + const cacheLength1 = cache.get('GET:/').length + + server.listen(0, async () => { + const serverPort = server.address().port + // Response not found in cache, response should be added to cache. + // But the server returns Vary: *, and thus shouldn't be cached. + const response = await undici.request(`http://0.0.0.0:${serverPort}`, { + dispatcher: new undici.Agent().compose(interceptors.cache()), + cache: true, + headers: { + Accept: 'application/txt', + 'User-Agent': 'Chrome', + origin2: 'response should not be cached', + }, + }) + let str = '' + for await (const chunk of response.body) { + str += chunk + } + const cacheLength2 = cache.get('GET:/').length + + // should return the default server response + t.equal(str, 'foob') + + t.equal(cacheLength2, cacheLength1) + + cache.database.close() + }) +}) From 64e044f5fd568478f9599f503383e7cc955c3ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Fri, 16 Aug 2024 14:45:35 +0200 Subject: [PATCH 09/15] feat(cache): handle Range header and treat it as a vary. Also fixed bug where data structure weren't correctly parsed back from JSON --- lib/interceptor/cache.js | 25 +++++++++++- test/cache.js | 84 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 99 insertions(+), 10 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 24bf411..dfca094 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -91,13 +91,18 @@ class CacheHandler extends DecoratorHandler { if (this.#value) { const resHeaders = parseHeaders(this.#value.data.rawHeaders) - // Early return if vary = *, uncacheable. + // Early return if Vary = *, uncacheable. if (resHeaders.vary === '*') { return this.#handler.onComplete(rawTrailers) } const reqHeaders = this.#opts + // If Range header present, assume that the response varies based on Range. + if (reqHeaders.headers?.range) { + resHeaders.vary += ', Range' + } + this.#value.data.rawTrailers = rawTrailers this.#value.size = this.#value.size ? this.#value.size + rawTrailers?.reduce((xs, x) => xs + x.length, 0) @@ -115,7 +120,7 @@ function formatVaryData(resHeaders, reqHeaders) { return resHeaders.vary ?.split(',') .map((key) => key.trim().toLowerCase()) - .map((key) => [key, reqHeaders[key]]) + .map((key) => [key, reqHeaders[key] ?? reqHeaders.headers[key]]) .filter(([_key, val]) => val) } @@ -168,6 +173,13 @@ export class CacheStore { rows.map((i) => { i.data = JSON.parse(i.data) i.vary = JSON.parse(i.vary) + i.data = { + ...i.data, + // JSON.parse doesn't convert a Buffer object back to a Buffer object once it has been stringified. + body: this.#convertToBuffer(i.data.body), + rawHeaders: this.#convertToBuffer(i.data.rawHeaders), + rawTrailers: this.#convertToBuffer(i.data.rawTrailers), + } return i }) @@ -183,6 +195,15 @@ export class CacheStore { const query = this.database.prepare('DELETE FROM cacheInterceptor') query.run() } + + #convertToBuffer(bufferArray) { + if (Array.isArray(bufferArray) && bufferArray.length > 0) { + return bufferArray.map((ba) => { + return typeof ba === 'object' ? Buffer.from(ba.data) : ba + }) + } + return [] + } } /* diff --git a/test/cache.js b/test/cache.js index 825007d..9eb851c 100644 --- a/test/cache.js +++ b/test/cache.js @@ -58,14 +58,11 @@ function exampleEntries() { size: 100, expires: Date.now() + 31556952000, }, - // { - // statusCode: 200, statusMessage: 'last', rawHeaders, rawTrailers: ['Hello'], body: ['asd3'], - // vary: null }, { data: { statusCode: 200, statusMessage: 'first', - rawHeaders1, + rawHeaders: rawHeaders1, rawTrailers: ['Hello'], body: ['asd4'], }, @@ -82,7 +79,7 @@ function exampleEntries() { data: { statusCode: 200, statusMessage: 'to be purged', - rawHeaders1, + rawHeaders: rawHeaders1, rawTrailers: ['Hello'], body: ['asd4'], }, @@ -111,7 +108,7 @@ function dbsetup(populate = true) { } // This test will not always pass because of different execution times of operations in the in-memory database each time. -test('cache request, no matching entry found. Store response in cache', async (t) => { +test('If no matching entry found, store the response in cache. Else return a matching entry.', async (t) => { t.plan(4) const server = createServer((req, res) => { res.writeHead(307, { @@ -171,14 +168,13 @@ test('cache request, no matching entry found. Store response in cache', async (t // cache should still have the same number of entries before // and after a cached entry was used as a response. - // In other words: the CacheHandler should not have added another entry to the cache. t.equal(cacheLength3, cacheLength2) cache.database.close() }) }) -test('cache request, Vary: * should not be cached', async (t) => { +test('Responses with header Vary: * should not be cached', async (t) => { t.plan(2) const server = createServer((req, res) => { res.writeHead(307, { @@ -225,3 +221,75 @@ test('cache request, Vary: * should not be cached', async (t) => { cache.database.close() }) }) + +test('Store 307-status-responses that happen to be dependent on the Range header', async (t) => { + t.plan(5) + const server = createServer((req, res) => { + res.writeHead(307, { + Vary: 'Origin2, User-Agent, Accept', + 'Cache-Control': 'public, immutable', + 'Content-Length': 4, + 'Content-Type': 'text/html', + Connection: 'close', + Location: 'http://www.google.com/', + }) + res.end('foob') + }) + + t.teardown(server.close.bind(server)) + + const cache = dbsetup(false) + + const cacheLength1 = cache.get('GET:/').length + + server.listen(0, async () => { + const serverPort = server.address().port + + const request1 = undici.request(`http://0.0.0.0:${serverPort}`, { + dispatcher: new undici.Agent().compose(interceptors.cache()), + cache: true, + headers: { + range: 'bytes=0-999', + }, + testing: 'testing 1', + }) + + // response not found in cache, response should be added to cache. + const response1 = await request1 + let str1 = '' + for await (const chunk of response1.body) { + str1 += chunk + } + const cacheLength2 = cache.get('GET:/').length + + // should return the default server response + t.equal(str1, 'foob') + t.equal(cacheLength1, 0) + t.equal(cacheLength2, 1) + + const request2 = undici.request(`http://0.0.0.0:${serverPort}`, { + dispatcher: new undici.Agent().compose(interceptors.cache()), + cache: true, + headers: { + range: 'bytes=0-999', + }, + testing: 'testing 2', + }) + + // response found in cache, response should be fetched from cache. + const response2 = await request2 + + let str2 = '' + for await (const chunk of response2.body) { + str2 += chunk + } + + const cacheLength3 = cache.get('GET:/').length + + // should return the cached response + t.equal(str2, 'foob') + t.equal(cacheLength3, 1) + + cache.database.close() + }) +}) From fb3ff472bacf2e967141eb9b7597d8c89a8ddc1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Fri, 16 Aug 2024 16:26:17 +0200 Subject: [PATCH 10/15] feat(cache): reuse prepared sqlite statements. Also make :memory: cache the default. --- lib/interceptor/cache.js | 41 ++++++++++++++++++++++++---------------- test/cache.js | 10 +++++----- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index dfca094..d493f00 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -125,13 +125,18 @@ function formatVaryData(resHeaders, reqHeaders) { } export class CacheStore { + #insertquery + #getQuery + #purgeQuery + #deleteAllQuery + constructor() { this.database = null this.init() } init() { - this.database = new DatabaseSync('file:memdb1?mode=memory&cache=shared') + this.database = new DatabaseSync(':memory:') this.database.exec(` CREATE TABLE IF NOT EXISTS cacheInterceptor( @@ -142,6 +147,18 @@ export class CacheStore { expires INTEGER ) STRICT `) + + this.#insertquery = this.database.prepare( + 'INSERT INTO cacheInterceptor (key, data, vary, size, expires) VALUES (?, ?, ?, ?, ?)', + ) + + this.#getQuery = this.database.prepare( + 'SELECT * FROM cacheInterceptor WHERE key = ? AND expires > ? ', + ) + + this.#purgeQuery = this.database.prepare('DELETE FROM cacheInterceptor WHERE expires < ?') + + this.#deleteAllQuery = this.database.prepare('DELETE FROM cacheInterceptor') } set(key, entry) { @@ -152,11 +169,7 @@ export class CacheStore { entry.data = JSON.stringify(entry.data) entry.vary = JSON.stringify(entry.vary) - const insert = this.database.prepare( - 'INSERT INTO cacheInterceptor (key, data, vary, size, expires) VALUES (?, ?, ?, ?, ?)', - ) - - insert.run(key, entry.data, entry.vary, entry.size, entry.expires) + this.#insertquery.run(key, entry.data, entry.vary, entry.size, entry.expires) this.purge() } @@ -166,10 +179,8 @@ export class CacheStore { throw new Error('Database not initialized') } this.purge() - const query = this.database.prepare( - 'SELECT * FROM cacheInterceptor WHERE key = ? AND expires > ? ', - ) - const rows = query.all(key, Date.now()) + + const rows = this.#getQuery.all(key, Date.now()) rows.map((i) => { i.data = JSON.parse(i.data) i.vary = JSON.parse(i.vary) @@ -187,22 +198,20 @@ export class CacheStore { } purge() { - const query = this.database.prepare('DELETE FROM cacheInterceptor WHERE expires < ?') - query.run(Date.now()) + this.#purgeQuery.run(Date.now()) } deleteAll() { - const query = this.database.prepare('DELETE FROM cacheInterceptor') - query.run() + this.#deleteAllQuery.run() } #convertToBuffer(bufferArray) { if (Array.isArray(bufferArray) && bufferArray.length > 0) { return bufferArray.map((ba) => { - return typeof ba === 'object' ? Buffer.from(ba.data) : ba + return ba?.type === 'Buffer' ? Buffer.from(ba.data) : ba }) } - return [] + return bufferArray } } diff --git a/test/cache.js b/test/cache.js index 9eb851c..6a9dca4 100644 --- a/test/cache.js +++ b/test/cache.js @@ -133,7 +133,7 @@ test('If no matching entry found, store the response in cache. Else return a mat // response not found in cache, response should be added to cache. const response = await undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache: true, + cache, }) let str = '' for await (const chunk of response.body) { @@ -154,7 +154,7 @@ test('If no matching entry found, store the response in cache. Else return a mat 'User-Agent': 'Chrome', origin2: 'www.google.com/images', }, - cache: true, + cache, }) let str2 = '' for await (const chunk of response2.body) { @@ -200,7 +200,7 @@ test('Responses with header Vary: * should not be cached', async (t) => { // But the server returns Vary: *, and thus shouldn't be cached. const response = await undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache: true, + cache, headers: { Accept: 'application/txt', 'User-Agent': 'Chrome', @@ -247,7 +247,7 @@ test('Store 307-status-responses that happen to be dependent on the Range header const request1 = undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache: true, + cache, headers: { range: 'bytes=0-999', }, @@ -269,7 +269,7 @@ test('Store 307-status-responses that happen to be dependent on the Range header const request2 = undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache: true, + cache, headers: { range: 'bytes=0-999', }, From ee288c15c4a937c4c2a9c94809305e29e80af97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Wed, 21 Aug 2024 15:37:09 +0200 Subject: [PATCH 11/15] feat(cache): added maxSize options to cache. Purge cache depending on size. Added BJSON --- lib/interceptor/cache.js | 110 ++++++++++++++++++++------------------- package.json | 1 + test/cache.js | 89 +++++++++++++------------------ 3 files changed, 95 insertions(+), 105 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index d493f00..4b66dd7 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -1,6 +1,7 @@ import assert from 'node:assert' import { DecoratorHandler, parseHeaders, parseCacheControl } from '../utils.js' import { DatabaseSync } from 'node:sqlite' // --experimental-sqlite +import * as BJSON from 'buffer-json' class CacheHandler extends DecoratorHandler { #handler @@ -25,7 +26,7 @@ class CacheHandler extends DecoratorHandler { } onHeaders(statusCode, rawHeaders, resume, statusMessage, headers = parseHeaders(rawHeaders)) { - if (statusCode !== 307) { + if (statusCode !== 307 && statusCode !== 200) { return this.#handler.onHeaders(statusCode, rawHeaders, resume, statusMessage, headers) } @@ -76,7 +77,6 @@ class CacheHandler extends DecoratorHandler { onData(chunk) { if (this.#value) { this.#value.size += chunk.bodyLength - const maxEntrySize = this.#store.maxEntrySize ?? Infinity if (this.#value.size > maxEntrySize) { this.#value = null @@ -98,11 +98,6 @@ class CacheHandler extends DecoratorHandler { const reqHeaders = this.#opts - // If Range header present, assume that the response varies based on Range. - if (reqHeaders.headers?.range) { - resHeaders.vary += ', Range' - } - this.#value.data.rawTrailers = rawTrailers this.#value.size = this.#value.size ? this.#value.size + rawTrailers?.reduce((xs, x) => xs + x.length, 0) @@ -128,18 +123,18 @@ export class CacheStore { #insertquery #getQuery #purgeQuery - #deleteAllQuery + #database + #size = 0 + #maxSize = 128e9 - constructor() { - this.database = null - this.init() - } + constructor(location = ':memory:', opts = {}) { + this.#maxSize = opts.maxSize ?? this.#maxSize - init() { - this.database = new DatabaseSync(':memory:') + this.#database = new DatabaseSync(location) - this.database.exec(` + this.#database.exec(` CREATE TABLE IF NOT EXISTS cacheInterceptor( + id INTEGER PRIMARY KEY, key TEXT, data TEXT, vary TEXT, @@ -148,70 +143,80 @@ export class CacheStore { ) STRICT `) - this.#insertquery = this.database.prepare( + this.#insertquery = this.#database.prepare( 'INSERT INTO cacheInterceptor (key, data, vary, size, expires) VALUES (?, ?, ?, ?, ?)', ) - this.#getQuery = this.database.prepare( + this.#getQuery = this.#database.prepare( 'SELECT * FROM cacheInterceptor WHERE key = ? AND expires > ? ', ) - this.#purgeQuery = this.database.prepare('DELETE FROM cacheInterceptor WHERE expires < ?') - - this.#deleteAllQuery = this.database.prepare('DELETE FROM cacheInterceptor') + this.#purgeQuery = this.#database.prepare('DELETE FROM cacheInterceptor WHERE expires < ?') } - set(key, entry) { - if (!this.database) { + set(key, { data, vary, size, expires }) { + if (!this.#database) { throw new Error('Database not initialized') } - entry.data = JSON.stringify(entry.data) - entry.vary = JSON.stringify(entry.vary) + if (expires < Date.now()) { + return + } + + this.#insertquery.run(key, BJSON.stringify(data), BJSON.stringify(vary), size, expires) - this.#insertquery.run(key, entry.data, entry.vary, entry.size, entry.expires) + this.#size += size - this.purge() + this.#maybePurge() } get(key) { - if (!this.database) { + if (!this.#database) { throw new Error('Database not initialized') } - this.purge() - - const rows = this.#getQuery.all(key, Date.now()) - rows.map((i) => { - i.data = JSON.parse(i.data) - i.vary = JSON.parse(i.vary) - i.data = { - ...i.data, - // JSON.parse doesn't convert a Buffer object back to a Buffer object once it has been stringified. - body: this.#convertToBuffer(i.data.body), - rawHeaders: this.#convertToBuffer(i.data.rawHeaders), - rawTrailers: this.#convertToBuffer(i.data.rawTrailers), + + const rows = this.#getQuery.all(key, Date.now()).map(({ data, vary, size, expires }) => { + return { + data: BJSON.parse(data), + vary: JSON.parse(vary), + size: parseInt(size), + expires: parseInt(expires), } - return i }) return rows } - purge() { - this.#purgeQuery.run(Date.now()) + deleteAll() { + this.#database.exec('DELETE FROM cacheInterceptor') } - deleteAll() { - this.#deleteAllQuery.run() + close() { + this.#database.close() } - #convertToBuffer(bufferArray) { - if (Array.isArray(bufferArray) && bufferArray.length > 0) { - return bufferArray.map((ba) => { - return ba?.type === 'Buffer' ? Buffer.from(ba.data) : ba - }) + #maybePurge() { + if (this.#size == null || this.#size > this.#maxSize) { + this.#purgeQuery.run(Date.now()) + + this.#size = Object.values( + this.#database.prepare('SELECT SUM(size) FROM cacheInterceptor').get(), + )[0] + + // In the case where the cache is full but has no expired entries yet, delete 10% of the cache, ordered by + // the oldest entries according to the 'expires' column. + if (this.#size > this.#maxSize) { + this.#database.exec(` + DELETE FROM cacheInterceptor + WHERE id IN ( + SELECT id + FROM cacheInterceptor + ORDER BY expires ASC + LIMIT (SELECT ROUND(COUNT(*) * 0.10 + 1) FROM cacheInterceptor) + ); + `) + } } - return bufferArray } } @@ -228,13 +233,13 @@ function sortEntriesByVary(entries) { }) } -function findEntryByHeaders(entries, reqHeaders) { +function findEntryByHeaders(entries, request) { sortEntriesByVary(entries) return entries?.find( (entry) => entry.vary?.every(([key, val]) => { - return reqHeaders?.headers[key] === val + return request?.headers[key] === val || request[key] === val }) ?? true, ) } @@ -269,7 +274,6 @@ export default (opts) => (dispatch) => (opts, handler) => { // Dump body... opts.body?.on('error', () => {}).resume() - opts.host = opts.host ?? new URL(opts.origin).host if (!opts.headers) { diff --git a/package.json b/package.json index 4f52c7e..c3a8c44 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "lib/*" ], "dependencies": { + "buffer-json": "^2.0.0", "cache-control-parser": "^2.0.6", "cacheable-lookup": "^7.0.0", "http-errors": "^2.0.0", diff --git a/test/cache.js b/test/cache.js index 6a9dca4..61070c3 100644 --- a/test/cache.js +++ b/test/cache.js @@ -40,7 +40,7 @@ function exampleEntries() { ['User-Agent', 'Mozilla/5.0'], ], size: 100, - expires: Date.now() + 31556952000, + expires: Date.now() + 31556952001 + Math.floor(Math.random() * 100), }, { data: { @@ -56,7 +56,7 @@ function exampleEntries() { ['origin2', 'www.google.com/images'], ], size: 100, - expires: Date.now() + 31556952000, + expires: Date.now() + 31556952002 + Math.floor(Math.random() * 100), }, { data: { @@ -73,7 +73,7 @@ function exampleEntries() { ['origin2', 'www.google.com/images'], ], size: 100, - expires: Date.now() + 31556952000, + expires: Date.now() + 31556952003 + Math.floor(Math.random() * 100), }, { data: { @@ -98,7 +98,6 @@ function exampleEntries() { function dbsetup(populate = true) { const cache = new CacheStore() - cache.deleteAll() if (populate) { exampleEntries().forEach((i) => cache.set('GET:/', i)) @@ -107,8 +106,7 @@ function dbsetup(populate = true) { return cache } -// This test will not always pass because of different execution times of operations in the in-memory database each time. -test('If no matching entry found, store the response in cache. Else return a matching entry.', async (t) => { +test('If no matching entry found, store the response in cache. Else return a matching entry.', (t) => { t.plan(4) const server = createServer((req, res) => { res.writeHead(307, { @@ -170,11 +168,11 @@ test('If no matching entry found, store the response in cache. Else return a mat // and after a cached entry was used as a response. t.equal(cacheLength3, cacheLength2) - cache.database.close() + cache.close() }) }) -test('Responses with header Vary: * should not be cached', async (t) => { +test('Responses with header Vary: * should not be cached', (t) => { t.plan(2) const server = createServer((req, res) => { res.writeHead(307, { @@ -218,78 +216,65 @@ test('Responses with header Vary: * should not be cached', async (t) => { t.equal(cacheLength2, cacheLength1) - cache.database.close() + cache.close() }) }) -test('Store 307-status-responses that happen to be dependent on the Range header', async (t) => { - t.plan(5) +test('307-Redirect Vary on Host, save to cache, fetch from cache', (t) => { + t.plan(3) const server = createServer((req, res) => { res.writeHead(307, { - Vary: 'Origin2, User-Agent, Accept', + Vary: 'Host', 'Cache-Control': 'public, immutable', - 'Content-Length': 4, + 'Content-Length': 3, 'Content-Type': 'text/html', - Connection: 'close', - Location: 'http://www.google.com/', + Connection: 'keep-alive', + Location: 'http://www.blankwebsite.com/', + datenow: Date.now(), }) - res.end('foob') + res.end('asd') }) t.teardown(server.close.bind(server)) - const cache = dbsetup(false) - - const cacheLength1 = cache.get('GET:/').length - server.listen(0, async () => { - const serverPort = server.address().port - - const request1 = undici.request(`http://0.0.0.0:${serverPort}`, { + const response1 = await undici.request(`http://0.0.0.0:${server.address().port}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache, - headers: { - range: 'bytes=0-999', - }, - testing: 'testing 1', + cache: true, }) - - // response not found in cache, response should be added to cache. - const response1 = await request1 let str1 = '' for await (const chunk of response1.body) { str1 += chunk } - const cacheLength2 = cache.get('GET:/').length - // should return the default server response - t.equal(str1, 'foob') - t.equal(cacheLength1, 0) - t.equal(cacheLength2, 1) + t.equal(str1, 'asd') - const request2 = undici.request(`http://0.0.0.0:${serverPort}`, { + const response2 = await undici.request(`http://0.0.0.0:${server.address().port}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache, - headers: { - range: 'bytes=0-999', - }, - testing: 'testing 2', + cache: true, }) - - // response found in cache, response should be fetched from cache. - const response2 = await request2 - let str2 = '' for await (const chunk of response2.body) { str2 += chunk } - const cacheLength3 = cache.get('GET:/').length + t.equal(response1.headers.datenow, response2.headers.datenow) + t.equal(str2, 'asd') + }) +}) - // should return the cached response - t.equal(str2, 'foob') - t.equal(cacheLength3, 1) +test('Cache purging based on its maxSize', (t) => { + t.plan(1) + const cache = new CacheStore(':memory:', { maxSize: 500 }) - cache.database.close() - }) + exampleEntries() + .concat(exampleEntries()) + .concat(exampleEntries()) + .concat(exampleEntries()) + .forEach((i) => cache.set('GET:/', i)) + + const rows = cache.get('GET:/') + const totalSize = rows.reduce((acc, r) => r.size + acc, 0) + + t.equal(totalSize, 400) }) From 550decb21f0a4dfa53f5edbc12e57e73d40f151a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Thu, 22 Aug 2024 09:20:35 +0200 Subject: [PATCH 12/15] fix(cache): update size of cache after purging. --- lib/interceptor/cache.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 4b66dd7..0da53fb 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -124,6 +124,7 @@ export class CacheStore { #getQuery #purgeQuery #database + #sizeQuery #size = 0 #maxSize = 128e9 @@ -152,6 +153,8 @@ export class CacheStore { ) this.#purgeQuery = this.#database.prepare('DELETE FROM cacheInterceptor WHERE expires < ?') + + this.#sizeQuery = this.#database.prepare('SELECT SUM(size) FROM cacheInterceptor') } set(key, { data, vary, size, expires }) { @@ -199,9 +202,7 @@ export class CacheStore { if (this.#size == null || this.#size > this.#maxSize) { this.#purgeQuery.run(Date.now()) - this.#size = Object.values( - this.#database.prepare('SELECT SUM(size) FROM cacheInterceptor').get(), - )[0] + this.#size = Object.values(this.#sizeQuery.get())[0] // In the case where the cache is full but has no expired entries yet, delete 10% of the cache, ordered by // the oldest entries according to the 'expires' column. @@ -215,6 +216,8 @@ export class CacheStore { LIMIT (SELECT ROUND(COUNT(*) * 0.10 + 1) FROM cacheInterceptor) ); `) + + this.#size = Object.values(this.#sizeQuery.get())[0] } } } From 6a1a84dce00ddf7f015284c72e472f8741c6a4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Thu, 22 Aug 2024 14:17:22 +0200 Subject: [PATCH 13/15] feat(cache): added cache maxTTL option + refactoring --- lib/interceptor/cache.js | 29 +++----------- package.json | 1 + test/cache.js | 86 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 90 insertions(+), 26 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 0da53fb..7d5a5a6 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -127,9 +127,11 @@ export class CacheStore { #sizeQuery #size = 0 #maxSize = 128e9 + #maxTTL = Infinity constructor(location = ':memory:', opts = {}) { this.#maxSize = opts.maxSize ?? this.#maxSize + this.#maxTTL = opts.maxTTL ?? this.#maxTTL this.#database = new DatabaseSync(location) @@ -158,14 +160,14 @@ export class CacheStore { } set(key, { data, vary, size, expires }) { - if (!this.#database) { - throw new Error('Database not initialized') - } - if (expires < Date.now()) { return } + const maxExpires = Date.now() + this.#maxTTL + + expires = expires > maxExpires ? maxExpires : expires + this.#insertquery.run(key, BJSON.stringify(data), BJSON.stringify(vary), size, expires) this.#size += size @@ -174,10 +176,6 @@ export class CacheStore { } get(key) { - if (!this.#database) { - throw new Error('Database not initialized') - } - const rows = this.#getQuery.all(key, Date.now()).map(({ data, vary, size, expires }) => { return { data: BJSON.parse(data), @@ -223,22 +221,7 @@ export class CacheStore { } } -/* - Sort entries by number of vary headers in descending order, because - we need to compare the most complex response to the request first. - A cached response with an empty ´vary´ field will otherwise win every time. -*/ -function sortEntriesByVary(entries) { - entries.sort((a, b) => { - const lengthA = a.vary ? a.vary.length : 0 - const lengthB = b.vary ? b.vary.length : 0 - return lengthB - lengthA - }) -} - function findEntryByHeaders(entries, request) { - sortEntriesByVary(entries) - return entries?.find( (entry) => entry.vary?.every(([key, val]) => { diff --git a/package.json b/package.json index c3a8c44..8d383c0 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "cache-control-parser": "^2.0.6", "cacheable-lookup": "^7.0.0", "http-errors": "^2.0.0", + "sqlite3": "^5.1.7", "undici": "^6.19.5" }, "devDependencies": { diff --git a/test/cache.js b/test/cache.js index 61070c3..86812ab 100644 --- a/test/cache.js +++ b/test/cache.js @@ -40,7 +40,7 @@ function exampleEntries() { ['User-Agent', 'Mozilla/5.0'], ], size: 100, - expires: Date.now() + 31556952001 + Math.floor(Math.random() * 100), + expires: Date.now() * 2 + Math.floor(Math.random() * 100), }, { data: { @@ -56,7 +56,7 @@ function exampleEntries() { ['origin2', 'www.google.com/images'], ], size: 100, - expires: Date.now() + 31556952002 + Math.floor(Math.random() * 100), + expires: Date.now() * 2 + Math.floor(Math.random() * 100), }, { data: { @@ -73,7 +73,7 @@ function exampleEntries() { ['origin2', 'www.google.com/images'], ], size: 100, - expires: Date.now() + 31556952003 + Math.floor(Math.random() * 100), + expires: Date.now() * 2 + Math.floor(Math.random() * 100), }, { data: { @@ -278,3 +278,83 @@ test('Cache purging based on its maxSize', (t) => { t.equal(totalSize, 400) }) + +test('Cache #maxTTL overwriting entries ttl', (t) => { + t.plan(1) + + const day = 1000 * 60 * 60 * 24 + const cache = new CacheStore(':memory:', { maxTTL: day }) + exampleEntries().forEach((i) => cache.set('GET:/', i)) + + const row = cache.get('GET:/')[0] + const rowExpires = Math.floor(row.expires / 1000) + const maxExpires = Math.floor((Date.now() + day) / 1000) + + t.equal(rowExpires, maxExpires) +}) + +// test('200-OK, save to cache, fetch from cache', (t) => { +// t.plan(4) +// const server = createServer((req, res) => { +// res.writeHead(307, { +// Vary: 'Origin2, User-Agent, Accept', +// 'Cache-Control': 'public, immutable', +// 'Content-Length': 4, +// 'Content-Type': 'text/html', +// Connection: 'close', +// Location: 'http://www.google.com/', +// }) +// res.end('foob') +// }) + +// t.teardown(server.close.bind(server)) + +// const cache = dbsetup() + +// const cacheLength1 = cache.get('GET:/').length + +// server.listen(0, async () => { +// const serverPort = server.address().port +// // response not found in cache, response should be added to cache. +// const response = await undici.request(`http://0.0.0.0:${serverPort}`, { +// dispatcher: new undici.Agent().compose(interceptors.cache()), +// cache, +// }) +// let str = '' +// for await (const chunk of response.body) { +// str += chunk +// } +// const cacheLength2 = cache.get('GET:/').length + +// // should return the default server response +// t.equal(str, 'foob') + +// t.equal(cacheLength2, cacheLength1 + 1) + +// // response found in cache, return cached response. +// const response2 = await undici.request(`http://0.0.0.0:${serverPort}`, { +// dispatcher: new undici.Agent().compose(interceptors.cache()), +// headers: { +// Accept: 'application/txt', +// 'User-Agent': 'Chrome', +// origin2: 'www.google.com/images', +// }, +// cache, +// }) +// let str2 = '' +// for await (const chunk of response2.body) { +// str2 += chunk +// } + +// const cacheLength3 = cache.get('GET:/').length + +// // should return the body from the cached entry +// t.equal(str2, 'asd2') + +// // cache should still have the same number of entries before +// // and after a cached entry was used as a response. +// t.equal(cacheLength3, cacheLength2) + +// cache.close() +// }) +// }) From d5be580ffed74c71ce9935d4ba4617aeb906dc55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Tue, 27 Aug 2024 14:28:18 +0200 Subject: [PATCH 14/15] feat(cache): replace sqlite3 with better-sqlite3. --- lib/interceptor/cache.js | 91 +++++++++++--------- package.json | 2 +- test/cache.js | 178 +++++++++++++++++++-------------------- 3 files changed, 142 insertions(+), 129 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index 7d5a5a6..b24fa09 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -1,6 +1,7 @@ import assert from 'node:assert' import { DecoratorHandler, parseHeaders, parseCacheControl } from '../utils.js' -import { DatabaseSync } from 'node:sqlite' // --experimental-sqlite +import Database from 'better-sqlite3' + import * as BJSON from 'buffer-json' class CacheHandler extends DecoratorHandler { @@ -120,9 +121,11 @@ function formatVaryData(resHeaders, reqHeaders) { } export class CacheStore { + connected = false #insertquery #getQuery - #purgeQuery + #purgeExpiredQuery + #purgeOldestQuery #database #sizeQuery #size = 0 @@ -133,11 +136,11 @@ export class CacheStore { this.#maxSize = opts.maxSize ?? this.#maxSize this.#maxTTL = opts.maxTTL ?? this.#maxTTL - this.#database = new DatabaseSync(location) + this.#database = new Database(location) this.#database.exec(` CREATE TABLE IF NOT EXISTS cacheInterceptor( - id INTEGER PRIMARY KEY, + id INTEGER PRIMARY KEY AUTOINCREMENT, key TEXT, data TEXT, vary TEXT, @@ -149,14 +152,24 @@ export class CacheStore { this.#insertquery = this.#database.prepare( 'INSERT INTO cacheInterceptor (key, data, vary, size, expires) VALUES (?, ?, ?, ?, ?)', ) - this.#getQuery = this.#database.prepare( - 'SELECT * FROM cacheInterceptor WHERE key = ? AND expires > ? ', + 'SELECT * FROM cacheInterceptor WHERE key = ? AND expires > ?', + ) + this.#purgeExpiredQuery = this.#database.prepare( + 'DELETE FROM cacheInterceptor WHERE expires < ?', ) - - this.#purgeQuery = this.#database.prepare('DELETE FROM cacheInterceptor WHERE expires < ?') - this.#sizeQuery = this.#database.prepare('SELECT SUM(size) FROM cacheInterceptor') + this.#purgeOldestQuery = this.#database.prepare(` + DELETE FROM cacheInterceptor + WHERE id IN ( + SELECT id + FROM cacheInterceptor + ORDER BY expires ASC + LIMIT (SELECT CEILING(COUNT(*) * 0.10) FROM cacheInterceptor) + ); + `) + + this.connected = true } set(key, { data, vary, size, expires }) { @@ -165,18 +178,19 @@ export class CacheStore { } const maxExpires = Date.now() + this.#maxTTL - expires = expires > maxExpires ? maxExpires : expires this.#insertquery.run(key, BJSON.stringify(data), BJSON.stringify(vary), size, expires) this.#size += size - this.#maybePurge() } get(key) { - const rows = this.#getQuery.all(key, Date.now()).map(({ data, vary, size, expires }) => { + const rows = this.#getQuery.all(key, Date.now()) + + return rows.map((row) => { + const { data, vary, size, expires } = row return { data: BJSON.parse(data), vary: JSON.parse(vary), @@ -184,53 +198,41 @@ export class CacheStore { expires: parseInt(expires), } }) - - return rows - } - - deleteAll() { - this.#database.exec('DELETE FROM cacheInterceptor') } close() { this.#database.close() + this.connected = false } #maybePurge() { if (this.#size == null || this.#size > this.#maxSize) { - this.#purgeQuery.run(Date.now()) - - this.#size = Object.values(this.#sizeQuery.get())[0] + this.#purgeExpiredQuery.run(Date.now()) + this.#size = this.#sizeQuery.get()['SUM(size)'] // In the case where the cache is full but has no expired entries yet, delete 10% of the cache, ordered by // the oldest entries according to the 'expires' column. if (this.#size > this.#maxSize) { - this.#database.exec(` - DELETE FROM cacheInterceptor - WHERE id IN ( - SELECT id - FROM cacheInterceptor - ORDER BY expires ASC - LIMIT (SELECT ROUND(COUNT(*) * 0.10 + 1) FROM cacheInterceptor) - ); - `) - - this.#size = Object.values(this.#sizeQuery.get())[0] + this.#purgeOldestQuery.run() + this.#size = this.#sizeQuery.get()['SUM(size)'] } } } } function findEntryByHeaders(entries, request) { - return entries?.find( + const foundEntry = entries?.find( (entry) => entry.vary?.every(([key, val]) => { return request?.headers[key] === val || request[key] === val }) ?? true, ) + + // if no exact match was found, take the latest added entry + return foundEntry ?? entries[0] } -const DEFAULT_CACHE_STORE = new CacheStore() +let cacheInstance = null export default (opts) => (dispatch) => (opts, handler) => { if (!opts.cache || opts.upgrade) { @@ -266,16 +268,27 @@ export default (opts) => (dispatch) => (opts, handler) => { opts.headers = {} } - // idea: use DEFAULT_CACHE_STORE by default if 'cache' not specified, since the cache interceptor was already specified to be used. - const store = opts.cache === true ? DEFAULT_CACHE_STORE : opts.cache + // Supported opts.cache values: [true, false, 'clear', custom cache] + // create new cache instance if none exists + if (opts.cache === 'clear' || (!cacheInstance?.connected && opts.cache === true)) { + cacheInstance = new CacheStore() + } + + // or use provided cache instead + if (typeof opts.cache === 'object') { + cacheInstance = opts.cache + } - if (!store) { + if (!cacheInstance) { throw new Error(`Cache store not provided.`) } const key = `${opts.method}:${opts.path}` - const entries = (store.get(key) ?? opts.method === 'HEAD') ? store.get(`GET:${opts.path}`) : null + const entries = + (cacheInstance.get(key) ?? opts.method === 'HEAD') + ? cacheInstance.get(`GET:${opts.path}`) + : null const entry = findEntryByHeaders(entries, opts) @@ -315,6 +328,6 @@ export default (opts) => (dispatch) => (opts, handler) => { return true } else { - return dispatch(opts, new CacheHandler({ handler, store, key, opts })) + return dispatch(opts, new CacheHandler({ handler, store: cacheInstance, key, opts })) } } diff --git a/package.json b/package.json index 8d383c0..2f60564 100644 --- a/package.json +++ b/package.json @@ -9,11 +9,11 @@ "lib/*" ], "dependencies": { + "better-sqlite3": "^11.2.1", "buffer-json": "^2.0.0", "cache-control-parser": "^2.0.6", "cacheable-lookup": "^7.0.0", "http-errors": "^2.0.0", - "sqlite3": "^5.1.7", "undici": "^6.19.5" }, "devDependencies": { diff --git a/test/cache.js b/test/cache.js index 86812ab..5723e3c 100644 --- a/test/cache.js +++ b/test/cache.js @@ -96,14 +96,16 @@ function exampleEntries() { return entries } -function dbsetup(populate = true) { - const cache = new CacheStore() +function dbsetup() { + const cache = new CacheStore(':memory:') + return cache +} - if (populate) { - exampleEntries().forEach((i) => cache.set('GET:/', i)) +function seedCache(cache) { + for (const entry of exampleEntries()) { + cache.set('GET:/', entry) } - - return cache + return exampleEntries().length } test('If no matching entry found, store the response in cache. Else return a matching entry.', (t) => { @@ -116,6 +118,7 @@ test('If no matching entry found, store the response in cache. Else return a mat 'Content-Type': 'text/html', Connection: 'close', Location: 'http://www.google.com/', + datenow: Date.now(), }) res.end('foob') }) @@ -124,25 +127,24 @@ test('If no matching entry found, store the response in cache. Else return a mat const cache = dbsetup() - const cacheLength1 = cache.get('GET:/').length - server.listen(0, async () => { const serverPort = server.address().port // response not found in cache, response should be added to cache. - const response = await undici.request(`http://0.0.0.0:${serverPort}`, { + const response1 = await undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), cache, }) let str = '' - for await (const chunk of response.body) { + for await (const chunk of response1.body) { str += chunk } - const cacheLength2 = cache.get('GET:/').length + const cacheLength1 = cache.get('GET:/').length + const added = seedCache(cache) - 1 // one is purged quickly due to ttl // should return the default server response t.equal(str, 'foob') - t.equal(cacheLength2, cacheLength1 + 1) + t.equal(cacheLength1, 1) // response found in cache, return cached response. const response2 = await undici.request(`http://0.0.0.0:${serverPort}`, { @@ -154,19 +156,15 @@ test('If no matching entry found, store the response in cache. Else return a mat }, cache, }) - let str2 = '' - for await (const chunk of response2.body) { - str2 += chunk - } - const cacheLength3 = cache.get('GET:/').length + const cacheLength2 = cache.get('GET:/').length - // should return the body from the cached entry - t.equal(str2, 'asd2') + // should return the same response + t.equal(response1.datenow, response2.datenow) // cache should still have the same number of entries before // and after a cached entry was used as a response. - t.equal(cacheLength3, cacheLength2) + t.equal(cacheLength2, cacheLength1 + added) cache.close() }) @@ -188,7 +186,7 @@ test('Responses with header Vary: * should not be cached', (t) => { t.teardown(server.close.bind(server)) - const cache = dbsetup(false) + const cache = dbsetup() const cacheLength1 = cache.get('GET:/').length @@ -198,7 +196,7 @@ test('Responses with header Vary: * should not be cached', (t) => { // But the server returns Vary: *, and thus shouldn't be cached. const response = await undici.request(`http://0.0.0.0:${serverPort}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), - cache, + cache: true, // use the CacheHandler's existing CacheStore created earlier with dbsetup(). headers: { Accept: 'application/txt', 'User-Agent': 'Chrome', @@ -237,6 +235,7 @@ test('307-Redirect Vary on Host, save to cache, fetch from cache', (t) => { t.teardown(server.close.bind(server)) + // entry not found, save to cache server.listen(0, async () => { const response1 = await undici.request(`http://0.0.0.0:${server.address().port}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), @@ -249,6 +248,7 @@ test('307-Redirect Vary on Host, save to cache, fetch from cache', (t) => { t.equal(str1, 'asd') + // entry found, fetch from cache const response2 = await undici.request(`http://0.0.0.0:${server.address().port}`, { dispatcher: new undici.Agent().compose(interceptors.cache()), cache: true, @@ -271,15 +271,18 @@ test('Cache purging based on its maxSize', (t) => { .concat(exampleEntries()) .concat(exampleEntries()) .concat(exampleEntries()) + .concat(exampleEntries()) + .concat(exampleEntries()) + .concat(exampleEntries()) // total size inserted: 2100 .forEach((i) => cache.set('GET:/', i)) const rows = cache.get('GET:/') const totalSize = rows.reduce((acc, r) => r.size + acc, 0) - t.equal(totalSize, 400) + t.equal(totalSize, 500) }) -test('Cache #maxTTL overwriting entries ttl', (t) => { +test('Cache #maxTTL overwriting ttl of individual entries', (t) => { t.plan(1) const day = 1000 * 60 * 60 * 24 @@ -293,68 +296,65 @@ test('Cache #maxTTL overwriting entries ttl', (t) => { t.equal(rowExpires, maxExpires) }) -// test('200-OK, save to cache, fetch from cache', (t) => { -// t.plan(4) -// const server = createServer((req, res) => { -// res.writeHead(307, { -// Vary: 'Origin2, User-Agent, Accept', -// 'Cache-Control': 'public, immutable', -// 'Content-Length': 4, -// 'Content-Type': 'text/html', -// Connection: 'close', -// Location: 'http://www.google.com/', -// }) -// res.end('foob') -// }) - -// t.teardown(server.close.bind(server)) - -// const cache = dbsetup() - -// const cacheLength1 = cache.get('GET:/').length - -// server.listen(0, async () => { -// const serverPort = server.address().port -// // response not found in cache, response should be added to cache. -// const response = await undici.request(`http://0.0.0.0:${serverPort}`, { -// dispatcher: new undici.Agent().compose(interceptors.cache()), -// cache, -// }) -// let str = '' -// for await (const chunk of response.body) { -// str += chunk -// } -// const cacheLength2 = cache.get('GET:/').length - -// // should return the default server response -// t.equal(str, 'foob') - -// t.equal(cacheLength2, cacheLength1 + 1) - -// // response found in cache, return cached response. -// const response2 = await undici.request(`http://0.0.0.0:${serverPort}`, { -// dispatcher: new undici.Agent().compose(interceptors.cache()), -// headers: { -// Accept: 'application/txt', -// 'User-Agent': 'Chrome', -// origin2: 'www.google.com/images', -// }, -// cache, -// }) -// let str2 = '' -// for await (const chunk of response2.body) { -// str2 += chunk -// } - -// const cacheLength3 = cache.get('GET:/').length - -// // should return the body from the cached entry -// t.equal(str2, 'asd2') - -// // cache should still have the same number of entries before -// // and after a cached entry was used as a response. -// t.equal(cacheLength3, cacheLength2) - -// cache.close() -// }) -// }) +test('200-OK, save to cache, fetch from cache', (t) => { + t.plan(4) + const server = createServer((req, res) => { + res.writeHead(200, { + Vary: 'User-Agent, Accept', + 'Cache-Control': 'public, immutable', + 'Content-Length': 4, + 'Content-Type': 'text/html', + Connection: 'close', + Location: 'http://www.google.com/', + datenow: Date.now(), + }) + res.end('foob') + }) + + t.teardown(server.close.bind(server)) + + const cache = dbsetup() + + server.listen(0, async () => { + const serverPort = server.address().port + // response not found in cache, response should be added to cache. + const response1 = await undici.request(`http://0.0.0.0:${serverPort}`, { + dispatcher: new undici.Agent().compose(interceptors.cache()), + cache, + }) + let str = '' + for await (const chunk of response1.body) { + str += chunk + } + + const cacheLength1 = cache.get('GET:/').length + + t.equal(cacheLength1, 1) + + // should return the default server response + t.equal(str, 'foob') + + const added = seedCache(cache) - 1 // (one is purged quickly due to ttl) + + // response found in cache, return cached response. + const response2 = await undici.request(`http://0.0.0.0:${serverPort}`, { + dispatcher: new undici.Agent().compose(interceptors.cache()), + headers: { + Accept: 'application/txt', + 'User-Agent': 'Chrome', + }, + cache, + }) + + const cacheLength2 = cache.get('GET:/').length + + // should return the response from the cached entry + t.equal(response2.datenow, response1.datenow) + + // cache should still have the same number of entries before + // and after a cached entry was used as a response. + t.equal(cacheLength2, added + cacheLength1) + + cache.close() + }) +}) From d8fbbcdda45fe9f6321757380c1b2512cc5b16a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20T=C3=B6rnros?= Date: Tue, 27 Aug 2024 14:31:40 +0200 Subject: [PATCH 15/15] fix(cache): minor refactor --- lib/interceptor/cache.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/interceptor/cache.js b/lib/interceptor/cache.js index b24fa09..11af2ba 100644 --- a/lib/interceptor/cache.js +++ b/lib/interceptor/cache.js @@ -176,9 +176,7 @@ export class CacheStore { if (expires < Date.now()) { return } - - const maxExpires = Date.now() + this.#maxTTL - expires = expires > maxExpires ? maxExpires : expires + expires = Math.min(expires, Date.now() + this.#maxTTL) this.#insertquery.run(key, BJSON.stringify(data), BJSON.stringify(vary), size, expires)