From 3c63b4e7d439371cec1b68fc6d0d496e4d082eae Mon Sep 17 00:00:00 2001 From: Stefan Penner Date: Fri, 12 Jul 2024 10:30:38 -0600 Subject: [PATCH] [Breaking] Ensure stability of filename cache-keys (#909) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [Bugfix] Ensure stability of filename cache-keys `JSON.stringify(structure)` isn’t inherently stable as it relies on various internal details of how `structure` was created. As written, if a given babel configuration is created in a dynamic manner, it is possible for babel-loader to have spurious cache misses. To address this, we can use one of the many stable stringify alternatives. For this PR I have selected [fast-stable-stringify](https://www.npmjs.com/package/fast-stable-stringify) for that task, as it appears popular and its benchmarks look promising. This PR does not explicitly include tests, as this is tricky to test in this context and the important tests are contained within fast-stable-stringify itself. * update yarn.lock * perf: avoid serializing options twice The options will be serialized in the cache#filename function with the cache identifier, so we don't have to include options in the cache identifier. * polish: use our own config serializer * update cacheIdentifier docs --------- Co-authored-by: Huáng Jùnliàng --- README.md | 6 +--- src/cache.js | 7 ++-- src/index.js | 6 +--- src/serialize.js | 83 ++++++++++++++++++++++++++++++++++++++++++++++ test/cache.test.js | 4 ++- 5 files changed, 91 insertions(+), 15 deletions(-) create mode 100644 src/serialize.js diff --git a/README.md b/README.md index 890ffd48..377a6f2f 100644 --- a/README.md +++ b/README.md @@ -88,11 +88,7 @@ This loader also supports the following loader-specific option: * `cacheDirectory`: Default `false`. When set, the given directory will be used to cache the results of the loader. 
Future webpack builds will attempt to read from the cache to avoid needing to run the potentially expensive Babel recompilation process on each run. If the value is set to `true` in options (`{cacheDirectory: true}`), the loader will use the default cache directory in `node_modules/.cache/babel-loader` or fallback to the default OS temporary file directory if no `node_modules` folder could be found in any root directory. -* `cacheIdentifier`: Default is a string composed by - - the `@babel/core`'s version and the `babel-loader`'s version - - the [merged](https://babeljs.io/docs/configuration#how-babel-merges-config-items) [Babel config](https://babeljs.io/docs/config-files), including options passed to `babel-loader` and the contents of `babel.config.js` or `.babelrc` file if they exist - - the value of the environment variable `BABEL_ENV` with a fallback to the `NODE_ENV` environment variable. - This can be set to a custom value to force cache busting if the identifier changes. +* `cacheIdentifier`: Default is a string composed by the `@babel/core`'s version and the `babel-loader`'s version. The final cache id will be determined by the input file path, the [merged](https://babeljs.io/docs/configuration#how-babel-merges-config-items) Babel config via `Babel.loadPartialConfigAsync` and the `cacheIdentifier`. The merged Babel config will be determined by the `babel.config.js` or `.babelrc` file if they exist, or the value of the environment variable `BABEL_ENV` and `NODE_ENV`. `cacheIdentifier` can be set to a custom value to force cache busting if the identifier changes. * `cacheCompression`: Default `true`. When set, each Babel transform output will be compressed with Gzip. If you want to opt-out of cache compression, set it to `false` -- your project may benefit from this if it transpiles thousands of files. 
diff --git a/src/cache.js b/src/cache.js index 2444bb12..15b10b77 100644 --- a/src/cache.js +++ b/src/cache.js @@ -13,10 +13,11 @@ const zlib = require("zlib"); const crypto = require("crypto"); const { promisify } = require("util"); const { readFile, writeFile, mkdir } = require("fs/promises"); +// Lazily instantiated when needed const findCacheDirP = import("find-cache-dir"); const transform = require("./transform"); -// Lazily instantiated when needed +const serialize = require("./serialize"); let defaultCacheDirectory = null; let hashType = "sha256"; @@ -70,9 +71,7 @@ const write = async function (filename, compress, result) { const filename = function (source, identifier, options) { const hash = crypto.createHash(hashType); - const contents = JSON.stringify({ source, options, identifier }); - - hash.update(contents); + hash.update(serialize([options, source, identifier])); return hash.digest("hex") + ".json"; }; diff --git a/src/index.js b/src/index.js index 5ba55e3e..a2b85015 100644 --- a/src/index.js +++ b/src/index.js @@ -174,11 +174,7 @@ async function loader(source, inputSourceMap, overrides) { const { cacheDirectory = null, - cacheIdentifier = JSON.stringify({ - options, - "@babel/core": transform.version, - "@babel/loader": version, - }), + cacheIdentifier = "core" + transform.version + "," + "loader" + version, cacheCompression = true, metadataSubscribers = [], } = loaderOptions; diff --git a/src/serialize.js b/src/serialize.js new file mode 100644 index 00000000..78e177a8 --- /dev/null +++ b/src/serialize.js @@ -0,0 +1,83 @@ +var objToString = Object.prototype.toString; +var objKeys = Object.getOwnPropertyNames; + +/** + * A custom Babel options serializer + * + * Intentional deviation from JSON.stringify: + * 1. Object properties are sorted before seralizing + * 2. The output is NOT a valid JSON: e.g. + * The output does not enquote strings, which means a JSON-like string '{"a":1}' + * will share the same result with an JS object { a: 1 }. 
This is not an issue + * for Babel options, but it can not be used for general serialization purpose + * 3. Only 20% slower than the native JSON.stringify on V8 + * + * This function is a fork from https://github.com/nickyout/fast-stable-stringify + * @param {*} val Babel options + * @param {*} isArrayProp + * @returns serialized Babel options + */ +function serialize(val, isArrayProp) { + var i, max, str, keys, key, propVal, toStr; + if (val === true) { + return "!0"; + } + if (val === false) { + return "!1"; + } + switch (typeof val) { + case "object": + if (val === null) { + return null; + } else if (val.toJSON && typeof val.toJSON === "function") { + return serialize(val.toJSON(), isArrayProp); + } else { + toStr = objToString.call(val); + if (toStr === "[object Array]") { + str = "["; + max = val.length - 1; + for (i = 0; i < max; i++) { + str += serialize(val[i], true) + ","; + } + if (max > -1) { + str += serialize(val[i], true); + } + return str + "]"; + } else if (toStr === "[object Object]") { + // only object is left + keys = objKeys(val).sort(); + max = keys.length; + str = "{"; + i = 0; + while (i < max) { + key = keys[i]; + propVal = serialize(val[key], false); + if (propVal !== undefined) { + if (str) { + str += ","; + } + str += '"' + key + '":' + propVal; + } + i++; + } + return str + "}"; + } else { + return JSON.stringify(val); + } + } + case "function": + case "undefined": + return isArrayProp ? null : undefined; + case "string": + return val; + default: + return isFinite(val) ? 
val : null; + } +} + +module.exports = function (val) { + var returnVal = serialize(val, false); + if (returnVal !== undefined) { + return "" + returnVal; + } +}; diff --git a/test/cache.test.js b/test/cache.test.js index 71290b36..0e81ba90 100644 --- a/test/cache.test.js +++ b/test/cache.test.js @@ -323,5 +323,7 @@ test("should allow to specify the .babelrc file", async t => { t.deepEqual(multiStats.stats[1].compilation.warnings, []); const files = fs.readdirSync(t.context.cacheDirectory); - t.true(files.length === 2); + // The two configs resolved to same Babel config because "fixtures/babelrc" + // is { "presets": ["@babel/preset-env"] } + t.true(files.length === 1); });