From b2f5de06a413f9522da40683dc9637bc59453839 Mon Sep 17 00:00:00 2001 From: naveenpaul1 Date: Wed, 9 Oct 2024 10:03:55 +0530 Subject: [PATCH] NSFS | Improve list objects performance on top of NS FS Signed-off-by: naveenpaul1 --- config.js | 3 + src/api/object_api.js | 6 + src/endpoint/s3/ops/s3_get_bucket.js | 1 + src/native/fs/fs_napi.cpp | 2 +- src/sdk/keymarker_fs.js | 50 + src/sdk/list_object_fs.js | 48 + src/sdk/namespace_fs.js | 1037 ++++------------- src/test/system_tests/test_utils.js | 22 + .../jest_tests/test_list_object.test.js | 175 --- .../test_unsort_list_object.test.js | 541 +++++++++ src/test/unit_tests/test_namespace_fs.js | 29 +- src/test/unit_tests/test_namespace_fs_mpu.js | 21 +- src/util/namespace_fs_util.js | 867 ++++++++++++++ 13 files changed, 1744 insertions(+), 1058 deletions(-) create mode 100644 src/sdk/keymarker_fs.js create mode 100644 src/sdk/list_object_fs.js delete mode 100644 src/test/unit_tests/jest_tests/test_list_object.test.js create mode 100644 src/test/unit_tests/jest_tests/test_unsort_list_object.test.js create mode 100644 src/util/namespace_fs_util.js diff --git a/config.js b/config.js index b7429fe55f..5a7ae3cbe1 100644 --- a/config.js +++ b/config.js @@ -828,6 +828,9 @@ config.NSFS_GLACIER_MIGRATE_INTERVAL = 15 * 60 * 1000; // of `manage_nsfs glacier restore` config.NSFS_GLACIER_RESTORE_INTERVAL = 15 * 60 * 1000; +// enable/disable unsorted listing application level +config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = false; + // NSFS_GLACIER_EXPIRY_RUN_TIME must be of the format hh:mm which specifies // when NooBaa should allow running glacier expiry process // NOTE: This will also be in the same timezone as specified in diff --git a/src/api/object_api.js b/src/api/object_api.js index 25b5beae87..7842813a9d 100644 --- a/src/api/object_api.js +++ b/src/api/object_api.js @@ -723,6 +723,9 @@ module.exports = { limit: { type: 'integer' }, + list_type: { + type: 'string', + }, } }, reply: { @@ -777,6 +780,9 @@ module.exports = { limit: { type: 'integer' }, + list_type: { + type: 'string', + }, } }, reply: { diff --git a/src/endpoint/s3/ops/s3_get_bucket.js b/src/endpoint/s3/ops/s3_get_bucket.js index 8c9a5cc797..bde3e7c61a 100644 --- a/src/endpoint/s3/ops/s3_get_bucket.js +++ b/src/endpoint/s3/ops/s3_get_bucket.js @@ -41,6 +41,7 @@ async function get_bucket(req) { bucket: req.params.bucket, prefix: req.query.prefix, delimiter: req.query.delimiter, + list_type: list_type, limit: Math.min(max_keys_received, 1000), key_marker: list_type === '2' ? (s3_utils.cont_tok_to_key_marker(cont_tok) || start_after) : req.query.marker, diff --git a/src/native/fs/fs_napi.cpp b/src/native/fs/fs_napi.cpp index af7bfcf137..760e9202d7 100644 --- a/src/native/fs/fs_napi.cpp +++ b/src/native/fs/fs_napi.cpp @@ -2061,7 +2061,7 @@ struct TellDir : public FSWrapWorker } virtual void OnOK() { - DBG0("FS::Telldir::OnOK: " << DVAL(_wrap->_path) << DVAL(_tell_res)); + DBG1("FS::Telldir::OnOK: " << DVAL(_wrap->_path) << DVAL(_tell_res)); Napi::Env env = Env(); auto res = Napi::BigInt::New(env, _tell_res); _deferred.Resolve(res); diff --git a/src/sdk/keymarker_fs.js b/src/sdk/keymarker_fs.js new file mode 100644 index 0000000000..8eb8fc1e9f --- /dev/null +++ b/src/sdk/keymarker_fs.js @@ -0,0 +1,50 @@ +/* Copyright (C) 2020 NooBaa */ +'use strict'; + +class KeyMarkerFS { + constructor({ marker, marker_pos, pre_dir, pre_dir_pos }, is_unsorted = false) { + this.marker = marker; + this.marker_pos = marker_pos.toString(); + this.pre_dir = pre_dir; + this.pre_dir_pos = pre_dir_pos; + this.key_marker_value = marker; + this.current_dir = ''; + this.is_unsorted = is_unsorted; + this.last_pre_dir = ''; + this.last_pre_dir_position = ''; + if (is_unsorted) { + this.current_dir = pre_dir.length > 0 && marker.includes('/') ? + marker.substring(0, marker.lastIndexOf('/') + 1) : ''; + } + } + + async update_key_marker(marker, marker_pos) { + this.marker = marker; + this.marker_pos = marker_pos; + this.key_marker_value = marker; + } + + async get_previour_dir_length() { + return this.pre_dir.length; + } + + async get_previour_dir_info() { + return { + pre_dir_path: this.pre_dir.pop(), + pre_dir_position: this.pre_dir_pos.pop(), + }; + } + + async add_previour_dir(pre_dir, pre_dir_pos) { + this.pre_dir.push(pre_dir); + this.pre_dir_pos.push(pre_dir_pos.toString()); + } + + async update_last_previour_dir(last_pre_dir, last_pre_dir_position) { + this.last_pre_dir = last_pre_dir; + this.last_pre_dir_position = last_pre_dir_position; + } +} + +// EXPORTS +module.exports = KeyMarkerFS; diff --git a/src/sdk/list_object_fs.js b/src/sdk/list_object_fs.js new file mode 100644 index 0000000000..1a50d1e802 --- /dev/null +++ b/src/sdk/list_object_fs.js @@ -0,0 +1,48 @@ +/* Copyright (C) 2020 NooBaa */ +'use strict'; + + +class ListObjectFS { + constructor({fs_context, list_versions, keymarker, prefix_dir_key, is_truncated, delimiter, prefix, + version_id_marker, list_type, results, limit, skip_list, key_marker}) { + this.fs_context = fs_context; + this.keymarker = keymarker; + this.prefix_dir_key = prefix_dir_key; + this.is_truncated = is_truncated; + this.delimiter = delimiter; + this.prefix = prefix; + this.version_id_marker = version_id_marker; + this.list_type = list_type; + this.results = results; + this.limit = limit; + this.skip_list = skip_list; + this.prefix_ent = ''; + this.marker_dir = ''; + this.param_key_marker = key_marker; + this.list_versions = list_versions; + } + + async update_process_dir_properties(prefix_ent, marker_curr, dir_path) { + this.prefix_ent = prefix_ent; + this.dir_path = dir_path; + } + + async update_is_truncated(is_truncated) { + this.is_truncated = is_truncated; + } + + async get_is_truncated() { + return this.is_truncated; + } + + async update_keymarker(keymarker) { + this.keymarker = keymarker; + } + + async get_keymarker() { + return this.keymarker; + } +} + +// EXPORTS +module.exports = ListObjectFS; diff --git a/src/sdk/namespace_fs.js b/src/sdk/namespace_fs.js index 53e12965f7..6cbc6f9fc0 100644 --- a/src/sdk/namespace_fs.js +++ b/src/sdk/namespace_fs.js @@ -4,7 +4,6 @@ 'use strict'; const _ = require('lodash'); -const fs = require('fs'); const path = require('path'); const util = require('util'); const mime = require('mime'); @@ -27,6 +26,9 @@ const { S3Error } = require('../endpoint/s3/s3_errors'); const NoobaaEvent = require('../manage_nsfs/manage_nsfs_events_utils').NoobaaEvent; const { PersistentLogger } = require('../util/persistent_logger'); const { GlacierBackend } = require('./nsfs_glacier_backend/backend'); +const KeyMarkerFS = require('./keymarker_fs'); +const ListObjectFS = require('./list_object_fs'); +const namespace_fs_util = require('../util/namespace_fs_util'); const multi_buffer_pool = new buffer_utils.MultiSizeBuffersPool({ sorted_buf_sizes: [ @@ -54,19 +56,12 @@ const multi_buffer_pool = new buffer_utils.MultiSizeBuffersPool({ const XATTR_USER_PREFIX = 'user.'; const XATTR_NOOBAA_INTERNAL_PREFIX = XATTR_USER_PREFIX + 'noobaa.'; // TODO: In order to verify validity add content_md5_mtime as well -const XATTR_MD5_KEY = XATTR_USER_PREFIX + 'content_md5'; -const XATTR_CONTENT_TYPE = XATTR_NOOBAA_INTERNAL_PREFIX + 'content_type'; const XATTR_PART_OFFSET = XATTR_NOOBAA_INTERNAL_PREFIX + 'part_offset'; const XATTR_PART_SIZE = XATTR_NOOBAA_INTERNAL_PREFIX + 'part_size'; const XATTR_PART_ETAG = XATTR_NOOBAA_INTERNAL_PREFIX + 'part_etag'; const XATTR_VERSION_ID = XATTR_NOOBAA_INTERNAL_PREFIX + 'version_id'; +const XATTR_CONTENT_TYPE = XATTR_NOOBAA_INTERNAL_PREFIX + 'content_type'; const XATTR_DELETE_MARKER = XATTR_NOOBAA_INTERNAL_PREFIX + 'delete_marker'; -const XATTR_DIR_CONTENT = XATTR_NOOBAA_INTERNAL_PREFIX + 'dir_content'; -const XATTR_TAG = XATTR_NOOBAA_INTERNAL_PREFIX + 'tag.'; -const HIDDEN_VERSIONS_PATH = '.versions'; -const NULL_VERSION_ID = 'null'; -const NULL_VERSION_SUFFIX = '_' + NULL_VERSION_ID; -const XATTR_STORAGE_CLASS_KEY = XATTR_USER_PREFIX + 'storage_class'; const VERSIONING_STATUS_ENUM = Object.freeze({ VER_ENABLED: 'ENABLED', @@ -86,154 +81,6 @@ const COPY_STATUS_ENUM = Object.freeze({ FALLBACK: 'FALLBACK' }); -const XATTR_METADATA_IGNORE_LIST = [ - XATTR_STORAGE_CLASS_KEY, -]; - -/** - * @param {fs.Dirent} a - * @param {fs.Dirent} b - * @returns {1|-1|0} - */ -function sort_entries_by_name(a, b) { - if (a.name < b.name) return -1; - if (a.name > b.name) return 1; - return 0; -} - -function _get_version_id_by_stat({ino, mtimeNsBigint}) { - // TODO: GPFS might require generation number to be added to version_id - return 'mtime-' + mtimeNsBigint.toString(36) + '-ino-' + ino.toString(36); -} - -function _is_version_or_null_in_file_name(filename) { - const is_version_object = _is_version_object(filename); - if (!is_version_object) { - return _is_version_null_version(filename); - } - return is_version_object; -} - -function _is_version_null_version(filename) { - return filename.endsWith(NULL_VERSION_SUFFIX); -} - -function _is_version_object(filename) { - const mtime_substr_index = filename.indexOf('_mtime-'); - if (mtime_substr_index < 0) return false; - const ino_substr_index = filename.indexOf('-ino-'); - return ino_substr_index > mtime_substr_index; -} - -function _get_mtime_from_filename(filename) { - if (!_is_version_object(filename)) { - // Latest file wont have time suffix which will push the latest - // object last in the list. So to keep the order maintained, - // returning the latest time. Multiplying with 1e6 to provide - // nano second precision - return BigInt(Date.now() * 1e6); - } - const file_parts = filename.split('-'); - return size_utils.string_to_bigint(file_parts[file_parts.length - 3], 36); -} - -function _get_filename(file_name) { - if (_is_version_object(file_name)) { - return file_name.substring(0, file_name.indexOf('_mtime-')); - } else if (_is_version_null_version(file_name)) { - return file_name.substring(0, file_name.indexOf(NULL_VERSION_SUFFIX)); - } - return file_name; -} -/** - * @param {fs.Dirent} first_entry - * @param {fs.Dirent} second_entry - * @returns {Number} - */ -function sort_entries_by_name_and_time(first_entry, second_entry) { - const first_entry_name = _get_filename(first_entry.name); - const second_entry_name = _get_filename(second_entry.name); - if (first_entry_name === second_entry_name) { - const first_entry_mtime = _get_mtime_from_filename(first_entry.name); - const second_entry_mtime = _get_mtime_from_filename(second_entry.name); - // To sort the versions in the latest first order, - // below logic is followed - if (second_entry_mtime < first_entry_mtime) return -1; - if (second_entry_mtime > first_entry_mtime) return 1; - return 0; - } else { - if (first_entry_name < second_entry_name) return -1; - if (first_entry_name > second_entry_name) return 1; - return 0; - } -} - -// This is helper function for list object version -// In order to sort the entries by name we would like to change the name of files with suffix of '_null' -// to have the structure of _mtime-...-ino-... as version id. -// This function returns a set that contains all file names that were changed (after change) -// and an array old_versions_after_rename which is old_versions without the versions that stat failed on -async function _rename_null_version(old_versions, fs_context, version_path) { - const renamed_null_versions_set = new Set(); - const old_versions_after_rename = []; - - for (const old_version of old_versions) { - if (_is_version_null_version(old_version.name)) { - try { - const stat = await nb_native().fs.stat(fs_context, path.join(version_path, old_version.name)); - const mtime_ino = _get_version_id_by_stat(stat); - const original_name = _get_filename(old_version.name); - const version_with_mtime_ino = original_name + '_' + mtime_ino; - old_version.name = version_with_mtime_ino; - renamed_null_versions_set.add(version_with_mtime_ino); - } catch (err) { - // to cover an edge case where stat fails - // for example another process deleted an object and we get ENOENT - // just before executing this command but after the starting list object versions - dbg.error(`_rename_null_version of ${old_version.name} got error:`, err); - old_version.name = undefined; - } - } - if (old_version.name) old_versions_after_rename.push(old_version); - } - return { renamed_null_versions_set, old_versions_after_rename }; -} - - -/** - * - * @param {*} stat - entity stat yo check - * @param {*} fs_context - account config using to check symbolic links - * @param {*} entry_path - path of symbolic link - * @returns - */ -async function is_directory_or_symlink_to_directory(stat, fs_context, entry_path) { - try { - let r = native_fs_utils.isDirectory(stat); - if (!r && is_symbolic_link(stat)) { - const targetStat = await nb_native().fs.stat(fs_context, entry_path); - if (!targetStat) throw new Error('is_directory_or_symlink_to_directory: targetStat is empty'); - r = native_fs_utils.isDirectory(targetStat); - } - return r; - } catch (err) { - if (err.code !== 'ENOENT') { - throw err; - } - } -} - -function is_symbolic_link(stat) { - if (!stat) throw new Error('isSymbolicLink: stat is empty'); - if (stat.mode) { - // eslint-disable-next-line no-bitwise - return (((stat.mode) & nb_native().fs.S_IFMT) === nb_native().fs.S_IFLNK); - } else if (stat.type) { - return stat.type === nb_native().fs.DT_LNK; - } else { - throw new Error(`isSymbolicLink: stat ${stat} is not supported`); - } -} /** * NOTICE that even files that were written sequentially, can still be identified as sparse: @@ -250,45 +97,6 @@ function is_sparse_file(stat) { return (stat.blocks * 512 < stat.size); } -/** - * @param {fs.Dirent} e - * @returns {string} - */ -function get_entry_name(e) { - return e.name; -} - -/** - * @param {string} name - * @returns {fs.Dirent} - */ -function make_named_dirent(name) { - const entry = new fs.Dirent(); - entry.name = name; - return entry; -} - -function to_xattr(fs_xattr) { - const xattr = _.mapKeys(fs_xattr, (val, key) => { - // Prioritize ignore list - if (XATTR_METADATA_IGNORE_LIST.includes(key)) return ''; - - // Fallback to rules - - if (key.startsWith(XATTR_USER_PREFIX) && !key.startsWith(XATTR_NOOBAA_INTERNAL_PREFIX)) { - return key.slice(XATTR_USER_PREFIX.length); - } - - return ''; - }); - - // keys which do not start with prefix will all map to the empty string key, so we remove it once - delete xattr['']; - // @ts-ignore - xattr[s3_utils.XATTR_SORT_SYMBOL] = true; - return xattr; -} - function to_fs_xattr(xattr) { if (_.isEmpty(xattr)) return undefined; return _.mapKeys(xattr, (val, key) => XATTR_USER_PREFIX + key); @@ -305,130 +113,6 @@ function get_random_delay(base, min, max) { return base + crypto.randomInt(min, max); } -/** - * @typedef {{ - * time: number, - * stat: nb.NativeFSStats, - * usage: number, - * sorted_entries?: fs.Dirent[], - * }} ReaddirCacheItem - * @type {LRUCache} - */ -const dir_cache = new LRUCache({ - name: 'nsfs-dir-cache', - make_key: ({ dir_path }) => dir_path, - load: async ({ dir_path, fs_context }) => { - const time = Date.now(); - const stat = await nb_native().fs.stat(fs_context, dir_path); - let sorted_entries; - let usage = config.NSFS_DIR_CACHE_MIN_DIR_SIZE; - if (stat.size <= config.NSFS_DIR_CACHE_MAX_DIR_SIZE) { - sorted_entries = await nb_native().fs.readdir(fs_context, dir_path); - sorted_entries.sort(sort_entries_by_name); - for (const ent of sorted_entries) { - usage += ent.name.length + 4; - } - } - return { time, stat, sorted_entries, usage }; - }, - validate: async ({ stat }, { dir_path, fs_context }) => { - const new_stat = await nb_native().fs.stat(fs_context, dir_path); - return (new_stat.ino === stat.ino && new_stat.mtimeNsBigint === stat.mtimeNsBigint); - }, - item_usage: ({ usage }, dir_path) => usage, - max_usage: config.NSFS_DIR_CACHE_MAX_TOTAL_SIZE, -}); - -/** - * @typedef {{ - * time: number, - * stat: nb.NativeFSStats, - * ver_dir_stat: nb.NativeFSStats, - * usage: number, - * sorted_entries?: fs.Dirent[], - * }} ReaddirVersionsCacheItem - * @type {LRUCache} - */ -const versions_dir_cache = new LRUCache({ - name: 'nsfs-versions-dir-cache', - make_key: ({ dir_path }) => dir_path, - load: async ({ dir_path, fs_context }) => { - const time = Date.now(); - const stat = await nb_native().fs.stat(fs_context, dir_path); - const version_path = dir_path + "/" + HIDDEN_VERSIONS_PATH; - let ver_dir_stat_size; - let is_version_path_exists = false; - let ver_dir_stat; - try { - ver_dir_stat = await nb_native().fs.stat(fs_context, version_path); - ver_dir_stat_size = ver_dir_stat.size; - is_version_path_exists = true; - } catch (err) { - if (err.code === 'ENOENT') { - dbg.log0('NamespaceFS: Version dir not found, ', version_path); - } else { - throw err; - } - ver_dir_stat = null; - ver_dir_stat_size = 0; - } - let sorted_entries; - let usage = config.NSFS_DIR_CACHE_MIN_DIR_SIZE; - if (stat.size + ver_dir_stat_size <= config.NSFS_DIR_CACHE_MAX_DIR_SIZE) { - const latest_versions = await nb_native().fs.readdir(fs_context, dir_path); - if (is_version_path_exists) { - const old_versions = await nb_native().fs.readdir(fs_context, version_path); - // In case we have a null version id inside .versions/ directory we will rename it - // Therefore, old_versions_after_rename will not include an entry with 'null' suffix - // (in case stat fails on a version we would remove it from the array) - const { - renamed_null_versions_set, - old_versions_after_rename - } = await _rename_null_version(old_versions, fs_context, version_path); - const entries = latest_versions.concat(old_versions_after_rename); - sorted_entries = entries.sort(sort_entries_by_name_and_time); - // rename back version to include 'null' suffix. - if (renamed_null_versions_set.size > 0) { - for (const ent of sorted_entries) { - if (renamed_null_versions_set.has(ent.name)) { - const file_name = _get_filename(ent.name); - const version_name_with_null = file_name + NULL_VERSION_SUFFIX; - ent.name = version_name_with_null; - } - } - } - } else { - sorted_entries = latest_versions.sort(sort_entries_by_name); - } - /*eslint no-unused-expressions: ["error", { "allowTernary": true }]*/ - for (const ent of sorted_entries) { - usage += ent.name.length + 4; - } - } - return { time, stat, ver_dir_stat, sorted_entries, usage }; - }, - validate: async ({ stat, ver_dir_stat }, { dir_path, fs_context }) => { - const new_stat = await nb_native().fs.stat(fs_context, dir_path); - const versions_dir_path = path.normalize(path.join(dir_path, '/', HIDDEN_VERSIONS_PATH)); - let new_versions_stat; - try { - new_versions_stat = await nb_native().fs.stat(fs_context, versions_dir_path); - } catch (err) { - if (err.code === 'ENOENT') { - dbg.log0('NamespaceFS: Version dir not found, ', versions_dir_path); - } else { - throw err; - } - } - return (new_stat.ino === stat.ino && - new_stat.mtimeNsBigint === stat.mtimeNsBigint && - new_versions_stat?.ino === ver_dir_stat?.ino && - new_versions_stat?.mtimeNsBigint === ver_dir_stat?.mtimeNsBigint); - }, - item_usage: ({ usage }, dir_path) => usage, - max_usage: config.NSFS_DIR_CACHE_MAX_TOTAL_SIZE, -}); - /** * @typedef {{ * statfs: Record @@ -505,13 +189,6 @@ class NamespaceFS { return fs_context; } - /** - * @returns {string} - */ - get_bucket_tmpdir_name() { - return native_fs_utils.get_bucket_tmpdir_name(this.bucket_id); - } - /** * @returns {string} */ @@ -593,6 +270,7 @@ class NamespaceFS { * prefix?: string, * delimiter?: string, * key_marker?: string, + * list_type?: string, * limit?: number, * }} ListParams */ @@ -611,6 +289,7 @@ class NamespaceFS { * prefix?: string, * delimiter?: string, * key_marker?: string, + * list_type?: string, * version_id_marker?: string, * limit?: number, * }} ListVersionsParams @@ -633,8 +312,8 @@ class NamespaceFS { prefix = '', version_id_marker = '', key_marker = '', + list_type = '', } = params; - if (delimiter && delimiter !== '/') { throw new Error('NamespaceFS: Invalid delimiter ' + delimiter); } @@ -644,8 +323,20 @@ class NamespaceFS { // This is used in order to follow aws spec and behaviour if (!limit) return { is_truncated: false, objects: [], common_prefixes: [] }; - let is_truncated = false; - + const is_truncated = false; + const skip_list = false; + let keymarker; + if (typeof(key_marker) === 'object') { + keymarker = new KeyMarkerFS(key_marker, true); + } else { + keymarker = new KeyMarkerFS({ + marker: key_marker, + marker_pos: '', + pre_dir: [], + pre_dir_pos: [], + }); + } + dbg.log1('list object bucket :', bucket, ' key_marker :', keymarker.key_marker_value, 'list_type : ', list_type); /** * @typedef {{ * key: string, @@ -658,270 +349,151 @@ class NamespaceFS { /** @type {Result[]} */ const results = []; - /** - * @param {string} dir_key - * @returns {Promise} - */ - const process_dir = async dir_key => { - if (this._is_hidden_version_path(dir_key)) { - return; - } - // /** @type {fs.Dir} */ - let dir_handle; - /** @type {ReaddirCacheItem} */ - let cached_dir; - const dir_path = path.join(this.bucket_path, dir_key); - const prefix_dir = prefix.slice(0, dir_key.length); - const prefix_ent = prefix.slice(dir_key.length); - if (!dir_key.startsWith(prefix_dir)) { - // dbg.log0(`prefix dir does not match so no keys in this dir can apply: dir_key=${dir_key} prefix_dir=${prefix_dir}`); - return; - } - const marker_dir = key_marker.slice(0, dir_key.length); - const marker_ent = key_marker.slice(dir_key.length); - // marker is after dir so no keys in this dir can apply - if (dir_key < marker_dir) { - // dbg.log0(`marker is after dir so no keys in this dir can apply: dir_key=${dir_key} marker_dir=${marker_dir}`); - return; - } - // when the dir portion of the marker is completely below the current dir - // then every key in this dir satisfies the marker and marker_ent should not be used. - const marker_curr = (marker_dir < dir_key) ? '' : marker_ent; - // dbg.log0(`process_dir: dir_key=${dir_key} prefix_ent=${prefix_ent} marker_curr=${marker_curr}`); - /** - * @typedef {{ - * key: string, - * common_prefix: boolean - * }} - */ - const insert_entry_to_results_arr = async r => { - let pos; - // Since versions are arranged next to latest object in the latest first order, - // no need to find the sorted last index. Push the ".versions/#VERSION_OBJECT" as - // they are in order - if (results.length && r.key < results[results.length - 1].key && - !this._is_hidden_version_path(r.key)) { - pos = _.sortedLastIndexBy(results, r, a => a.key); - } else { - pos = results.length; - } - - if (pos >= limit) { - is_truncated = true; - return; // not added - } - if (!delimiter && r.common_prefix) { - await process_dir(r.key); - } else { - if (pos < results.length) { - results.splice(pos, 0, r); - } else { - results.push(r); - } - if (results.length > limit) { - results.length = limit; - is_truncated = true; - } - } - }; - - /** - * @param {fs.Dirent} ent - */ - const process_entry = async ent => { - // dbg.log0('process_entry', dir_key, ent.name); - if ((!ent.name.startsWith(prefix_ent) || - ent.name < marker_curr || - ent.name === this.get_bucket_tmpdir_name() || - ent.name === config.NSFS_FOLDER_OBJECT_NAME) || - this._is_hidden_version_path(ent.name)) { - return; - } - const isDir = await is_directory_or_symlink_to_directory(ent, fs_context, path.join(dir_path, ent.name)); - - let r; - if (list_versions && _is_version_or_null_in_file_name(ent.name)) { - r = { - key: this._get_version_entry_key(dir_key, ent), - common_prefix: isDir, - is_latest: false - }; - } else { - r = { - key: this._get_entry_key(dir_key, ent, isDir), - common_prefix: isDir, - is_latest: true - }; - } - await insert_entry_to_results_arr(r); - }; - - if (!(await this.check_access(fs_context, dir_path))) return; - try { - if (list_versions) { - cached_dir = await versions_dir_cache.get_with_cache({ dir_path, fs_context }); - } else { - cached_dir = await dir_cache.get_with_cache({ dir_path, fs_context }); - } - } catch (err) { - if (['ENOENT', 'ENOTDIR'].includes(err.code)) { - dbg.log0('NamespaceFS: no keys for non existing dir', dir_path); - return; - } - throw err; - } - - // insert dir object to objects list if its key is lexicographicly bigger than the key marker && - // no delimiter OR prefix is the current directory entry - const is_dir_content = cached_dir.stat.xattr && cached_dir.stat.xattr[XATTR_DIR_CONTENT]; - if (is_dir_content && dir_key > key_marker && (!delimiter || dir_key === prefix)) { - const r = { key: dir_key, common_prefix: false }; - await insert_entry_to_results_arr(r); - } - - if (cached_dir.sorted_entries) { - const sorted_entries = cached_dir.sorted_entries; - let marker_index; - // Two ways followed here to find the index. - // 1. When inside marker_dir: Here the entries are sorted based on time. Here - // FindIndex() is called since sortedLastIndexBy() expects sorted order by name - // 2. When marker_dir above dir_path: sortedLastIndexBy() is called since entries are - // sorted by name - // 3. One of the below conditions, marker_curr.includes('/') checks whether - // the call is for the directory that contains marker_curr - if (list_versions && marker_curr && !marker_curr.includes('/')) { - let start_marker = marker_curr; - if (version_id_marker) start_marker = version_id_marker; - marker_index = _.findIndex( - sorted_entries, - {name: start_marker} - ) + 1; - } else { - marker_index = _.sortedLastIndexBy( - sorted_entries, - make_named_dirent(marker_curr), - get_entry_name - ); - } - - // handling a scenario in which key_marker points to an object inside a directory - // since there can be entries inside the directory that will need to be pushed - // to results array - if (marker_index) { - const prev_dir = sorted_entries[marker_index - 1]; - const prev_dir_name = prev_dir.name; - if (marker_curr.startsWith(prev_dir_name) && dir_key !== prev_dir.name) { - if (!delimiter) { - const isDir = await is_directory_or_symlink_to_directory( - prev_dir, fs_context, path.join(dir_path, prev_dir_name, '/')); - if (isDir) { - await process_dir(path.join(dir_key, prev_dir_name, '/')); - } - } - } - } - for (let i = marker_index; i < sorted_entries.length; ++i) { - const ent = sorted_entries[i]; - // when entry is NSFS_FOLDER_OBJECT_NAME=.folder file, - // and the dir key marker is the name of the curr directory - skip on adding it - if (ent.name === config.NSFS_FOLDER_OBJECT_NAME && dir_key === marker_dir) { - continue; - } - await process_entry(ent); - // since we traverse entries in sorted order, - // we can break as soon as enough keys are collected. - if (is_truncated) break; - } - return; - } - // for large dirs we cannot keep all entries in memory - // so we have to stream the entries one by one while filtering only the needed ones. - try { - dbg.warn('NamespaceFS: open dir streaming', dir_path, 'size', cached_dir.stat.size); - dir_handle = await nb_native().fs.opendir(fs_context, dir_path); //, { bufferSize: 128 }); - for (;;) { - const dir_entry = await dir_handle.read(fs_context); - if (!dir_entry) break; - await process_entry(dir_entry); - // since we dir entries streaming order is not sorted, - // we have to keep scanning all the keys before we can stop. - } - await dir_handle.close(fs_context); - dir_handle = null; - } finally { - if (dir_handle) { - try { - dbg.warn('NamespaceFS: close dir streaming', dir_path, 'size', cached_dir.stat.size); - await dir_handle.close(fs_context); - } catch (err) { - dbg.error('NamespaceFS: close dir failed', err); - } - dir_handle = null; - } - } - }; - - let previous_key; - /** - * delete markers are always in the .versions folder, so we need to have special case to determine - * if they are delete markers. since the result list is ordered by latest entries first, the first - * entry of every key is the latest - * TODO need different way to check for isLatest in case of unordered list object versions - * @param {Object} obj_info - */ - const set_latest_delete_marker = obj_info => { - if (obj_info.delete_marker && previous_key !== obj_info.key) { - obj_info.is_latest = true; - } - }; const prefix_dir_key = prefix.slice(0, prefix.lastIndexOf('/') + 1); - await process_dir(prefix_dir_key); - await Promise.all(results.map(async r => { + // current_dir added for unsorted listing + const list_obj = new ListObjectFS({ fs_context, list_versions, keymarker, prefix_dir_key, is_truncated, delimiter, + prefix, version_id_marker, list_type, results, limit, skip_list, key_marker + }); + await namespace_fs_util.process_dir(this.bucket_path, this.bucket_id, prefix_dir_key + keymarker.current_dir, list_obj); + await Promise.all(list_obj.results.map(async r => { if (r.common_prefix) return; const entry_path = path.join(this.bucket_path, r.key); //If entry is outside of bucket, returns stat of symbolic link - const use_lstat = !(await this._is_path_in_bucket_boundaries(fs_context, entry_path)); + const use_lstat = !(await namespace_fs_util._is_path_in_bucket_boundaries(fs_context, entry_path, this.bucket_path)); r.stat = await nb_native().fs.stat(fs_context, entry_path, { use_lstat }); })); - const res = { - objects: [], - common_prefixes: [], - is_truncated, - next_marker: undefined, - next_version_id_marker: undefined, - }; - for (const r of results) { - let obj_info; - if (r.common_prefix) { - res.common_prefixes.push(r.key); - } else { - obj_info = this._get_object_info(bucket, r.key, r.stat, false, r.is_latest); - if (!list_versions && obj_info.delete_marker) { - continue; - } - if (this._is_hidden_version_path(obj_info.key)) { - obj_info.key = path.normalize(obj_info.key.replace(HIDDEN_VERSIONS_PATH + '/', '')); - obj_info.key = _get_filename(obj_info.key); - set_latest_delete_marker(obj_info); - } - res.objects.push(obj_info); - previous_key = obj_info.key; + return await this.prepare_result(bucket, await list_obj.get_is_truncated(), list_obj.results, list_versions, + await list_obj.get_keymarker(), list_type); + } catch (err) { + throw native_fs_utils.translate_error_codes(err, native_fs_utils.entity_enum.OBJECT); + } + } + + /** + * Prepare result for list_type 1 and 2, + * For list_type 1 : return simply `next_marke`, it cant hold complex objects, Because of that next marker dosnt + * hold and position for file system, + * For list_type 2 : Return object that contains `marker`, `marker_pos`, parent dir structure with positions for + * back tracking when the child dir list all files. + * @param {string} bucket + * @param {boolean} is_truncated + * @param {object[]} results + * @param {boolean} list_versions + * @param {Object} keymarker + * @param {string} list_type + */ + async prepare_result(bucket, is_truncated, results, list_versions, keymarker, list_type) { + const res = { + objects: [], + common_prefixes: [], + is_truncated, + next_marker: undefined, + next_version_id_marker: undefined, + }; + + let previous_key; + /** + * delete markers are always in the .versions folder, so we need to have special case to determine + * if they are delete markers. since the result list is ordered by latest entries first, the first + * entry of every key is the latest + * TODO need different way to check for isLatest in case of unordered list object versions + * @param {Object} obj_info + */ + const set_latest_delete_marker = obj_info => { + if (obj_info.delete_marker && previous_key !== obj_info.key) { + obj_info.is_latest = true; + } + }; + for (const r of results) { + let obj_info; + if (r.common_prefix) { + res.common_prefixes.push(r.key); + } else { + obj_info = this._get_object_info(bucket, r.key, r.stat, false, r.is_latest); + if (!list_versions && obj_info.delete_marker) { + continue; } - if (res.is_truncated) { - if (list_versions && _is_version_object(r.key)) { - const next_version_id_marker = r.key.substring(r.key.lastIndexOf('/') + 1); - res.next_version_id_marker = next_version_id_marker; - res.next_marker = _get_filename(next_version_id_marker); - } else { - res.next_marker = r.key; - } + if (namespace_fs_util._is_hidden_version_path(obj_info.key)) { + obj_info.key = path.normalize(obj_info.key.replace(namespace_fs_util.HIDDEN_VERSIONS_PATH + '/', '')); + obj_info.key = namespace_fs_util._get_filename(obj_info.key); + set_latest_delete_marker(obj_info); + } + res.objects.push(obj_info); + previous_key = obj_info.key; + } + if (res.is_truncated) { + if (list_versions && namespace_fs_util._is_version_object(r.key)) { + const next_version_id_marker = r.key.substring(r.key.lastIndexOf('/') + 1); + res.next_version_id_marker = next_version_id_marker; + res.next_marker = list_type === '2' ? { + marker: namespace_fs_util._get_filename(next_version_id_marker), + marker_pos: r.marker_pos ? r.marker_pos.toString() : '', + pre_dir: keymarker.pre_dir, + pre_dir_pos: keymarker.pre_dir_pos, + } : namespace_fs_util._get_filename(next_version_id_marker); + } else { + res.next_marker = list_type === '2' ? { + marker: r.key, + marker_pos: r.marker_pos ? r.marker_pos.toString() : '', + pre_dir: keymarker.pre_dir, + pre_dir_pos: keymarker.pre_dir_pos, + } : r.key; } } - return res; - } catch (err) { - throw native_fs_utils.translate_error_codes(err, native_fs_utils.entity_enum.OBJECT); } + return res; + } + + /** + * @param {string} bucket + * @param {string} key + * @param {nb.NativeFSStats} stat + * @param {Boolean} isDir + * @param {boolean} [is_latest=true] + * @returns {nb.ObjectInfo} + */ + _get_object_info(bucket, key, stat, isDir, is_latest = true) { + const etag = namespace_fs_util._get_etag(stat); + const create_time = stat.mtime.getTime(); + const encryption = namespace_fs_util._get_encryption_info(stat); + const version_id = ((this._is_versioning_enabled() || this._is_versioning_suspended()) && this._get_version_id_by_xattr(stat)) || + undefined; + const delete_marker = stat.xattr?.[XATTR_DELETE_MARKER] === 'true'; + const dir_content_type = stat.xattr?.[namespace_fs_util.XATTR_DIR_CONTENT] && ((Number(stat.xattr?.[namespace_fs_util.XATTR_DIR_CONTENT]) > 0 && 'application/octet-stream') || 'application/x-directory'); + const content_type = stat.xattr?.[XATTR_CONTENT_TYPE] || + (isDir && dir_content_type) || + mime.getType(key) || 'application/octet-stream'; + const storage_class = s3_utils.parse_storage_class(stat.xattr?.[namespace_fs_util.XATTR_STORAGE_CLASS_KEY]); + const size = Number(stat.xattr?.[namespace_fs_util.XATTR_DIR_CONTENT] || stat.size); + const tag_count = stat.xattr ? namespace_fs_util._number_of_tags_fs_xttr(stat.xattr) : 0; + + return { + obj_id: etag, + bucket, + key, + size, + etag, + create_time, + content_type, + encryption, + version_id, + is_latest, + delete_marker, + storage_class, + restore_status: GlacierBackend.get_restore_status(stat.xattr, new Date(), this._get_file_path({key})), + xattr: namespace_fs_util.to_xattr(stat.xattr), + tag_count, + + // temp: + lock_settings: undefined, + md5_b64: undefined, + num_parts: undefined, + sha256_b64: undefined, + stats: undefined, + tagging: undefined, + object_owner: namespace_fs_util._get_object_owner() + }; } ///////////////// @@ -938,14 +510,14 @@ class NamespaceFS { for (;;) { try { file_path = await this._find_version_path(fs_context, params, true); - await this._check_path_in_bucket_boundaries(fs_context, file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, file_path, this.bucket_path); await this._load_bucket(params, fs_context); stat = await nb_native().fs.stat(fs_context, file_path); isDir = native_fs_utils.isDirectory(stat); if (isDir) { - if (!stat.xattr?.[XATTR_DIR_CONTENT] || !params.key.endsWith('/')) { + if (!stat.xattr?.[namespace_fs_util.XATTR_DIR_CONTENT] || !params.key.endsWith('/')) { throw error_utils.new_error_code('ENOENT', 'NoSuchKey'); - } else if (stat.xattr?.[XATTR_DIR_CONTENT] !== '0') { + } else if (stat.xattr?.[namespace_fs_util.XATTR_DIR_CONTENT] !== '0') { // find dir object content file path and return its stat + xattr of its parent directory const dir_content_path = await this._find_version_path(fs_context, params); const dir_content_path_stat = await nb_native().fs.stat(fs_context, dir_content_path); @@ -976,12 +548,12 @@ class NamespaceFS { } async _is_empty_directory_content(file_path, fs_context, params) { - const is_dir_content = this._is_directory_content(file_path, params.key); + const is_dir_content = namespace_fs_util._is_directory_content(file_path, params.key); if (is_dir_content) { try { const md_path = this._get_file_md_path(params); const dir_stat = await nb_native().fs.stat(fs_context, md_path); - if (dir_stat && dir_stat.xattr[XATTR_DIR_CONTENT] === '0') return true; + if (dir_stat && dir_stat.xattr[namespace_fs_util.XATTR_DIR_CONTENT] === '0') return true; } catch (err) { //failed to get object new NoobaaEvent(NoobaaEvent.OBJECT_GET_FAILED).create_event(params.key, @@ -1005,7 +577,7 @@ class NamespaceFS { for (;;) { try { file_path = await this._find_version_path(fs_context, params); - await this._check_path_in_bucket_boundaries(fs_context, file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, file_path, this.bucket_path); // NOTE: don't move this code after the open // this can lead to ENOENT failures due to file not exists when content size is 0 @@ -1197,7 +769,7 @@ class NamespaceFS { const file_path = this._get_file_path(params); let upload_params; try { - await this._check_path_in_bucket_boundaries(fs_context, file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, file_path, this.bucket_path); if (this.empty_dir_content_flow(file_path, params)) { const content_dir_info = await this._create_empty_dir_content(fs_context, params, file_path); @@ -1276,7 +848,7 @@ class NamespaceFS { // on non server side copy - we will immediatly do the fallback async _try_copy_file(fs_context, params, file_path, upload_path) { const source_file_path = await this._find_version_path(fs_context, params.copy_source); - await this._check_path_in_bucket_boundaries(fs_context, source_file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, source_file_path, this.bucket_path); // await this._fail_if_archived_or_sparse_file(fs_context, source_file_path, stat); let res = COPY_STATUS_ENUM.FALLBACK; if (this._is_versioning_disabled()) { @@ -1310,7 +882,7 @@ class NamespaceFS { if (params.copy_source) { const src_file_path = await this._find_version_path(fs_context, params.copy_source); const stat = await nb_native().fs.stat(fs_context, src_file_path); - const src_storage_class = s3_utils.parse_storage_class(stat.xattr[XATTR_STORAGE_CLASS_KEY]); + const src_storage_class = s3_utils.parse_storage_class(stat.xattr[namespace_fs_util.XATTR_STORAGE_CLASS_KEY]); const src_restore_status = GlacierBackend.get_restore_status(stat.xattr, new Date(), src_file_path); if (src_storage_class === s3_utils.STORAGE_CLASS_GLACIER) { @@ -1337,10 +909,10 @@ class NamespaceFS { const part_upload = file_path === upload_path; const same_inode = params.copy_source && copy_res === COPY_STATUS_ENUM.SAME_INODE; const should_replace_xattr = params.copy_source ? copy_res === COPY_STATUS_ENUM.FALLBACK : true; - const is_dir_content = this._is_directory_content(file_path, params.key); + const is_dir_content = namespace_fs_util._is_directory_content(file_path, params.key); const stat = await target_file.stat(fs_context); - this._verify_encryption(params.encryption, this._get_encryption_info(stat)); + this._verify_encryption(params.encryption, namespace_fs_util._get_encryption_info(stat)); const copy_xattr = params.copy_source && params.xattr_copy; let fs_xattr = to_fs_xattr(params.xattr); @@ -1366,7 +938,7 @@ class NamespaceFS { } if (!part_upload && params.storage_class) { fs_xattr = Object.assign(fs_xattr || {}, { - [XATTR_STORAGE_CLASS_KEY]: params.storage_class + [namespace_fs_util.XATTR_STORAGE_CLASS_KEY]: params.storage_class }); if (params.storage_class === s3_utils.STORAGE_CLASS_GLACIER) { @@ -1376,7 +948,7 @@ class NamespaceFS { if (params.tagging) { for (const { key, value } of params.tagging) { fs_xattr = Object.assign(fs_xattr || {}, { - [XATTR_TAG + key]: value + [namespace_fs_util.XATTR_TAG + key]: value }); } } @@ -1445,7 +1017,7 @@ class NamespaceFS { if (err.code !== 'ENOENT') throw err; // checking that the source_path still exists // TODO: handle tmp file - source_path is missing - if (source_path && !await this.check_access(fs_context, source_path)) throw err; + if (source_path && !await namespace_fs_util.check_access(fs_context, source_path)) throw err; dbg.warn(`NamespaceFS: Retrying failed move to dest retries=${retries}` + ` source_path=${source_path} dest_path=${dest_path}`, err); } @@ -1494,7 +1066,7 @@ class NamespaceFS { dbg.log1('Namespace_fs._move_to_dest_version:', latest_ver_info, new_ver_info, gpfs_options); if (this._is_versioning_suspended()) { - if (latest_ver_info?.version_id_str === NULL_VERSION_ID) { + if (latest_ver_info?.version_id_str === namespace_fs_util.NULL_VERSION_ID) { //on GPFS safe_move overrides the latest object so no need to unlink if (!is_gpfs) { dbg.log1('NamespaceFS._move_to_dest_version suspended: version ID of the latest version is null - the file will be unlinked'); @@ -1507,7 +1079,7 @@ class NamespaceFS { } if (latest_ver_info && ((this._is_versioning_enabled()) || - (this._is_versioning_suspended() && latest_ver_info.version_id_str !== NULL_VERSION_ID))) { + (this._is_versioning_suspended() && latest_ver_info.version_id_str !== namespace_fs_util.NULL_VERSION_ID))) { dbg.log1('NamespaceFS._move_to_dest_version version ID of the latest version is a unique ID - the file will be moved it to .versions/ directory'); await native_fs_utils._make_path_dirs(versioned_path, fs_context); await native_fs_utils.safe_move(fs_context, latest_ver_path, versioned_path, latest_ver_info, @@ -1610,7 +1182,7 @@ class NamespaceFS { const fs_context = this.prepare_fs_context(object_sdk); await this._load_bucket(params, fs_context); const mpu_root_path = this._mpu_root_path(); - await this._check_path_in_bucket_boundaries(fs_context, mpu_root_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, mpu_root_path, this.bucket_path); const multipart_upload_dirs = await nb_native().fs.readdir(fs_context, mpu_root_path); const common_prefixes_set = new Set(); const multipart_uploads = await P.map(multipart_upload_dirs, async obj => { @@ -1753,7 +1325,7 @@ class NamespaceFS { try { const fs_context = this.prepare_fs_context(object_sdk); await this._load_multipart(params, fs_context); - await this._check_path_in_bucket_boundaries(fs_context, params.mpu_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, params.mpu_path, this.bucket_path); const { data } = await nb_native().fs.readFile( fs_context, path.join(params.mpu_path, 'create_object_upload') @@ -1770,7 +1342,7 @@ class NamespaceFS { return { num, size: stat.size, - etag: this._get_etag(stat), + etag: namespace_fs_util._get_etag(stat), last_modified: new Date(stat.mtime), }; }) @@ -1810,7 +1382,7 @@ class NamespaceFS { await this._load_multipart(params, fs_context); const file_path = this._get_file_path(params); - await this._check_path_in_bucket_boundaries(fs_context, file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, file_path, this.bucket_path); const upload_path = path.join(params.mpu_path, 'final'); target_file = null; @@ -1822,7 +1394,7 @@ class NamespaceFS { const md_part_stat = await nb_native().fs.stat(fs_context, md_part_path); const part_size = Number(md_part_stat.xattr[XATTR_PART_SIZE]); const part_offset = Number(md_part_stat.xattr[XATTR_PART_OFFSET]); - if (etag !== this._get_etag(md_part_stat)) { + if (etag !== namespace_fs_util._get_etag(md_part_stat)) { throw new Error('mismatch part etag: ' + util.inspect({ num, etag, md_part_path, md_part_stat, params })); } if (MD5Async) await MD5Async.update(Buffer.from(etag, 'hex')); @@ -1927,7 +1499,7 @@ class NamespaceFS { const fs_context = this.prepare_fs_context(object_sdk); await this._load_bucket(params, fs_context); const file_path = await this._find_version_path(fs_context, params); - await this._check_path_in_bucket_boundaries(fs_context, file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, file_path, this.bucket_path); dbg.log0('NamespaceFS: delete_object', file_path); let res; const is_key_dir_path = await this._is_key_dir_path(fs_context, params.key); @@ -1960,7 +1532,7 @@ class NamespaceFS { } try { const file_path = this._get_file_path({ key }); - await this._check_path_in_bucket_boundaries(fs_context, file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, file_path, this.bucket_path); dbg.log1('NamespaceFS: delete_multiple_objects', file_path); await this._delete_single_object(fs_context, file_path, { key }); res.push({ key }); @@ -1993,7 +1565,7 @@ class NamespaceFS { await this._delete_path_dirs(file_path, fs_context); // when deleting the data of a directory object, we need to remove the directory dir object xattr // if the dir still exists - occurs when deleting dir while the dir still has entries in it - if (this._is_directory_content(file_path, params.key)) { + if (namespace_fs_util._is_directory_content(file_path, params.key)) { await this._clear_user_xattr(fs_context, this._get_file_md_path(params), XATTR_USER_PREFIX); } } @@ -2034,9 +1606,9 @@ class NamespaceFS { const stat = await file.stat(fs_context); if (stat.xattr) { for (const [xattr_key, xattr_value] of Object.entries(stat.xattr)) { - if (xattr_key.includes(XATTR_TAG)) { + if (xattr_key.includes(namespace_fs_util.XATTR_TAG)) { tag_set.push({ - key: xattr_key.replace(XATTR_TAG, ''), + key: xattr_key.replace(namespace_fs_util.XATTR_TAG, ''), value: xattr_value, }); } @@ -2062,7 +1634,7 @@ class NamespaceFS { } const fs_context = this.prepare_fs_context(object_sdk); try { - await this._clear_user_xattr(fs_context, file_path, XATTR_TAG); + await this._clear_user_xattr(fs_context, file_path, namespace_fs_util.XATTR_TAG); } catch (err) { dbg.error(`NamespaceFS.delete_object_tagging: failed in dir ${file_path} with error: `, err); throw native_fs_utils.translate_error_codes(err, native_fs_utils.entity_enum.OBJECT); @@ -2074,7 +1646,7 @@ class NamespaceFS { const fs_xattr = {}; const tagging = params.tagging && Object.fromEntries(params.tagging.map(tag => ([tag.key, tag.value]))); for (const [xattr_key, xattr_value] of Object.entries(tagging)) { - fs_xattr[XATTR_TAG + xattr_key] = xattr_value; + fs_xattr[namespace_fs_util.XATTR_TAG + xattr_key] = xattr_value; } let file_path; if (params.version_id && this._is_versioning_enabled()) { @@ -2086,7 +1658,7 @@ class NamespaceFS { dbg.log0('NamespaceFS.put_object_tagging: fs_xattr ', fs_xattr, 'file_path :', file_path); try { // remove existng tag before putting new tags - await this._clear_user_xattr(fs_context, file_path, XATTR_TAG); + await this._clear_user_xattr(fs_context, file_path, namespace_fs_util.XATTR_TAG); await this.set_fs_xattr_op(fs_context, file_path, fs_xattr, undefined); } catch (err) { dbg.error(`NamespaceFS.put_object_tagging: failed in dir ${file_path} with error: `, err); @@ -2248,13 +1820,13 @@ class NamespaceFS { const p = this._get_file_path({ key }); // when the key refers to a directory (trailing /) but we would like to return the md path // we return the parent directory of .folder - return this._is_directory_content(p, key) ? path.join(path.dirname(p), '/') : p; + return namespace_fs_util._is_directory_content(p, key) ? path.join(path.dirname(p), '/') : p; } _assign_md5_to_fs_xattr(md5_digest, fs_xattr) { // TODO: Assign content_md5_mtime fs_xattr = Object.assign(fs_xattr || {}, { - [XATTR_MD5_KEY]: md5_digest + [namespace_fs_util.XATTR_MD5_KEY]: md5_digest }); return fs_xattr; } @@ -2330,126 +1902,16 @@ class NamespaceFS { async _assign_dir_content_to_xattr(fs_context, fs_xattr, params, copy_xattr) { const dir_path = this._get_file_md_path(params); fs_xattr = Object.assign(fs_xattr || {}, { - [XATTR_DIR_CONTENT]: params.size || 0 + [namespace_fs_util.XATTR_DIR_CONTENT]: params.size || 0 }); // when copying xattr we shouldn't clear user xattr const clear_xattr = copy_xattr ? '' : XATTR_USER_PREFIX; await this.set_fs_xattr_op(fs_context, dir_path, fs_xattr, clear_xattr); } - /** - * - * @param {string} file_path - fs context object - * @param {string} key - fs_xattr object to be set on a directory - * @returns {boolean} - describes if the file path describe a directory content - */ - _is_directory_content(file_path, key) { - return (file_path && file_path.endsWith(config.NSFS_FOLDER_OBJECT_NAME)) && (key && key.endsWith('/')); - } - - /** - * @param {string} dir_key - * @param {fs.Dirent} ent - * @returns {string} - */ - _get_entry_key(dir_key, ent, isDir) { - if (ent.name === config.NSFS_FOLDER_OBJECT_NAME) return dir_key; - return dir_key + ent.name + (isDir ? '/' : ''); - } - - /** - * @param {string} dir_key - * @param {fs.Dirent} ent - * @returns {string} - */ - _get_version_entry_key(dir_key, ent) { - if (ent.name === config.NSFS_FOLDER_OBJECT_NAME) return dir_key; - return dir_key + HIDDEN_VERSIONS_PATH + '/' + ent.name; - } - - /** - * @returns {string} - */ - _get_etag(stat) { - const xattr_etag = this._etag_from_fs_xattr(stat.xattr); - if (xattr_etag) return xattr_etag; - // IMPORTANT NOTICE - we must return an etag that contains a dash! - // because this is the criteria of S3 SDK to decide if etag represents md5 - // and perform md5 validation of the data. - return _get_version_id_by_stat(stat); - } - - _etag_from_fs_xattr(xattr) { - if (_.isEmpty(xattr)) return undefined; - return xattr[XATTR_MD5_KEY]; - } - - _number_of_tags_fs_xttr(xattr) { - return Object.keys(xattr).filter(xattr_key => xattr_key.includes(XATTR_TAG)).length; - } - - /** - * @param {string} bucket - * @param {string} key - * @param {nb.NativeFSStats} stat - * @param {Boolean} isDir - * @param {boolean} [is_latest=true] - * @returns {nb.ObjectInfo} - */ - _get_object_info(bucket, key, stat, isDir, is_latest = true) { - const etag = this._get_etag(stat); - const create_time = stat.mtime.getTime(); - const encryption = this._get_encryption_info(stat); - const version_id = ((this._is_versioning_enabled() || this._is_versioning_suspended()) && this._get_version_id_by_xattr(stat)) || - undefined; - const delete_marker = stat.xattr?.[XATTR_DELETE_MARKER] === 'true'; - const dir_content_type = stat.xattr?.[XATTR_DIR_CONTENT] && ((Number(stat.xattr?.[XATTR_DIR_CONTENT]) > 0 && 'application/octet-stream') || 'application/x-directory'); - const content_type = stat.xattr?.[XATTR_CONTENT_TYPE] || - (isDir && dir_content_type) || - mime.getType(key) || 'application/octet-stream'; - const storage_class = s3_utils.parse_storage_class(stat.xattr?.[XATTR_STORAGE_CLASS_KEY]); - const size = Number(stat.xattr?.[XATTR_DIR_CONTENT] || stat.size); - const tag_count = stat.xattr ? this._number_of_tags_fs_xttr(stat.xattr) : 0; - - return { - obj_id: etag, - bucket, - key, - size, - etag, - create_time, - content_type, - encryption, - version_id, - is_latest, - delete_marker, - storage_class, - restore_status: GlacierBackend.get_restore_status(stat.xattr, new Date(), this._get_file_path({key})), - xattr: to_xattr(stat.xattr), - tag_count, - - // temp: - lock_settings: undefined, - md5_b64: undefined, - num_parts: undefined, - sha256_b64: undefined, - stats: undefined, - tagging: undefined, - object_owner: this._get_object_owner() - }; - } - - /** - * _get_object_owner in the future we will return object owner - * currently not implemented because ACLs are not implemented as well - */ - _get_object_owner() { - return undefined; - } - _get_upload_info(stat, version_id) { - const etag = this._get_etag(stat); - const encryption = this._get_encryption_info(stat); + const etag = namespace_fs_util._get_etag(stat); + const encryption = namespace_fs_util._get_encryption_info(stat); return { etag, encryption, @@ -2457,17 +1919,6 @@ class NamespaceFS { }; } - _get_encryption_info(stat) { - // Currently encryption is supported only on top of GPFS, otherwise we will return undefined - return stat.xattr['gpfs.Encryption'] ? { - algorithm: 'AES256', - kms_key_id: '', - context_b64: '', - key_md5_b64: '', - key_b64: '', - } : undefined; - } - // This function verifies the user didn't ask for SSE-S3 Encryption, when Encryption is not supported by the FS _verify_encryption(user_encryption, fs_encryption) { if (user_encryption && user_encryption.algorithm === 'AES256' && !fs_encryption) { @@ -2577,86 +2028,6 @@ class NamespaceFS { } } - async check_access(fs_context, dir_path) { - try { - dbg.log0('check_access: dir_path', dir_path, 'fs_context', fs_context); - await this._check_path_in_bucket_boundaries(fs_context, dir_path); - await nb_native().fs.checkAccess(fs_context, dir_path); - return true; - } catch (err) { - dbg.error('check_access: error ', err.code, err, dir_path, this.bucket_path); - const is_bucket_dir = dir_path === this.bucket_path; - - if (err.code === 'ENOTDIR' && !is_bucket_dir) { - dbg.warn('check_access: the path', dir_path, 'is not a directory'); - return true; - } - // if dir_path is the bucket path we would like to throw an error - // for other dirs we will skip - if (['EPERM', 'EACCES'].includes(err.code) && !is_bucket_dir) { - return false; - } - if (err.code === 'ENOENT' && !is_bucket_dir) { - // invalidate if dir - dir_cache.invalidate({ dir_path, fs_context }); - return false; - } - throw err; - } - } - - /** - * Return false if the entry is outside of the bucket - * @param {*} fs_context - * @param {*} entry_path - * @returns - */ - async _is_path_in_bucket_boundaries(fs_context, entry_path) { - dbg.log1('check_bucket_boundaries: fs_context', fs_context, 'file_path', entry_path, 'this.bucket_path', this.bucket_path); - if (!entry_path.startsWith(this.bucket_path)) { - dbg.log0('check_bucket_boundaries: the path', entry_path, 'is not in the bucket', this.bucket_path, 'boundaries'); - return false; - } - try { - // Returns the real path of the entry. - // The entry path may point to regular file or directory, but can have symbolic links - const full_path = await nb_native().fs.realpath(fs_context, entry_path); - if (!full_path.startsWith(this.bucket_path)) { - dbg.log0('check_bucket_boundaries: the path', entry_path, 'is not in the bucket', this.bucket_path, 'boundaries'); - return false; - } - } catch (err) { - if (err.code === 'ENOTDIR') { - dbg.warn('_is_path_in_bucket_boundaries: the path', entry_path, 'is not a directory'); - return true; - } - // Error: No such file or directory - // In the upload use case, the destination file desn't exist yet, need to validate the parent dirs path. - if (err.code === 'ENOENT') { - return this._is_path_in_bucket_boundaries(fs_context, path.dirname(entry_path)); - } - // Read or search permission was denied for a component of the path prefix. - if (err.code === 'EACCES') { - return false; - } - throw error_utils.new_error_code('INTERNAL_ERROR', - 'check_bucket_boundaries error ' + err.code + ' ' + entry_path + ' ' + err, { cause: err }); - } - return true; - } - - /** - * throws AccessDenied, if the entry is outside of the bucket - * @param {*} fs_context - * @param {*} entry_path - */ - async _check_path_in_bucket_boundaries(fs_context, entry_path) { - if (!config.NSFS_CHECK_BUCKET_BOUNDARIES) return; - if (!(await this._is_path_in_bucket_boundaries(fs_context, entry_path))) { - throw error_utils.new_error_code('EACCES', 'Entry ' + entry_path + ' is not in bucket boundaries'); - } - } - // TODO: without fsync this logic fails also for regular files because blocks take time to update after writing. // async _fail_if_archived_or_sparse_file(fs_context, file_path, stat) { // if (isDirectory(stat)) return; @@ -2673,7 +2044,7 @@ class NamespaceFS { // when obj is a directory and size === 0 folder content (.folder) should not be created empty_dir_content_flow(file_path, params) { - const is_dir_content = this._is_directory_content(file_path, params.key); + const is_dir_content = namespace_fs_util._is_directory_content(file_path, params.key); return is_dir_content && params.size === 0; } /** @@ -2712,8 +2083,8 @@ class NamespaceFS { } _get_version_id_by_mode(stat) { - if (this._is_versioning_enabled()) return _get_version_id_by_stat(stat); - if (this._is_versioning_suspended()) return NULL_VERSION_ID; + if (this._is_versioning_enabled()) return namespace_fs_util._get_version_id_by_stat(stat); + if (this._is_versioning_suspended()) return namespace_fs_util.NULL_VERSION_ID; throw new Error('_get_version_id_by_mode: Invalid versioning mode'); } @@ -2735,7 +2106,7 @@ class NamespaceFS { // returns version path of the form bucket_path/dir/.versions/{key}_{version_id} _get_version_path(key, version_id) { const key_version = path.basename(key) + (version_id ? '_' + version_id : ''); - return path.normalize(path.join(this.bucket_path, path.dirname(key), HIDDEN_VERSIONS_PATH, key_version)); + return path.normalize(path.join(this.bucket_path, path.dirname(key), namespace_fs_util.HIDDEN_VERSIONS_PATH, key_version)); } // this function returns the following version information - @@ -2869,7 +2240,7 @@ class NamespaceFS { let gpfs_options; try { file_path = await this._find_version_path(fs_context, { key, version_id }); - await this._check_path_in_bucket_boundaries(fs_context, file_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, file_path, this.bucket_path); const version_info = await this._get_version_info(fs_context, file_path); if (!version_info) return; @@ -2922,7 +2293,7 @@ class NamespaceFS { let delete_marker_created; let latest_ver_info; const latest_version_path = this._get_file_path({ key }); - await this._check_path_in_bucket_boundaries(fs_context, latest_version_path); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, latest_version_path, this.bucket_path); for (const version_id of versions) { try { if (version_id) { @@ -3057,7 +2428,7 @@ class NamespaceFS { const versioned_path = this._get_version_path(params.key, latest_ver_info.version_id_str); const suspended_and_latest_is_not_null = this._is_versioning_suspended() && - latest_ver_info.version_id_str !== NULL_VERSION_ID; + latest_ver_info.version_id_str !== namespace_fs_util.NULL_VERSION_ID; const bucket_tmp_dir_path = this.get_bucket_tmpdir_full_path(); if (this._is_versioning_enabled() || suspended_and_latest_is_not_null) { await native_fs_utils._make_path_dirs(versioned_path, fs_context); @@ -3096,8 +2467,8 @@ class NamespaceFS { // It can be latest version, old version in .version/ directory or delete marker // This function removes an object version or delete marker with a null version ID inside .version/ directory async _delete_null_version_from_versions_directory(key, fs_context) { - const null_versioned_path = this._get_version_path(key, NULL_VERSION_ID); - await this._check_path_in_bucket_boundaries(fs_context, null_versioned_path); + const null_versioned_path = this._get_version_path(key, namespace_fs_util.NULL_VERSION_ID); + await namespace_fs_util._check_path_in_bucket_boundaries(fs_context, null_versioned_path, this.bucket_path); let gpfs_options; let retries = config.NSFS_RENAME_RETRIES; for (;;) { @@ -3135,10 +2506,10 @@ class NamespaceFS { const stat = await upload_params.target_file.stat(fs_context); if (this._is_versioning_enabled()) { // the delete marker path built from its version info (mtime + ino) - delete_marker_version_id = _get_version_id_by_stat(stat); + delete_marker_version_id = namespace_fs_util._get_version_id_by_stat(stat); } else { // the delete marker file name would be with a 'null' suffix - delete_marker_version_id = NULL_VERSION_ID; + delete_marker_version_id = namespace_fs_util.NULL_VERSION_ID; } const file_path = this._get_version_path(params.key, delete_marker_version_id); @@ -3165,7 +2536,7 @@ class NamespaceFS { // try find prev version by hint or by iterating on .versions/ dir async find_max_version_past(fs_context, key) { - const versions_dir = path.normalize(path.join(this.bucket_path, path.dirname(key), HIDDEN_VERSIONS_PATH)); + const versions_dir = path.normalize(path.join(this.bucket_path, path.dirname(key), namespace_fs_util.HIDDEN_VERSIONS_PATH)); try { const versions = await nb_native().fs.readdir(fs_context, versions_dir); const arr = await P.map_with_concurrency(10, versions, async entry => { @@ -3187,11 +2558,6 @@ class NamespaceFS { } } - _is_hidden_version_path(dir_key) { - const idx = dir_key.indexOf(HIDDEN_VERSIONS_PATH); - return ((idx === 0) || (idx > 0 && dir_key[idx - 1] === '/')); - } - //////////////////////////// /// MOVE & LINK & UNLINK /// //////////////////////////// @@ -3396,4 +2762,3 @@ NamespaceFS._restore_wal = null; module.exports = NamespaceFS; module.exports.multi_buffer_pool = multi_buffer_pool; - diff --git a/src/test/system_tests/test_utils.js b/src/test/system_tests/test_utils.js index 5fe9c0d561..df28bfe13a 100644 --- a/src/test/system_tests/test_utils.js +++ b/src/test/system_tests/test_utils.js @@ -475,6 +475,26 @@ function get_new_buckets_path_by_test_env(new_buckets_full_path, new_buckets_dir return is_nc_coretest ? path.join(new_buckets_full_path, new_buckets_dir) : new_buckets_dir; } + +/** + * common dummy SDK for testing + */ +function make_dummy_object_sdk() { + return { + requesting_account: { + force_md5_etag: false, + nsfs_account_config: { + uid: process.getuid(), + gid: process.getgid(), + } + }, + abort_controller: new AbortController(), + throw_if_aborted() { + if (this.abort_controller.signal.aborted) throw new Error('request aborted signal'); + } + }; +} + /** * write_manual_config_file writes config file directly to the file system without using config FS * used for creating backward compatibility tests, invalid config files etc @@ -629,8 +649,10 @@ exports.get_new_buckets_path_by_test_env = get_new_buckets_path_by_test_env; exports.write_manual_config_file = write_manual_config_file; exports.write_manual_old_account_config_file = write_manual_old_account_config_file; exports.delete_manual_config_file = delete_manual_config_file; +exports.make_dummy_object_sdk = make_dummy_object_sdk; exports.create_redirect_file = create_redirect_file; exports.delete_redirect_file = delete_redirect_file; exports.fail_test_if_default_config_dir_exists = fail_test_if_default_config_dir_exists; exports.create_config_dir = create_config_dir; exports.clean_config_dir = clean_config_dir; + diff --git a/src/test/unit_tests/jest_tests/test_list_object.test.js b/src/test/unit_tests/jest_tests/test_list_object.test.js deleted file mode 100644 index ea1fabdee8..0000000000 --- a/src/test/unit_tests/jest_tests/test_list_object.test.js +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright (C) 2016 NooBaa */ -'use strict'; - - -const fs = require('fs'); -const path = require('path'); -const fs_utils = require('../../../util/fs_utils'); -const nb_native = require('../../../util/nb_native'); -const {TMP_PATH} = require('../../system_tests/test_utils'); -const { get_process_fs_context } = require('../../../util/native_fs_utils'); - -const tmp_fs_path = path.join(TMP_PATH, 'test_list_object'); -const DEFAULT_FS_CONFIG = get_process_fs_context(); - -// eslint-disable-next-line max-lines-per-function -describe('manage list objct flow', () => { - describe('Telldir and Seekdir implementation', () => { - const list_dir_root = path.join(tmp_fs_path, 'list_dir_root'); - const list_dir_1_1 = path.join(list_dir_root, 'list_dir_1_1'); - const total_files = 4; - - beforeAll(async () => { - await fs_utils.create_fresh_path(list_dir_root); - await fs_utils.create_fresh_path(list_dir_1_1); - for (let i = 0; i < total_files; i++) { - create_temp_file(list_dir_root, `test_${i}.json`, {test: test}); - } - }); - - afterAll(async () => { - await fs_utils.folder_delete(`${list_dir_root}`); - await fs_utils.folder_delete(`${list_dir_1_1}`); - }); - - it('telldir returns bigint', async () => { - const dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); - const tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); - expect(typeof tell_dir).toStrictEqual('bigint'); - }); - - it('seekdir expects bigint', async () => { - const big_int = 2n ** 32n; - const dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); - const tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); - expect(() => dir_handle.seekdir(DEFAULT_FS_CONFIG, Number(tell_dir))).toThrow(); - expect(() => dir_handle.seekdir(DEFAULT_FS_CONFIG, 2n ** 32n ** 32n)).toThrow(); - expect(() => dir_handle.seekdir(DEFAULT_FS_CONFIG, -(2n ** 32n ** 32n))).toThrow(); - // valid scenario - expect(await dir_handle.seekdir(DEFAULT_FS_CONFIG, big_int)).toBeUndefined(); - - }); - - it('list dir files - telldir and seekdir.', async () => { - let tell_dir; - let dir_marker; - let total_dir_entries = 0; - let dir_entry; - let dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); - // reak first read after 3 entries. - for (let i = 0; i <= 2; i++) { - dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); - if (!dir_entry) break; - tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); - dir_marker = { - dir_path: list_dir_root, - pos: tell_dir, - }; - total_dir_entries += 1; - } - // Continue the read using dir location fetch from telldir - try { - dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, dir_marker.dir_path); - await dir_handle.seekdir(DEFAULT_FS_CONFIG, dir_marker.pos); - for (;;) { - dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); - if (!dir_entry) break; - total_dir_entries += 1; - } - await dir_handle.close(DEFAULT_FS_CONFIG); - dir_handle = null; - } catch (err) { - console.log("Error :", err); - } - //total number of dir and files inside list_dir_root is 5 - expect(total_dir_entries).toBe(total_files + 1); - }); - - it('list dir files - Dir.read() and seekdir()', async () => { - let dir_marker; - let total_dir_entries = 0; - let dir_entry; - let dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); - // reak first read after 3 entries. - for (let i = 0; i <= 2; i++) { - dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); - if (!dir_entry) break; - const tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); - //verify tell_dir and dir_entry.off return same value - expect(tell_dir).toBe(dir_entry.off); - dir_marker = { - dir_path: list_dir_root, - pos: dir_entry.off, - }; - total_dir_entries += 1; - } - // Continue the read using dir location fetch from Dir.read() - try { - dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, dir_marker.dir_path); - await dir_handle.seekdir(DEFAULT_FS_CONFIG, dir_marker.pos); - for (;;) { - dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); - if (!dir_entry) break; - total_dir_entries += 1; - } - await dir_handle.close(DEFAULT_FS_CONFIG); - dir_handle = null; - } catch (err) { - console.log("Error :", err); - } - //total number of dir and files inside list_dir_root is 5 - expect(total_dir_entries).toBe(total_files + 1); - }); - - it('list 10000 dir files - telldir and seekdir', async () => { - for (let i = total_files; i < total_files + 9995; i++) { - create_temp_file(list_dir_root, `test_${i}.json`, {test: test}); - } - let tell_dir; - let dir_marker; - let total_dir_entries = 0; - let dir_entry; - let dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); - // reak first read after 3 entries. - for (let i = 0; i <= 500; i++) { - dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); - if (!dir_entry) break; - tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); - dir_marker = { - dir_path: list_dir_root, - pos: tell_dir, - }; - total_dir_entries += 1; - } - // Continue the read using dir location fetch from telldir - try { - dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, dir_marker.dir_path); - await dir_handle.seekdir(DEFAULT_FS_CONFIG, dir_marker.pos); - for (;;) { - dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); - if (!dir_entry) break; - total_dir_entries += 1; - } - await dir_handle.close(DEFAULT_FS_CONFIG); - dir_handle = null; - } catch (err) { - console.log("Error :", err); - } - //total number of dir and files inside list_dir_root is 5 - expect(total_dir_entries).toBe(10000); - }, 10000); - }); -}); - -/** - * create_temp_file would create a file with the data - * @param {string} path_to_dir - * @param {string} file_name - * @param {object} data - */ -async function create_temp_file(path_to_dir, file_name, data) { - const path_to_temp_file_name = path.join(path_to_dir, file_name); - const content = JSON.stringify(data); - await fs.promises.writeFile(path_to_temp_file_name, content); - return path_to_temp_file_name; -} diff --git a/src/test/unit_tests/jest_tests/test_unsort_list_object.test.js b/src/test/unit_tests/jest_tests/test_unsort_list_object.test.js new file mode 100644 index 0000000000..388749f4e7 --- /dev/null +++ b/src/test/unit_tests/jest_tests/test_unsort_list_object.test.js @@ -0,0 +1,541 @@ +/* Copyright (C) 2016 NooBaa */ +/* eslint-disable no-undef */ +'use strict'; + + +const fs = require('fs'); +const path = require('path'); +const fs_utils = require('../../../util/fs_utils'); +const nb_native = require('../../../util/nb_native'); +const { TMP_PATH, make_dummy_object_sdk } = require('../../system_tests/test_utils'); +const { get_process_fs_context } = require('../../../util/native_fs_utils'); +const NamespaceFS = require('../../../sdk/namespace_fs'); +const buffer_utils = require('../../../util/buffer_utils'); +const crypto = require('crypto'); +const config = require('../../../../config'); + +const DEFAULT_FS_CONFIG = get_process_fs_context(); +config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; +const tmp_nsfs_path = path.join(TMP_PATH, 'test_unsort_list_objects'); +const upload_bkt = 'test_ns_uploads_object'; +const src_bkt = 'src'; +const timeout = 50000; +const ns_tmp_bucket_path = `${tmp_nsfs_path}/${src_bkt}`; + +const tmp_ns_nsfs_path = path.join(TMP_PATH, 'test_nsfs_unsort_list'); +const nsfs_src_bkt = 'nsfs_src'; +const ns_nsfs_tmp_bucket_path = `${tmp_ns_nsfs_path}/${nsfs_src_bkt}`; +const list_bkt = 'test_ns_list_object'; + +const files_without_folders_to_upload = make_keys(264, i => `file_without_folder${i}`); +const folders_to_upload = make_keys(264, i => `folder${i}/`); +const files_in_folders_to_upload = make_keys(264, i => `folder1/file${i}`); +const files_in_utf_diff_delimiter = make_keys(264, i => `תיקיה#קובץ${i}`); +const files_in_inner_folders_to_upload_post = make_keys(264, i => `folder1/inner_folder/file${i}`); +const files_in_inner_folders_to_upload_pre = make_keys(264, i => `folder1/ainner_folder/file${i}`); +const dummy_object_sdk = make_dummy_object_sdk(); +const ns_tmp = new NamespaceFS({ bucket_path: ns_tmp_bucket_path, bucket_id: '2', namespace_resource_id: undefined }); +const ns_nsfs_tmp = new NamespaceFS({ bucket_path: ns_nsfs_tmp_bucket_path, bucket_id: '3', namespace_resource_id: undefined }); + +// eslint-disable-next-line max-lines-per-function +describe('manage unsorted list objcts flow', () => { + const keys_objects = make_keys(999, i => `max_keys_test${i}`); + describe('Unsorted List objects ', () => { + const data = crypto.randomBytes(100); + + beforeAll(async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + await fs_utils.create_fresh_path(ns_tmp_bucket_path); + }); + + afterAll(async () => { + await fs_utils.folder_delete(`${ns_tmp_bucket_path}`); + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = false; + }); + + it('List object unsorted with one object upload', async () => { + const upload_key_2 = 'my_data'; + await ns_tmp.upload_object({ + bucket: upload_bkt, + key: upload_key_2, + source_stream: buffer_utils.buffer_to_read_stream(data), + }, dummy_object_sdk); + const ls_obj_res = await ns_tmp.list_objects({ + bucket: upload_bkt, + delimiter: '/', + }, dummy_object_sdk); + expect(ls_obj_res.objects.map(obj => obj.key)).toStrictEqual([upload_key_2]); + }); + + it('List object unsorted with multiple object upload', async () => { + await create_keys(upload_bkt, ns_tmp, keys_objects); + const ls_obj_res = await ns_tmp.list_objects({ + bucket: upload_bkt, + delimiter: '/', + }, dummy_object_sdk); + expect(ls_obj_res.objects.map(it => it.key).length).toStrictEqual(keys_objects.length + 1); + }); + }); + + describe('Telldir and Seekdir implementation', () => { + const tmp_fs_path = path.join(TMP_PATH, 'test_list_object'); + const list_dir_root = path.join(tmp_fs_path, 'list_dir_root'); + const list_dir_1_1 = path.join(list_dir_root, 'list_dir_1_1'); + const total_files = 4; + beforeAll(async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + await fs_utils.create_fresh_path(list_dir_root); + await fs_utils.create_fresh_path(list_dir_1_1); + for (let i = 0; i < total_files; i++) { + create_temp_file(list_dir_root, `test_${i}.json`, {test: test}); + } + }); + + afterAll(async () => { + await fs_utils.folder_delete(`${list_dir_root}`); + await fs_utils.folder_delete(`${list_dir_1_1}`); + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = false; + }); + + it('telldir returns bigint', async () => { + const dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); + const tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); + expect(typeof tell_dir).toStrictEqual('bigint'); + }); + + it('seekdir expects bigint', async () => { + const big_int = 2n ** 32n; + const dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); + const tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); + expect(() => dir_handle.seekdir(DEFAULT_FS_CONFIG, Number(tell_dir))).toThrow(); + expect(() => dir_handle.seekdir(DEFAULT_FS_CONFIG, 2n ** 32n ** 32n)).toThrow(); + expect(() => dir_handle.seekdir(DEFAULT_FS_CONFIG, -(2n ** 32n ** 32n))).toThrow(); + // valid scenario + expect(await dir_handle.seekdir(DEFAULT_FS_CONFIG, big_int)).toBeUndefined(); + + }); + + it('list dir files - telldir and seekdir.', async () => { + let tell_dir; + let dir_marker; + let total_dir_entries = 0; + let dir_entry; + let dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); + // reak first read after 3 entries. + for (let i = 0; i <= 2; i++) { + dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); + if (!dir_entry) break; + tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); + dir_marker = { + dir_path: list_dir_root, + pos: tell_dir, + }; + total_dir_entries += 1; + } + // Continue the read using dir location fetch from telldir + try { + dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, dir_marker.dir_path); + await dir_handle.seekdir(DEFAULT_FS_CONFIG, dir_marker.pos); + for (;;) { + dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); + if (!dir_entry) break; + total_dir_entries += 1; + } + await dir_handle.close(DEFAULT_FS_CONFIG); + dir_handle = null; + } catch (err) { + console.log("Error :", err); + } + //total number of dir and files inside list_dir_root is 5 + expect(total_dir_entries).toBe(total_files + 1); + }); + + it('list dir files - Dir.read() and seekdir()', async () => { + let dir_marker; + let total_dir_entries = 0; + let dir_entry; + let dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); + // reak first read after 3 entries. + for (let i = 0; i <= 2; i++) { + dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); + if (!dir_entry) break; + const tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); + //verify tell_dir and dir_entry.off return same value + expect(tell_dir).toBe(dir_entry.off); + dir_marker = { + dir_path: list_dir_root, + pos: dir_entry.off, + }; + total_dir_entries += 1; + } + // Continue the read using dir location fetch from Dir.read() + try { + dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, dir_marker.dir_path); + await dir_handle.seekdir(DEFAULT_FS_CONFIG, dir_marker.pos); + for (;;) { + dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); + if (!dir_entry) break; + total_dir_entries += 1; + } + await dir_handle.close(DEFAULT_FS_CONFIG); + dir_handle = null; + } catch (err) { + console.log("Error :", err); + } + //total number of dir and files inside list_dir_root is 5 + expect(total_dir_entries).toBe(total_files + 1); + }); + + it('list 10000 dir files - telldir and seekdir', async () => { + for (let i = total_files; i < total_files + 9995; i++) { + create_temp_file(list_dir_root, `test_${i}.json`, {test: test}); + } + let tell_dir; + let dir_marker; + let total_dir_entries = 0; + let dir_entry; + let dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, list_dir_root); + // Seak first read after 3 entries. + for (let i = 0; i <= 500; i++) { + dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); + if (!dir_entry) break; + tell_dir = await dir_handle.telldir(DEFAULT_FS_CONFIG); + dir_marker = { + dir_path: list_dir_root, + pos: tell_dir, + }; + total_dir_entries += 1; + } + // Continue the read using dir location fetch from telldir + try { + dir_handle = await nb_native().fs.opendir(DEFAULT_FS_CONFIG, dir_marker.dir_path); + await dir_handle.seekdir(DEFAULT_FS_CONFIG, dir_marker.pos); + for (;;) { + dir_entry = await dir_handle.read(DEFAULT_FS_CONFIG); + if (!dir_entry) break; + total_dir_entries += 1; + } + await dir_handle.close(DEFAULT_FS_CONFIG); + dir_handle = null; + } catch (err) { + console.log("Error :", err); + } + //total number of dir and files inside list_dir_root is 5 + expect(total_dir_entries).toBe(10000); + }); + }); + + + describe('list objects - dirs', () => { + beforeAll(async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + await fs_utils.create_fresh_path(ns_tmp_bucket_path); + await create_keys(upload_bkt, ns_tmp, [ + ...folders_to_upload, + ...files_in_folders_to_upload, + ...files_without_folders_to_upload, + ...files_in_utf_diff_delimiter, + ...files_in_inner_folders_to_upload_pre, + ...files_in_inner_folders_to_upload_post + ]); + }); + afterAll(async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = false; + await delete_keys(upload_bkt, ns_tmp, [ + ...folders_to_upload, + ...files_in_folders_to_upload, + ...files_without_folders_to_upload, + ...files_in_utf_diff_delimiter, + ...files_in_inner_folders_to_upload_pre, + ...files_in_inner_folders_to_upload_post + ]); + await fs_utils.folder_delete(`${ns_tmp_bucket_path}`); + }, timeout); + it('key_marker=folder229/', async () => { + const r = await ns_tmp.list_objects({ + bucket: upload_bkt, + list_type: "2", + key_marker: 'folder229/' + }, dummy_object_sdk); + expect(r.is_truncated).toStrictEqual(false); + expect(r.common_prefixes).toStrictEqual([]); + const fd = folders_to_upload.filter(folder => folder > 'folder229/'); + expect(r.objects.map(it => it.key).sort()).toStrictEqual([...fd, ...files_in_utf_diff_delimiter].sort()); + }); + + it('key_marker=folder229', async function() { + const r = await ns_tmp.list_objects({ + bucket: upload_bkt, + list_type: "2", + key_marker: 'folder229' + }, dummy_object_sdk); + expect(r.is_truncated).toStrictEqual(false); + expect(r.common_prefixes).toStrictEqual([]); + const fd = folders_to_upload.filter(folder => folder >= 'folder229/'); + expect(r.objects.map(it => it.key).sort()).toEqual([...fd, ...files_in_utf_diff_delimiter].sort()); + }); + + it('key_marker=folder1/', async function() { + const r = await ns_tmp.list_objects({ + bucket: upload_bkt, + list_type: "2", + key_marker: 'folder1/' + }, dummy_object_sdk); + expect(r.is_truncated).toStrictEqual(true); + expect(r.common_prefixes).toStrictEqual([]); + expect(r.objects.length).toEqual(1000); + expect(r.objects.map(it => it.key)).not.toContain("folder0/"); + }); + + it('key_marker=folder1/file57', async function() { + const r = await ns_tmp.list_objects({ + bucket: upload_bkt, + list_type: "2", + key_marker: 'folder1/file57' + }, dummy_object_sdk); + expect(r.is_truncated).toStrictEqual(false); + expect(r.common_prefixes).toStrictEqual([]); + const fd = folders_to_upload.filter(folder => folder > 'folder1/'); + const fd1 = files_in_folders_to_upload.filter(folder => folder > 'folder1/file57'); + expect(r.objects.map(it => it.key).sort()).toEqual([...fd1, ...files_in_inner_folders_to_upload_post, + ...fd, ...files_in_utf_diff_delimiter].sort()); + }); + it('key_marker=folder1/inner_folder/file40', async function() { + const r = await ns_tmp.list_objects({ + bucket: upload_bkt, + list_type: "2", + key_marker: 'folder1/inner_folder/file40' + }, dummy_object_sdk); + expect(r.is_truncated).toStrictEqual(false); + expect(r.common_prefixes).toStrictEqual([]); + const fd1 = files_in_inner_folders_to_upload_post.filter(file => file > 'folder1/inner_folder/file40'); + const fd = folders_to_upload.filter(folder => folder > 'folder1/'); + expect(r.objects.map(it => it.key).sort()).toEqual([...fd1, ...fd, ...files_in_utf_diff_delimiter].sort()); + }); + + it('key_marker=folder1/inner_folder/', async function() { + const r = await ns_tmp.list_objects({ + bucket: upload_bkt, + list_type: '2', + key_marker: 'folder1/inner_folder/' + }, dummy_object_sdk); + expect(r.is_truncated).toStrictEqual(false); + expect(r.common_prefixes).toStrictEqual([]); + const fd1 = files_in_inner_folders_to_upload_post.filter(file => file > 'folder1/inner_folder/'); + const fd = folders_to_upload.filter(folder => folder > 'folder1/inner_folder/'); + expect(r.objects.map(it => it.key).sort()).toEqual([...fd1, ...fd, ...files_in_utf_diff_delimiter].sort()); + }); + + it('key_marker=folder1/ainner_folder/', async function() { + const r = await ns_tmp.list_objects({ + bucket: upload_bkt, + list_type: '2', + key_marker: 'folder1/ainner_folder/file50' + }, dummy_object_sdk); + expect(r.is_truncated).toStrictEqual(true); + expect(r.common_prefixes).toStrictEqual([]); + expect(r.objects.length).toEqual(1000); + expect(r.objects.map(it => it.key)).not.toContain("folder1/ainner_folder/file50"); + expect(r.objects.map(it => it.key)).not.toContain("folder1/ainner_folder/file49"); + }); + }); + + describe('list objects - pagination', () => { + beforeAll(async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + await fs_utils.create_fresh_path(ns_nsfs_tmp_bucket_path); + await create_keys(list_bkt, ns_nsfs_tmp, [ + ...files_without_folders_to_upload, + ...folders_to_upload, + ...files_in_folders_to_upload, + ...files_in_inner_folders_to_upload_post, + ...files_in_inner_folders_to_upload_pre, + ...files_in_utf_diff_delimiter, + ]); + }, timeout); + afterAll(async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = false; + await delete_keys(list_bkt, ns_nsfs_tmp, [ + ...folders_to_upload, + ...files_in_folders_to_upload, + ...files_without_folders_to_upload, + ...files_in_utf_diff_delimiter, + ...files_in_inner_folders_to_upload_pre, + ...files_in_inner_folders_to_upload_post + ]); + await fs_utils.folder_delete(`${ns_nsfs_tmp_bucket_path}`); + }); + it('page=1000 and list_type 2', async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + let r; + let total_items = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + list_type: "2", + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + total_items += r.objects.length; + await validat_pagination(r, total_items); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=500 and list_type 2', async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + let r; + let total_items = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + list_type: "2", + limit: 500, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + total_items += r.objects.length; + await validat_pagination(r, total_items); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=250 and list_type 2', async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + let r; + let total_items = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + list_type: "2", + limit: 250, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + total_items += r.objects.length; + await validat_pagination(r, total_items); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=100 and list_type 2', async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = true; + let r; + let total_items = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + list_type: "2", + limit: 100, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + total_items += r.objects.length; + await validat_pagination(r, total_items); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=250 and list_type 1', async () => { + let r; + let total_items = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + limit: 250, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + total_items += r.objects.length; + await validat_pagination(r, total_items); + if (!r.next_marker) { + break; + } + } + }); + it('page=500 and list_type 1', async () => { + let r; + let total_items = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + limit: 500, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + total_items += r.objects.length; + await validat_pagination(r, total_items); + if (!r.next_marker) { + break; + } + } + }); + it('page=1000 and list_type 1', async () => { + let r; + let total_items = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + total_items += r.objects.length; + await validat_pagination(r, total_items); + if (!r.next_marker) { + break; + } + } + }); + }); +}); + + +async function validat_pagination(r, total_items) { + if (r.next_marker) { + expect(r.is_truncated).toStrictEqual(true); + } else { + expect(total_items).toEqual(1584); + expect(r.is_truncated).toStrictEqual(false); + } +} +/** + * @param {number} count + * @param {(i:number)=>string} gen + * @returns {string[]} + */ +function make_keys(count, gen) { + const arr = new Array(count); + for (let i = 0; i < count; ++i) arr[i] = gen(i); + arr.sort(); + Object.freeze(arr); + return arr; +} + +async function delete_keys(bkt, nsfs_delete_tmp, keys) { + await nsfs_delete_tmp.delete_multiple_objects({ + bucket: bkt, + objects: keys.map(key => ({ key })), + }, dummy_object_sdk); +} + +async function create_keys(bkt, nsfs_create_tmp, keys) { + return Promise.all(keys.map(async key => { + await nsfs_create_tmp.upload_object({ + bucket: bkt, + key, + content_type: 'application/octet-stream', + source_stream: buffer_utils.buffer_to_read_stream(null), + size: 0 + }, dummy_object_sdk); + })); +} + +/** + * create_temp_file would create a file with the data + * @param {string} path_to_dir + * @param {string} file_name + * @param {object} data + */ +async function create_temp_file(path_to_dir, file_name, data) { + const path_to_temp_file_name = path.join(path_to_dir, file_name); + const content = JSON.stringify(data); + await fs.promises.writeFile(path_to_temp_file_name, content); + return path_to_temp_file_name; +} diff --git a/src/test/unit_tests/test_namespace_fs.js b/src/test/unit_tests/test_namespace_fs.js index 5ff9b6fc31..a461c88390 100644 --- a/src/test/unit_tests/test_namespace_fs.js +++ b/src/test/unit_tests/test_namespace_fs.js @@ -21,10 +21,9 @@ const s3_utils = require('../../endpoint/s3/s3_utils'); const buffer_utils = require('../../util/buffer_utils'); const { S3Error } = require('../../endpoint/s3/s3_errors'); const test_ns_list_objects = require('./test_ns_list_objects'); -const { TMP_PATH } = require('../system_tests/test_utils'); +const { TMP_PATH, make_dummy_object_sdk } = require('../system_tests/test_utils'); const { get_process_fs_context } = require('../../util/native_fs_utils'); const endpoint_stats_collector = require('../../sdk/endpoint_stats_collector'); -const SensitiveString = require('../../util/sensitive_string'); const inspect = (x, max_arr = 5) => util.inspect(x, { colors: true, depth: null, maxArrayLength: max_arr }); @@ -42,31 +41,6 @@ const DEFAULT_FS_CONFIG = get_process_fs_context(); const empty_data = crypto.randomBytes(0); const empty_stream = () => buffer_utils.buffer_to_read_stream(empty_data); -function make_dummy_object_sdk(config_root) { - return { - requesting_account: { - force_md5_etag: false, - nsfs_account_config: { - uid: process.getuid(), - gid: process.getgid(), - } - }, - abort_controller: new AbortController(), - throw_if_aborted() { - if (this.abort_controller.signal.aborted) throw new Error('request aborted signal'); - }, - - read_bucket_sdk_config_info(name) { - return { - bucket_owner: new SensitiveString('dummy-owner'), - owner_account: { - id: 'dummy-id-123', - } - }; - } - }; -} - mocha.describe('namespace_fs', function() { const src_bkt = 'src'; @@ -99,6 +73,7 @@ mocha.describe('namespace_fs', function() { }); mocha.before(async () => { + config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED = false; await P.all(_.map([src_bkt, upload_bkt, mpu_bkt], async buck => fs_utils.create_fresh_path(`${tmp_fs_path}/${buck}`))); }); diff --git a/src/test/unit_tests/test_namespace_fs_mpu.js b/src/test/unit_tests/test_namespace_fs_mpu.js index 9f11327de9..712cdf6229 100644 --- a/src/test/unit_tests/test_namespace_fs_mpu.js +++ b/src/test/unit_tests/test_namespace_fs_mpu.js @@ -17,35 +17,18 @@ const time_utils = require('../../util/time_utils'); const NamespaceFS = require('../../sdk/namespace_fs'); const s3_utils = require('../../endpoint/s3/s3_utils'); const buffer_utils = require('../../util/buffer_utils'); -const { TMP_PATH } = require('../system_tests/test_utils'); +const { TMP_PATH, make_dummy_object_sdk } = require('../system_tests/test_utils'); const endpoint_stats_collector = require('../../sdk/endpoint_stats_collector'); const inspect = (x, max_arr = 5) => util.inspect(x, { colors: true, depth: null, maxArrayLength: max_arr }); const XATTR_MD5_KEY = 'content_md5'; -const DUMMY_OBJECT_SDK = make_DUMMY_OBJECT_SDK(); +const DUMMY_OBJECT_SDK = make_dummy_object_sdk(); const src_bkt = 'src'; const tmp_fs_path = path.join(TMP_PATH, 'test_namespace_fs_mpu'); const ns_tmp_bucket_path = `${tmp_fs_path}/${src_bkt}`; -function make_DUMMY_OBJECT_SDK() { - return { - requesting_account: { - force_md5_etag: false, - nsfs_account_config: { - uid: process.getuid(), - gid: process.getgid(), - } - }, - abort_controller: new AbortController(), - throw_if_aborted() { - if (this.abort_controller.signal.aborted) throw new Error('request aborted signal'); - } - }; -} - - mocha.describe('namespace_fs mpu optimization tests', function() { const upload_bkt = 'test_ns_uploads_object'; diff --git a/src/util/namespace_fs_util.js b/src/util/namespace_fs_util.js new file mode 100644 index 0000000000..d4cc021e14 --- /dev/null +++ b/src/util/namespace_fs_util.js @@ -0,0 +1,867 @@ +/* Copyright (C) 2020 NooBaa */ +/*eslint max-statements: ["error", 80, { "ignoreTopLevelFunctions": true }]*/ +'use strict'; + +const _ = require('lodash'); +const fs = require('fs'); +const path = require('path'); +const config = require('../../config'); +const LRUCache = require('../util/lru_cache'); +const native_fs_utils = require('./native_fs_utils'); +const nb_native = require('./nb_native'); +const dbg = require('../util/debug_module')(__filename); +const error_utils = require('../util/error_utils'); +const s3_utils = require('../endpoint/s3/s3_utils'); +const size_utils = require('../util/size_utils'); + +const NULL_VERSION_ID = 'null'; +const NULL_VERSION_SUFFIX = '_' + NULL_VERSION_ID; +const HIDDEN_VERSIONS_PATH = '.versions'; +const XATTR_USER_PREFIX = 'user.'; +const XATTR_NOOBAA_INTERNAL_PREFIX = XATTR_USER_PREFIX + 'noobaa.'; +const XATTR_DIR_CONTENT = XATTR_NOOBAA_INTERNAL_PREFIX + 'dir_content'; +const XATTR_STORAGE_CLASS_KEY = XATTR_USER_PREFIX + 'storage_class'; +const XATTR_TAG = XATTR_NOOBAA_INTERNAL_PREFIX + 'tag.'; +const XATTR_MD5_KEY = XATTR_USER_PREFIX + 'content_md5'; +const XATTR_METADATA_IGNORE_LIST = [ + XATTR_STORAGE_CLASS_KEY, +]; + +/** + * @typedef {{ +* time: number, +* stat: nb.NativeFSStats, +* usage: number, +* sorted_entries?: fs.Dirent[], +* }} ReaddirCacheItem +* @type {LRUCache} +*/ +const dir_cache = new LRUCache({ + name: 'nsfs-dir-cache', + make_key: ({ dir_path }) => dir_path, + load: async ({ dir_path, fs_context }) => { + const time = Date.now(); + const stat = await nb_native().fs.stat(fs_context, dir_path); + let sorted_entries; + let usage = config.NSFS_DIR_CACHE_MIN_DIR_SIZE; + if (stat.size <= config.NSFS_DIR_CACHE_MAX_DIR_SIZE) { + sorted_entries = await nb_native().fs.readdir(fs_context, dir_path); + sorted_entries.sort(sort_entries_by_name); + for (const ent of sorted_entries) { + usage += ent.name.length + 4; + } + } + return { time, stat, sorted_entries, usage }; + }, + validate: async ({ stat }, { dir_path, fs_context }) => { + const new_stat = await nb_native().fs.stat(fs_context, dir_path); + return (new_stat.ino === stat.ino && new_stat.mtimeNsBigint === stat.mtimeNsBigint); + }, + item_usage: ({ usage }, dir_path) => usage, + max_usage: config.NSFS_DIR_CACHE_MAX_TOTAL_SIZE, +}); + +/** +* @typedef {{ +* time: number, +* stat: nb.NativeFSStats, +* ver_dir_stat: nb.NativeFSStats, +* usage: number, +* sorted_entries?: fs.Dirent[], +* }} ReaddirVersionsCacheItem +* @type {LRUCache} +*/ +const versions_dir_cache = new LRUCache({ + name: 'nsfs-versions-dir-cache', + make_key: ({ dir_path }) => dir_path, + load: async ({ dir_path, fs_context }) => { + const time = Date.now(); + const stat = await nb_native().fs.stat(fs_context, dir_path); + const version_path = dir_path + "/" + HIDDEN_VERSIONS_PATH; + let ver_dir_stat_size; + let is_version_path_exists = false; + let ver_dir_stat; + try { + ver_dir_stat = await nb_native().fs.stat(fs_context, version_path); + ver_dir_stat_size = ver_dir_stat.size; + is_version_path_exists = true; + } catch (err) { + if (err.code === 'ENOENT') { + dbg.log0('NamespaceFS: Version dir not found, ', version_path); + } else { + throw err; + } + ver_dir_stat = null; + ver_dir_stat_size = 0; + } + let sorted_entries; + let usage = config.NSFS_DIR_CACHE_MIN_DIR_SIZE; + if (stat.size + ver_dir_stat_size <= config.NSFS_DIR_CACHE_MAX_DIR_SIZE) { + const latest_versions = await nb_native().fs.readdir(fs_context, dir_path); + if (is_version_path_exists) { + const old_versions = await nb_native().fs.readdir(fs_context, version_path); + // In case we have a null version id inside .versions/ directory we will rename it + // Therefore, old_versions_after_rename will not include an entry with 'null' suffix + // (in case stat fails on a version we would remove it from the array) + const { + renamed_null_versions_set, + old_versions_after_rename + } = await _rename_null_version(old_versions, fs_context, version_path); + const entries = latest_versions.concat(old_versions_after_rename); + sorted_entries = entries.sort(sort_entries_by_name_and_time); + // rename back version to include 'null' suffix. + if (renamed_null_versions_set.size > 0) { + for (const ent of sorted_entries) { + if (renamed_null_versions_set.has(ent.name)) { + const file_name = _get_filename(ent.name); + const version_name_with_null = file_name + NULL_VERSION_SUFFIX; + ent.name = version_name_with_null; + } + } + } + } else { + sorted_entries = latest_versions.sort(sort_entries_by_name); + } + /*eslint no-unused-expressions: ["error", { "allowTernary": true }]*/ + for (const ent of sorted_entries) { + usage += ent.name.length + 4; + } + } + return { time, stat, ver_dir_stat, sorted_entries, usage }; + }, + validate: async ({ stat, ver_dir_stat }, { dir_path, fs_context }) => { + const new_stat = await nb_native().fs.stat(fs_context, dir_path); + const versions_dir_path = path.normalize(path.join(dir_path, '/', HIDDEN_VERSIONS_PATH)); + let new_versions_stat; + try { + new_versions_stat = await nb_native().fs.stat(fs_context, versions_dir_path); + } catch (err) { + if (err.code === 'ENOENT') { + dbg.log0('NamespaceFS: Version dir not found, ', versions_dir_path); + } else { + throw err; + } + } + return (new_stat.ino === stat.ino && + new_stat.mtimeNsBigint === stat.mtimeNsBigint && + new_versions_stat?.ino === ver_dir_stat?.ino && + new_versions_stat?.mtimeNsBigint === ver_dir_stat?.mtimeNsBigint); + }, + item_usage: ({ usage }, dir_path) => usage, + max_usage: config.NSFS_DIR_CACHE_MAX_TOTAL_SIZE, +}); + +/** + * Process list objects entries + * @param {Object} bucket + * @param {fs.Dirent} ent + * @param {string} marker_curr + * @param {string} dir_path + * @param {string} dir_key + * @param {import('../sdk/list_object_fs')} list_obj + * @param {string} pos + */ +async function process_entry({bucket_path, bucket_id }, ent, marker_curr, dir_path, dir_key, list_obj, pos = '') { + + if ((!ent.name.startsWith(list_obj.prefix_ent) || + (pos === '' && ent.name < marker_curr.split('/')[0]) || + ent.name === get_bucket_tmpdir_name(bucket_id) || + ent.name === config.NSFS_FOLDER_OBJECT_NAME) || + _is_hidden_version_path(ent.name)) { + return; + } + const isDir = await is_directory_or_symlink_to_directory(ent, list_obj.fs_context, path.join(dir_path, ent.name)); + let r; + if (list_obj.list_versions && _is_version_or_null_in_file_name(ent.name)) { + r = { + key: _get_version_entry_key(dir_key, ent), + common_prefix: isDir, + is_latest: false, + marker_pos: pos, + pre_dir: dir_key ? dir_key : '/', + }; + } else { + r = { + key: _get_entry_key(dir_key, ent, isDir), + common_prefix: isDir, + is_latest: true, + marker_pos: pos, + pre_dir: dir_key ? dir_key : '/', + }; + } + if (config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED && list_obj.list_type === '2') { + await insert_unsort_entry_to_results_arr(bucket_path, bucket_id, r, list_obj); + } else { + await insert_entry_to_results_arr(bucket_path, bucket_id, r, list_obj); + } +} + +/** + * Process sorted list objects entries + * @param {string} bucket_path + * @param {string} bucket_id + * @param {Object} r + * @param {import('../sdk/list_object_fs')} list_obj + */ +async function insert_entry_to_results_arr(bucket_path, bucket_id, r, list_obj) { + let pos; + // Since versions are arranged next to latest object in the latest first order, + // no need to find the sorted last index. Push the ".versions/#VERSION_OBJECT" as + // they are in order + if (list_obj.results.length && r.key < list_obj.results[list_obj.results.length - 1].key && + !_is_hidden_version_path(r.key)) { + pos = _.sortedLastIndexBy(list_obj.results, r, a => a.key); + } else { + pos = list_obj.results.length; + } + if (pos >= list_obj.limit) { + list_obj.is_truncated = true; + return; // not added + } + if (!list_obj.delimiter && r.common_prefix) { + await process_dir(bucket_path, bucket_id, r.key, list_obj); + } else { + if (list_obj.keymarker.key_marker_value === r.key) { + return; + } + if (pos < list_obj.results.length) { + list_obj.results.splice(pos, 0, r); + } else { + list_obj.results.push(r); + } + if (list_obj.results.length > list_obj.limit) { + list_obj.results.length = list_obj.limit; + list_obj.is_truncated = true; + } + } +} + +/** + * Process unsorted list objects entries + * @param {string} bucket_path + * @param {string} bucket_id + * @param {Object} r + * @param {import('../sdk/list_object_fs')} list_obj + */ +async function insert_unsort_entry_to_results_arr(bucket_path, bucket_id, r, list_obj) { + // Since versions are arranged next to latest object in the latest first order, + // no need to find the sorted last index. Push the ".versions/#VERSION_OBJECT" as + // they are in order + const pos = list_obj.results.length; + if (pos >= list_obj.limit) { + if (list_obj.keymarker.last_pre_dir) { + await list_obj.keymarker.add_previour_dir(list_obj.keymarker.last_pre_dir, list_obj.keymarker.last_pre_dir_position); + } + list_obj.is_truncated = true; + return; // not added + } + if (!list_obj.delimiter && r.common_prefix) { + if (r.marker_pos && !list_obj.keymarker.pre_dir.includes(r.pre_dir)) { + await list_obj.keymarker.add_previour_dir(r.pre_dir, r.marker_pos); + } + await process_dir(bucket_path, bucket_id, r.key, list_obj); + } else { + if (list_obj.keymarker.key_marker_value === r.key) { + return; + } + list_obj.results.push(r); + if (list_obj.results.length > list_obj.limit) { + list_obj.results.length = list_obj.limit; + list_obj.is_truncated = true; + } + await list_obj.keymarker.update_last_previour_dir('', ''); + } +} + +/** + * Process unsorted list objects dir path + * @param {string} bucket_path + * @param {string} dir_key + * @param {string} marker_curr + * @param {string} dir_path + * @param {import('../sdk/list_object_fs')} list_obj + */ +async function process_unsort_entry(bucket_path, bucket_id, dir_key, marker_curr, dir_path, list_obj, dir_handle) { + for (;;) { + if (list_obj.is_truncated) break; + const dir_entry = await dir_handle.read(list_obj.fs_context); + // After listing the last item from sub dir, check for parent dirs and parent directory position + // and go back to that position. + if (!dir_entry) { + // Skip item listing in bucket root path when list flow coming from sub dir to bucket root dir, + // if do not skip items in bucket root path will list two times, + // first in normal flow, and second when return back from sub dir. + if (bucket_path + '/' === dir_path) { + list_obj.skip_list = true; + } + // After iterating the last element in subdir flow will go back to the parent folder, + // to avoid listing items again from start use previous dir path and position from + // previous_dirs and previous_dir_positions arrays respectively. + if (list_obj.keymarker.pre_dir.length > 0) { + list_obj.keymarker.last_pre_dir = list_obj.keymarker.pre_dir.pop(); + list_obj.keymarker.last_pre_dir_position = list_obj.keymarker.pre_dir_pos.pop(); + // Next dir process will use the previous dir path and position to iterate from + // the previously left parentt dir position. + await list_obj.keymarker.update_key_marker(list_obj.keymarker.last_pre_dir, list_obj.keymarker.last_pre_dir_position); + await process_dir(bucket_path, bucket_id, list_obj.keymarker.last_pre_dir, list_obj); + } + break; + } + if ((dir_entry.name === config.NSFS_FOLDER_OBJECT_NAME && dir_key === list_obj.marker_dir) || list_obj.skip_list) { + continue; + } + await process_entry({bucket_path, bucket_id}, dir_entry, marker_curr, dir_path, dir_key, list_obj, (list_obj.param_key_marker && !list_obj.keymarker.is_unsorted) ? '' : dir_entry.off); + } +} + +/** + * Process objects dir path + * @param {string} bucket_path + * @param {string} bucket_id + * @param {string} dir_key + * @param {import('../sdk/list_object_fs')} list_obj + * @returns {Promise} + */ +async function process_dir(bucket_path, bucket_id, dir_key, list_obj) { + if (_is_hidden_version_path(dir_key)) { + return; + } + // /** @type {fs.Dir} */ + let dir_handle; + /** @type {ReaddirCacheItem} */ + let cached_dir; + const dir_path = path.join(bucket_path, dir_key); + const prefix_dir = list_obj.prefix.slice(0, dir_key.length); + const prefix_ent = list_obj.prefix.slice(dir_key.length); + if (!dir_key.startsWith(prefix_dir)) { + // dbg.log0(`prefix dir does not match so no keys in this dir can apply: dir_key=${dir_key} prefix_dir=${prefix_dir}`); + return; + } + + const marker_dir = list_obj.keymarker.key_marker_value.slice(0, dir_key.length); + const marker_ent = list_obj.keymarker.key_marker_value.slice(dir_key.length); + // marker is after dir so no keys in this dir can apply + if (dir_key < marker_dir) { + // dbg.log0(`marker is after dir so no keys in this dir can apply: dir_key=${dir_key} marker_dir=${marker_dir}`); + return; + } + // when the dir portion of the marker is completely below the current dir + // then every key in this dir satisfies the marker and marker_ent should not be used. + + const marker_curr = (marker_dir < dir_key) ? '' : marker_ent; + // dbg.log0(`process_dir: dir_key=${dir_key} prefix_ent=${prefix_ent} marker_curr=${marker_curr}`); + + await list_obj.update_process_dir_properties(prefix_ent, marker_curr, dir_path); + const push_dir_entries = async (marker_index, sorted_entries) => { + if (marker_index) { + const prev_dir = sorted_entries[marker_index - 1]; + const prev_dir_name = prev_dir.name; + if (marker_curr.startsWith(prev_dir_name) && dir_key !== prev_dir.name) { + if (!list_obj.delimiter) { + const isDir = await is_directory_or_symlink_to_directory( + prev_dir, list_obj.fs_context, path.join(dir_path, prev_dir_name, '/')); + if (isDir) { + const prev_dir_key = path.join(dir_key, prev_dir_name, '/'); + await process_dir(bucket_path, bucket_id, prev_dir_key, list_obj); + } + } + } + } + }; + + if (!(await check_access(list_obj.fs_context, dir_path, bucket_path))) return; + try { + if (list_obj.list_versions) { + cached_dir = await versions_dir_cache.get_with_cache({ dir_path, fs_context: list_obj.fs_context }); + } else { + cached_dir = await dir_cache.get_with_cache({ dir_path, fs_context: list_obj.fs_context }); + } + } catch (err) { + if (['ENOENT', 'ENOTDIR'].includes(err.code)) { + dbg.log0('NamespaceFS: no keys for non existing dir', dir_path); + return; + } + throw err; + } + + // insert dir object to objects list if its key is lexicographicly bigger than the key marker && + // no delimiter OR prefix is the current directory entry + const is_dir_content = cached_dir.stat.xattr && cached_dir.stat.xattr[XATTR_DIR_CONTENT]; + if (is_dir_content && dir_key > list_obj.keymarker.key_marker_value && (!list_obj.delimiter || dir_key === list_obj.prefix)) { + const r = { key: dir_key, common_prefix: false }; + if (config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED && list_obj.list_type === '2') { + await insert_unsort_entry_to_results_arr(bucket_path, bucket_id, r, list_obj); + } else { + await insert_entry_to_results_arr(bucket_path, bucket_id, r, list_obj); + } + } + + if (!config.NSFS_LIST_OBJECTS_V2_UNSORTED_ENABLED && cached_dir.sorted_entries) { + const sorted_entries = cached_dir.sorted_entries; + let marker_index; + // Two ways followed here to find the index. + // 1. When inside marker_dir: Here the entries are sorted based on time. Here + // FindIndex() is called since sortedLastIndexBy() expects sorted order by name + // 2. When marker_dir above dir_path: sortedLastIndexBy() is called since entries are + // sorted by name + // 3. One of the below conditions, marker_curr.includes('/') checks whether + // the call is for the directory that contains marker_curr + if (list_obj.list_versions && marker_curr && !marker_curr.includes('/')) { + let start_marker = marker_curr; + if (list_obj.version_id_marker) start_marker = list_obj.version_id_marker; + marker_index = _.findIndex( + sorted_entries, + {name: start_marker} + ) + 1; + } else { + marker_index = _.sortedLastIndexBy( + sorted_entries, + make_named_dirent(marker_curr), + get_entry_name + ); + } + + // handling a scenario in which key_marker points to an object inside a directory + // since there can be entries inside the directory that will need to be pushed + // to results array + await push_dir_entries(marker_index, sorted_entries); + for (let i = marker_index; i < sorted_entries.length; ++i) { + const ent = sorted_entries[i]; + // when entry is NSFS_FOLDER_OBJECT_NAME=.folder file, + // and the dir key marker is the name of the curr directory - skip on adding it + if (ent.name === config.NSFS_FOLDER_OBJECT_NAME && dir_key === marker_dir) { + continue; + } + await process_entry({bucket_path, bucket_id}, ent, marker_curr, dir_path, dir_key, list_obj); + // since we traverse entries in sorted order, + // we can break as soon as enough keys are collected. + if (list_obj.is_truncated) break; + } + return; + } + // for large dirs we cannot keep all entries in memory + // so we have to stream the entries one by one while filtering only the needed ones. + try { + if (list_obj.list_type === '2') { + // For unsorted listing dir position is used to when pagination split the items. + dbg.warn('NamespaceFS: open unsorted dir streaming', dir_path, 'size', cached_dir.stat.size, 'key_marker', list_obj.keymarker); + dir_handle = await nb_native().fs.opendir(list_obj.fs_context, dir_path); //, { bufferSize: 128 }); + if (list_obj.keymarker.marker_pos) { + await dir_handle.seekdir(list_obj.fs_context, BigInt(list_obj.keymarker.marker_pos)); + list_obj.keymarker.marker_pos = undefined; + } + await process_unsort_entry(bucket_path, bucket_id, dir_key, marker_curr, dir_path, list_obj, dir_handle); + } else { + dbg.warn('NamespaceFS: open dir streaming', dir_path, 'size', cached_dir.stat.size); + dir_handle = await nb_native().fs.opendir(list_obj.fs_context, dir_path); //, { bufferSize: 128 }); + for (;;) { + const dir_entry = await dir_handle.read(list_obj.fs_context); + if (!dir_entry) break; + await process_entry({bucket_path, bucket_id}, dir_entry, marker_curr, dir_path, dir_key, list_obj); + // since we dir entries streaming order is not sorted, + // we have to keep scanning all the keys before we can stop. + } + } + await dir_handle.close(list_obj.fs_context); + dir_handle = null; + } finally { + if (dir_handle) { + try { + dbg.warn('NamespaceFS: close dir streaming', dir_path, 'size', cached_dir.stat.size); + await dir_handle.close(list_obj.fs_context); + } catch (err) { + dbg.error('NamespaceFS: close dir failed', err); + } + dir_handle = null; + } + } +} + + +function _get_filename(file_name) { + if (_is_version_object(file_name)) { + return file_name.substring(0, file_name.indexOf('_mtime-')); + } else if (_is_version_null_version(file_name)) { + return file_name.substring(0, file_name.indexOf(NULL_VERSION_SUFFIX)); + } + return file_name; +} + +/** + * + * @param {*} stat - entity stat yo check + * @param {*} fs_context - account config using to check symbolic links + * @param {*} entry_path - path of symbolic link + * @returns + */ +async function is_directory_or_symlink_to_directory(stat, fs_context, entry_path) { + try { + let r = native_fs_utils.isDirectory(stat); + if (!r && is_symbolic_link(stat)) { + const targetStat = await nb_native().fs.stat(fs_context, entry_path); + if (!targetStat) throw new Error('is_directory_or_symlink_to_directory: targetStat is empty'); + r = native_fs_utils.isDirectory(targetStat); + } + return r; + } catch (err) { + if (err.code !== 'ENOENT') { + throw err; + } + } +} + +/** + * Return false if the entry is outside of the bucket + * @param {*} fs_context + * @param {*} entry_path + * @param {string} bucket_path + * @returns + */ +async function _is_path_in_bucket_boundaries(fs_context, entry_path, bucket_path) { + dbg.log1('check_bucket_boundaries: fs_context', fs_context, 'file_path', entry_path, 'bucket_path', bucket_path); + if (!entry_path.startsWith(bucket_path)) { + dbg.log0('check_bucket_boundaries: the path', entry_path, 'is not in the bucket', bucket_path, 'boundaries'); + return false; + } + try { + // Returns the real path of the entry. + // The entry path may point to regular file or directory, but can have symbolic links + const full_path = await nb_native().fs.realpath(fs_context, entry_path); + if (!full_path.startsWith(bucket_path)) { + dbg.log0('check_bucket_boundaries: the full path', full_path, 'is not in the bucket', bucket_path, 'boundaries'); + return false; + } + } catch (err) { + if (err.code === 'ENOTDIR') { + dbg.warn('_is_path_in_bucket_boundaries: the path', entry_path, 'is not a directory'); + return true; + } + // Error: No such file or directory + // In the upload use case, the destination file desn't exist yet, need to validate the parent dirs path. + if (err.code === 'ENOENT') { + return _is_path_in_bucket_boundaries(fs_context, path.dirname(entry_path), bucket_path); + } + // Read or search permission was denied for a component of the path prefix. + if (err.code === 'EACCES') { + return false; + } + throw error_utils.new_error_code('INTERNAL_ERROR', + 'check_bucket_boundaries error ' + err.code + ' ' + entry_path + ' ' + err, { cause: err }); + } + return true; +} + +/** + * throws AccessDenied, if the entry is outside of the bucket + * @param {*} fs_context + * @param {*} entry_path + * @param {string} bucket_path + */ +async function _check_path_in_bucket_boundaries(fs_context, entry_path, bucket_path) { + if (!config.NSFS_CHECK_BUCKET_BOUNDARIES) return; + if (!(await _is_path_in_bucket_boundaries(fs_context, entry_path, bucket_path))) { + throw error_utils.new_error_code('EACCES', 'Entry ' + entry_path + ' is not in bucket boundaries'); + } +} + +async function check_access(fs_context, dir_path, bucket_path) { + try { + dbg.log0('check_access: dir_path', dir_path, 'fs_context', fs_context); + await _check_path_in_bucket_boundaries(fs_context, dir_path, bucket_path); + await nb_native().fs.checkAccess(fs_context, dir_path); + return true; + } catch (err) { + dbg.error('check_access: error ', err.code, err, dir_path, bucket_path); + const is_bucket_dir = dir_path === bucket_path; + + if (err.code === 'ENOTDIR' && !is_bucket_dir) { + dbg.warn('check_access: the path', dir_path, 'is not a directory'); + return true; + } + // if dir_path is the bucket path we would like to throw an error + // for other dirs we will skip + if (['EPERM', 'EACCES'].includes(err.code) && !is_bucket_dir) { + return false; + } + if (err.code === 'ENOENT' && !is_bucket_dir) { + // invalidate if dir + dir_cache.invalidate({ dir_path, fs_context }); + return false; + } + throw err; + } +} + +function _get_mtime_from_filename(filename) { + if (!_is_version_object(filename)) { + // Latest file wont have time suffix which will push the latest + // object last in the list. So to keep the order maintained, + // returning the latest time. Multiplying with 1e6 to provide + // nano second precision + return BigInt(Date.now() * 1e6); + } + const file_parts = filename.split('-'); + return size_utils.string_to_bigint(file_parts[file_parts.length - 3], 36); +} + +// This is helper function for list object version +// In order to sort the entries by name we would like to change the name of files with suffix of '_null' +// to have the structure of _mtime-...-ino-... as version id. +// This function returns a set that contains all file names that were changed (after change) +// and an array old_versions_after_rename which is old_versions without the versions that stat failed on +async function _rename_null_version(old_versions, fs_context, version_path) { + const renamed_null_versions_set = new Set(); + const old_versions_after_rename = []; + + for (const old_version of old_versions) { + if (_is_version_null_version(old_version.name)) { + try { + const stat = await nb_native().fs.stat(fs_context, path.join(version_path, old_version.name)); + const mtime_ino = _get_version_id_by_stat(stat); + const original_name = _get_filename(old_version.name); + const version_with_mtime_ino = original_name + '_' + mtime_ino; + old_version.name = version_with_mtime_ino; + renamed_null_versions_set.add(version_with_mtime_ino); + } catch (err) { + // to cover an edge case where stat fails + // for example another process deleted an object and we get ENOENT + // just before executing this command but after the starting list object versions + dbg.error(`_rename_null_version of ${old_version.name} got error:`, err); + old_version.name = undefined; + } + } + if (old_version.name) old_versions_after_rename.push(old_version); + } + return { renamed_null_versions_set, old_versions_after_rename }; +} + + +/** + * @param {fs.Dirent} first_entry + * @param {fs.Dirent} second_entry + * @returns {Number} + */ +function sort_entries_by_name_and_time(first_entry, second_entry) { + const first_entry_name = _get_filename(first_entry.name); + const second_entry_name = _get_filename(second_entry.name); + if (first_entry_name === second_entry_name) { + const first_entry_mtime = _get_mtime_from_filename(first_entry.name); + const second_entry_mtime = _get_mtime_from_filename(second_entry.name); + // To sort the versions in the latest first order, + // below logic is followed + if (second_entry_mtime < first_entry_mtime) return -1; + if (second_entry_mtime > first_entry_mtime) return 1; + return 0; + } else { + if (first_entry_name < second_entry_name) return -1; + if (first_entry_name > second_entry_name) return 1; + return 0; + } +} + +function to_xattr(fs_xattr) { + const xattr = _.mapKeys(fs_xattr, (val, key) => { + // Prioritize ignore list + if (XATTR_METADATA_IGNORE_LIST.includes(key)) return ''; + + // Fallback to rules + + if (key.startsWith(XATTR_USER_PREFIX) && !key.startsWith(XATTR_NOOBAA_INTERNAL_PREFIX)) { + return key.slice(XATTR_USER_PREFIX.length); + } + + return ''; + }); + + // keys which do not start with prefix will all map to the empty string key, so we remove it once + delete xattr['']; + // @ts-ignore + xattr[s3_utils.XATTR_SORT_SYMBOL] = true; + return xattr; +} + + +function is_symbolic_link(stat) { + if (!stat) throw new Error('isSymbolicLink: stat is empty'); + if (stat.mode) { + // eslint-disable-next-line no-bitwise + return (((stat.mode) & nb_native().fs.S_IFMT) === nb_native().fs.S_IFLNK); + } else if (stat.type) { + return stat.type === nb_native().fs.DT_LNK; + } else { + throw new Error(`isSymbolicLink: stat ${stat} is not supported`); + } +} + +function _is_version_or_null_in_file_name(filename) { + const is_version_object = _is_version_object(filename); + if (!is_version_object) { + return _is_version_null_version(filename); + } + return is_version_object; +} + +function _is_version_object(filename) { + const mtime_substr_index = filename.indexOf('_mtime-'); + if (mtime_substr_index < 0) return false; + const ino_substr_index = filename.indexOf('-ino-'); + return ino_substr_index > mtime_substr_index; +} + +function _is_version_null_version(filename) { + return filename.endsWith(NULL_VERSION_SUFFIX); +} + +/** + * @param {string} bucket_id + * @returns {string} + */ +function get_bucket_tmpdir_name(bucket_id) { + return native_fs_utils.get_bucket_tmpdir_name(bucket_id); +} + +function _is_hidden_version_path(dir_key) { + const idx = dir_key.indexOf(HIDDEN_VERSIONS_PATH); + return ((idx === 0) || (idx > 0 && dir_key[idx - 1] === '/')); +} + +/** + * @param {string} dir_key + * @param {fs.Dirent} ent + * @returns {string} + */ +function _get_entry_key(dir_key, ent, isDir) { + if (ent.name === config.NSFS_FOLDER_OBJECT_NAME) return dir_key; + return dir_key + ent.name + (isDir ? '/' : ''); +} + + /** + * @param {string} dir_key + * @param {fs.Dirent} ent + * @returns {string} + */ +function _get_version_entry_key(dir_key, ent) { + if (ent.name === config.NSFS_FOLDER_OBJECT_NAME) return dir_key; + return dir_key + HIDDEN_VERSIONS_PATH + '/' + ent.name; +} + +/** + * + * @param {string} file_path - fs context object + * @param {string} key - fs_xattr object to be set on a directory + * @returns {boolean} - describes if the file path describe a directory content +*/ +function _is_directory_content(file_path, key) { + return (file_path && file_path.endsWith(config.NSFS_FOLDER_OBJECT_NAME)) && (key && key.endsWith('/')); +} + +/** + * @param {fs.Dirent} e + * @returns {string} + */ +function get_entry_name(e) { + return e.name; +} + +/** + * @param {string} name + * @returns {fs.Dirent} + */ +function make_named_dirent(name) { + const entry = new fs.Dirent(); + entry.name = name; + return entry; +} + +/** + * @param {fs.Dirent} a + * @param {fs.Dirent} b + * @returns {1|-1|0} + */ +function sort_entries_by_name(a, b) { + if (a.name < b.name) return -1; + if (a.name > b.name) return 1; + return 0; +} + +function _get_version_id_by_stat({ino, mtimeNsBigint}) { + // TODO: GPFS might require generation number to be added to version_id + return 'mtime-' + mtimeNsBigint.toString(36) + '-ino-' + ino.toString(36); +} + +/** + * @returns {string} + */ +function _get_etag(stat) { + const xattr_etag = _etag_from_fs_xattr(stat.xattr); + if (xattr_etag) return xattr_etag; + // IMPORTANT NOTICE - we must return an etag that contains a dash! + // because this is the criteria of S3 SDK to decide if etag represents md5 + // and perform md5 validation of the data. + return _get_version_id_by_stat(stat); +} + +function _get_encryption_info(stat) { + // Currently encryption is supported only on top of GPFS, otherwise we will return undefined + return stat.xattr['gpfs.Encryption'] ? { + algorithm: 'AES256', + kms_key_id: '', + context_b64: '', + key_md5_b64: '', + key_b64: '', + } : undefined; +} + +function _etag_from_fs_xattr(xattr) { + if (_.isEmpty(xattr)) return undefined; + return xattr[XATTR_MD5_KEY]; +} + +function _number_of_tags_fs_xttr(xattr) { + return Object.keys(xattr).filter(xattr_key => xattr_key.includes(XATTR_TAG)).length; +} + +/** + * _get_object_owner in the future we will return object owner + * currently not implemented because ACLs are not implemented as well + */ +function _get_object_owner() { + return undefined; +} + +exports.is_symbolic_link = is_symbolic_link; +exports.insert_unsort_entry_to_results_arr = insert_unsort_entry_to_results_arr; +exports.insert_entry_to_results_arr = insert_entry_to_results_arr; +exports.process_entry = process_entry; +exports.process_unsort_entry = process_unsort_entry; +exports.is_directory_or_symlink_to_directory = is_directory_or_symlink_to_directory; +exports._is_version_object = _is_version_object; +exports._is_version_null_version = _is_version_null_version; +exports._is_hidden_version_path = _is_hidden_version_path; +exports._get_filename = _get_filename; +exports.process_dir = process_dir; +exports.get_entry_name = get_entry_name; +exports.make_named_dirent = make_named_dirent; +exports.to_xattr = to_xattr; +exports.check_access = check_access; +exports._get_version_id_by_stat = _get_version_id_by_stat; +exports._get_etag = _get_etag; +exports._get_encryption_info = _get_encryption_info; +exports._number_of_tags_fs_xttr = _number_of_tags_fs_xttr; +exports._get_object_owner = _get_object_owner; +exports._check_path_in_bucket_boundaries = _check_path_in_bucket_boundaries; +exports._is_path_in_bucket_boundaries = _is_path_in_bucket_boundaries; +exports._is_directory_content = _is_directory_content; +exports._etag_from_fs_xattr = _etag_from_fs_xattr; + +exports.NULL_VERSION_SUFFIX = NULL_VERSION_SUFFIX; +exports.NULL_VERSION_ID = NULL_VERSION_ID; +exports.NULL_VERSION_SUFFIX = NULL_VERSION_SUFFIX; +exports.HIDDEN_VERSIONS_PATH = HIDDEN_VERSIONS_PATH; +exports.XATTR_USER_PREFIX = XATTR_USER_PREFIX; +exports.XATTR_NOOBAA_INTERNAL_PREFIX = XATTR_NOOBAA_INTERNAL_PREFIX; +exports.XATTR_DIR_CONTENT = XATTR_DIR_CONTENT; +exports.XATTR_STORAGE_CLASS_KEY = XATTR_STORAGE_CLASS_KEY; +exports.XATTR_TAG = XATTR_TAG; +exports.XATTR_MD5_KEY = XATTR_MD5_KEY; +exports.XATTR_METADATA_IGNORE_LIST = XATTR_METADATA_IGNORE_LIST;