-
Notifications
You must be signed in to change notification settings - Fork 80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NSFS | versioning | calculate multipart upload version by creation time #8644
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
#include <sys/fcntl.h> | ||
#include <sys/stat.h> | ||
#include <sys/syscall.h> | ||
#include <sys/time.h> | ||
#include <sys/types.h> | ||
#include <sys/uio.h> | ||
#include <sys/xattr.h> | ||
|
@@ -1351,6 +1352,47 @@ struct GetPwName : public FSWorker | |
} | ||
}; | ||
|
||
/** | ||
* Utimensat is an fs op | ||
* change file times with nanosecond precision (access and modification times) | ||
* see https://man7.org/linux/man-pages/man3/utimensat.3p.html | ||
*/ | ||
struct Utimensat : public FSWorker | ||
{ | ||
std::string _path; | ||
struct timespec _times[2]; | ||
Utimensat(const Napi::CallbackInfo& info) | ||
: FSWorker(info) | ||
{ | ||
_path = info[1].As<Napi::String>(); | ||
Napi::Object napi_times = info[2].As<Napi::Object>(); | ||
std::string new_atime; | ||
get_time_from_info(napi_times, "actime", _times[0], new_atime); | ||
std::string new_mtime; | ||
get_time_from_info(napi_times, "modtime", _times[1], new_mtime); | ||
Begin(XSTR() << "utimensat " << DVAL(_path) << DVAL(new_atime) << DVAL(new_mtime)); | ||
} | ||
virtual void Work() | ||
{ | ||
SYSCALL_OR_RETURN(utimensat(AT_FDCWD, _path.c_str(), _times, 0)); | ||
} | ||
|
||
void get_time_from_info(const Napi::Object& napi_times, const std::string& time_type, struct timespec& timeval, std::string& log_str) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls move helper functions to not be in the middle of the workers There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. get_time_from_info is only relevant to utimensat, so I added it as a member function to the class. where would you propose to move it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I saw helper functions both at the top and at the bottom of this file, so check what you think is the best location |
||
if(napi_times.Get(time_type).IsBigInt()) { | ||
const Napi::BigInt bigint = napi_times.Get(time_type).As<Napi::BigInt>(); | ||
//TODO: handle lossless | ||
bool lossless = true; | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const int64_t time = bigint.Int64Value(&lossless); | ||
timeval.tv_sec = time / int64_t(1e9); | ||
timeval.tv_nsec = time % int64_t(1e9); | ||
log_str = std::to_string(time); | ||
} else { | ||
timeval.tv_nsec = UTIME_OMIT; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when will this happen? was this flow tested? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will happen when one of the times is undefined. it means the time will not be changed. it is tested in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but in namespaceFS we always take a defined mtime, I mean, I don't see how we can reach this code from our flow There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this function is used both by mtime and atime. currently we always get here for atime (I can also see this in the logs). also I think its good to leave an option to just change the atime and not the mtime, in case we want to use this function elsewhere There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. got you, so for atime this flag will just keep it as it is since we pass only mtime |
||
log_str = "UTIME_OMIT"; | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
}; | ||
|
||
struct FileWrap : public Napi::ObjectWrap<FileWrap> | ||
{ | ||
std::string _path; | ||
|
@@ -1374,6 +1416,7 @@ struct FileWrap : public Napi::ObjectWrap<FileWrap> | |
InstanceMethod<&FileWrap::flock>("flock"), | ||
InstanceMethod<&FileWrap::fcntllock>("fcntllock"), | ||
InstanceAccessor<&FileWrap::getfd>("fd"), | ||
InstanceMethod<&FileWrap::utimensat>("utimensat"), | ||
})); | ||
constructor.SuppressDestruct(); | ||
} | ||
|
@@ -1403,6 +1446,7 @@ struct FileWrap : public Napi::ObjectWrap<FileWrap> | |
Napi::Value getfd(const Napi::CallbackInfo& info); | ||
Napi::Value flock(const Napi::CallbackInfo& info); | ||
Napi::Value fcntllock(const Napi::CallbackInfo& info); | ||
Napi::Value utimensat(const Napi::CallbackInfo& info); | ||
}; | ||
|
||
Napi::FunctionReference FileWrap::constructor; | ||
|
@@ -2320,6 +2364,7 @@ fs_napi(Napi::Env env, Napi::Object exports) | |
exports_fs["getsinglexattr"] = Napi::Function::New(env, api<GetSingleXattr>); | ||
exports_fs["getpwname"] = Napi::Function::New(env, api<GetPwName>); | ||
exports_fs["symlink"] = Napi::Function::New(env, api<Symlink>); | ||
exports_fs["utimensat"] = Napi::Function::New(env, api<Utimensat>); | ||
|
||
FileWrap::init(env); | ||
exports_fs["open"] = Napi::Function::New(env, api<FileOpen>); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1505,6 +1505,16 @@ class NamespaceFS { | |
await this._delete_null_version_from_versions_directory(key, fs_context); | ||
} | ||
} | ||
//in case new version is not the latest move straight to .versions dir. can happen for multipart upload | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const prev_version_info = latest_ver_info || await this.find_max_version_past(fs_context, key); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need to notice here that the call to find_max_version_past() will do readdir of the .versions/ dir, search for files with the same key and then stat all of them and finds the max, this might create performance decrease on every PUT.. is there anything we can do to improve these performance issues? maybe using the cache? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure, I can look into it. it maybe good to use it in other places where we call find_max_version_past. also note that I only call this function in case there is no latest version. meaning when the latest is a delete marker There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @nadavMiz when latest is a delete marker and when it's a new object - the first version, which is the most common case, we will read the whole .versions/ folder |
||
if (this._is_versioning_enabled() && prev_version_info && | ||
this._is_version_more_recent(prev_version_info.version_id_str, new_ver_info.version_id_str)) { | ||
const new_versioned_path = this._get_version_path(key, new_ver_info.version_id_str); | ||
dbg.log1('NamespaceFS._move_to_dest_version version ID of key is not the latest move to .versions '); | ||
await native_fs_utils.safe_move(fs_context, new_ver_tmp_path, new_versioned_path, new_ver_info, | ||
gpfs_options?.move_source_to_version, bucket_tmp_dir_path); | ||
break; //since moving staight to .version, no need to change the latest version | ||
} | ||
if (latest_ver_info && | ||
((this._is_versioning_enabled()) || | ||
(this._is_versioning_suspended() && latest_ver_info.version_id_str !== NULL_VERSION_ID))) { | ||
|
@@ -1865,9 +1875,14 @@ class NamespaceFS { | |
} | ||
if (!target_file) target_file = await native_fs_utils.open_file(fs_context, this.bucket_path, upload_path, open_mode); | ||
|
||
const create_object_upload_path = path.join(params.mpu_path, 'create_object_upload'); | ||
const create_object_upload_stat = await nb_native().fs.stat(fs_context, create_object_upload_path); | ||
//according to aws, multipart upload version time should be calculated based on creation rather then completion time. | ||
//change the files mtime to match the creation time | ||
await nb_native().fs.utimensat(fs_context, upload_path, {modtime: create_object_upload_stat.mtimeNsBigint}); | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you think we should fail the whole upload in case we were not able to set the mtime? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess we can use the existing time in case it fails There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @guymguym WDYT? I'm not sure, since it might create a wrong .versions/ stack order, but on the other hand we probably don't want to fail on that
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const { data: create_params_buffer } = await nb_native().fs.readFile( | ||
fs_context, | ||
path.join(params.mpu_path, 'create_object_upload') | ||
create_object_upload_path | ||
); | ||
|
||
const upload_params = { fs_context, upload_path, open_mode, file_path, params, target_file }; | ||
|
@@ -2725,6 +2740,17 @@ class NamespaceFS { | |
return { mtimeNsBigint: size_utils.string_to_bigint(arr[0], 36), ino: parseInt(arr[1], 36) }; | ||
} | ||
|
||
/** | ||
* compares two version strings, and returns if ver1 time is more recent then ver2 | ||
* returns true if ver1 is more recent, otherwise return false | ||
* if at least one of the versions is null return false | ||
*/ | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
_is_version_more_recent(ver1, ver2) { | ||
const ver1_info = this._extract_version_info_from_xattr(ver1); | ||
const ver2_info = this._extract_version_info_from_xattr(ver2); | ||
return ver1_info && ver2_info && ver1_info.mtimeNsBigint > ver2_info.mtimeNsBigint; | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
_get_version_id_by_xattr(stat) { | ||
return (stat && stat.xattr[XATTR_VERSION_ID]) || 'null'; | ||
} | ||
|
@@ -3230,7 +3256,8 @@ class NamespaceFS { | |
} | ||
return { | ||
move_to_versions: { src_file: dst_file, dir_file }, | ||
move_to_dst: { src_file, dst_file, dir_file} | ||
move_to_dst: { src_file, dst_file, dir_file}, | ||
move_source_to_version: {src_file, dir_file} | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
} catch (err) { | ||
dbg.warn('NamespaceFS._open_files_gpfs couldn\'t open files', err); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -953,6 +953,10 @@ interface NativeFS { | |
checkAccess(fs_context: NativeFSContext, path: string): Promise<void>; | ||
getsinglexattr(fs_context: NativeFSContext, path: string, key: string): Promise<string>; | ||
getpwname(fs_context: NativeFSContext, user: string): Promise<NativeFSUserObject>; | ||
utimensat(fs_context: NativeFSContext, path: string, times: { | ||
modtime?: bigint, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not use the conventional mtime and atime? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. modtime and actime are the names used by utime. https://man7.org/linux/man-pages/man2/utime.2.html. utimensat uses an array instead of a struct so it doesn't give them a name, but I decided to use the same naming. I can change them to fit with the names used in our code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so you can choose either, mtime and atime used in our code because of the node.js terms |
||
actime?: bigint | ||
}): Promise<void>; | ||
|
||
readFile( | ||
fs_context: NativeFSContext, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -407,7 +407,54 @@ mocha.describe('bucketspace namespace_fs - versioning', function() { | |
const exist = await version_file_exists(full_path, mpu_key1, '', prev_version_id); | ||
assert.ok(exist); | ||
}); | ||
|
||
mocha.it('mpu object - versioning enabled - put object before running mpu complete. mpu version should move to .versions dir and not latest', async function() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. worth adding a test for null version |
||
const mpu_res = await s3_uid6.createMultipartUpload({ Bucket: bucket_name, Key: mpu_key1 }); | ||
const upload_id = mpu_res.UploadId; | ||
const part1 = await s3_uid6.uploadPart({ | ||
Bucket: bucket_name, Key: mpu_key1, Body: body1, UploadId: upload_id, PartNumber: 1 }); | ||
const res_put_object = await s3_uid6.putObject({Bucket: bucket_name, Key: mpu_key1, Body: body1}); | ||
const res = await s3_uid6.completeMultipartUpload({ | ||
Bucket: bucket_name, | ||
Key: mpu_key1, | ||
UploadId: upload_id, | ||
MultipartUpload: { | ||
Parts: [{ | ||
ETag: part1.ETag, | ||
PartNumber: 1 | ||
}] | ||
} | ||
}); | ||
const exist = await version_file_exists(full_path, mpu_key1, '', res.VersionId); | ||
assert.ok(exist); | ||
const comp_res = await compare_version_ids(full_path, mpu_key1, res_put_object.VersionId, res.VersionId); | ||
assert.ok(comp_res); | ||
}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add an assert so we can see that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. compare_version_ids does exactly that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with Shira it's better to call headObject and check that the latest version is the single put version and not the MPU |
||
|
||
mocha.it('mpu object - versioning enabled - create delete marker before running mpu complete. mpu should move to .versions dir and not latest', async function() { | ||
const mpu_res = await s3_uid6.createMultipartUpload({ Bucket: bucket_name, Key: mpu_key1 }); | ||
const upload_id = mpu_res.UploadId; | ||
const part1 = await s3_uid6.uploadPart({ | ||
Bucket: bucket_name, Key: mpu_key1, Body: body1, UploadId: upload_id, PartNumber: 1 }); | ||
await s3_uid6.deleteObject({Bucket: bucket_name, Key: mpu_key1}); | ||
const res = await s3_uid6.completeMultipartUpload({ | ||
Bucket: bucket_name, | ||
Key: mpu_key1, | ||
UploadId: upload_id, | ||
MultipartUpload: { | ||
Parts: [{ | ||
ETag: part1.ETag, | ||
PartNumber: 1 | ||
}] | ||
} | ||
}); | ||
const exist = await version_file_exists(full_path, mpu_key1, '', res.VersionId); | ||
assert.ok(exist); | ||
const latest_version_dont_exist = fs_utils.file_not_exists(path.join(full_path, mpu_key1)); | ||
assert.ok(latest_version_dont_exist); | ||
}); | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}); | ||
|
||
}); | ||
|
||
// The res of putBucketVersioning is different depends on the versioning state: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -444,6 +444,31 @@ mocha.describe('nb_native fs', async function() { | |
await fs_utils.file_delete(tmp_mv_path); | ||
} | ||
}); | ||
|
||
mocha.describe('Utimensat', async function() { | ||
mocha.it('Utimensat - change both atime and mtime', async function() { | ||
const { utimensat } = nb_native().fs; | ||
const PATH1 = `/tmp/utimensat${Date.now()}_1`; | ||
nadavMiz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
await create_file(PATH1); | ||
const new_time = BigInt(Date.now()); | ||
await utimensat(DEFAULT_FS_CONFIG, PATH1, {modtime: new_time, actime: new_time}); | ||
const res = await nb_native().fs.stat(DEFAULT_FS_CONFIG, PATH1); | ||
assert.equal(res.atimeNsBigint, new_time); | ||
assert.equal(res.mtimeNsBigint, new_time); | ||
}); | ||
|
||
mocha.it('Utimensat - change only mtime', async function() { | ||
const { utimensat } = nb_native().fs; | ||
const PATH1 = `/tmp/utimensat${Date.now()}_1`; | ||
await create_file(PATH1); | ||
const new_time = BigInt(Date.now()); | ||
const res0 = await nb_native().fs.stat(DEFAULT_FS_CONFIG, PATH1); | ||
await utimensat(DEFAULT_FS_CONFIG, PATH1, {modtime: new_time}); | ||
const res1 = await nb_native().fs.stat(DEFAULT_FS_CONFIG, PATH1); | ||
assert.equal(res1.atimeNsBigint, res0.atimeNsBigint); | ||
assert.equal(res1.mtimeNsBigint, new_time); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a check that it really changed? the timestamp are very close |
||
}); | ||
}); | ||
}); | ||
}); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does mac have utimensat() function and same behavior?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
its hard to know for sure as I don't have mac. but from what I found its available from osx 10.13. not sure what version of mac we start to support from.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please always make sure you are not breaking native build for the developers on mac, I sent you on slack the man utimensat
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
right, than it looks like it works on mac. also I should have mentioned it before: utimensat is defined in POSIX, so it should work on mac the same. even though you can never know. We will test this on mac regardless before pushing