diff --git a/packages/vaex-core/src/hash_primitives.hpp b/packages/vaex-core/src/hash_primitives.hpp index f88cbbe144..f9c8796ee9 100644 --- a/packages/vaex-core/src/hash_primitives.hpp +++ b/packages/vaex-core/src/hash_primitives.hpp @@ -303,24 +303,26 @@ class hash_base : public hash_common> { py::object key_array() { py::array_t output_array(this->length()); auto output = output_array.template mutable_unchecked<1>(); - py::gil_scoped_release gil; - auto offsets = this->offsets(); - size_t map_index = 0; - int64_t natural_order = 0; - // TODO: can be parallel due to non-overlapping maps - for (auto &map : this->maps) { - for (auto &el : map) { - key_type key = el.first; - int64_t index = static_cast(*this).key_offset(natural_order++, map_index, el, offsets[map_index]); - output(index) = key; + { + py::gil_scoped_release gil; + auto offsets = this->offsets(); + size_t map_index = 0; + int64_t natural_order = 0; + // TODO: can be parallel due to non-overlapping maps + for (auto &map : this->maps) { + for (auto &el : map) { + key_type key = el.first; + int64_t index = static_cast(*this).key_offset(natural_order++, map_index, el, offsets[map_index]); + output(index) = key; + } + map_index += 1; + } + if (this->nan_count) { + output(this->nan_index()) = NaNish::value; + } + if (this->null_count) { + output(this->null_index()) = -1; } - map_index += 1; - } - if (this->nan_count) { - output(this->nan_index()) = NaNish::value; - } - if (this->null_count) { - output(this->null_index()) = -1; } return output_array; } @@ -630,46 +632,49 @@ class ordered_set : public hash_base, T2, Hashmap2> { if (result.strides()[0] != result.itemsize()) { throw std::runtime_error("stride not equal to bytesize for output"); } - py::gil_scoped_release gil; + { + py::gil_scoped_release gil; - size_t nmaps = this->maps.size(); - auto offsets = this->offsets(); - if (nmaps == 1) { - auto &map0 = this->maps[0]; - for (int64_t i = 0; i < size; i++) { - const key_type &value = input[i]; - // the caller is responsible for finding masked values - if (custom_isnan(value)) { - output[i] = this->nan_value; - // TODO: the test fail here because we pass in NaN for None? - // but of course only in debug mode - assert(this->nan_count > 0); - } else { - auto search = map0.find(value); - if (search == map0.end()) { - output[i] = -1; + size_t nmaps = this->maps.size(); + auto offsets = this->offsets(); + if (nmaps == 1) { + auto &map0 = this->maps[0]; + for (int64_t i = 0; i < size; i++) { + const key_type &value = input[i]; + // the caller is responsible for finding masked values + if (custom_isnan(value)) { + if(this->null_count > 0) { + output[i] = this->nan_value; + } else { + output[i] = -1; + } } else { - output[i] = search->second; + auto search = map0.find(value); + if (search == map0.end()) { + output[i] = -1; + } else { + output[i] = search->second; + } } } - } - } else { - for (int64_t i = 0; i < size; i++) { - const key_type &value = input[i]; - // the caller is responsible for finding masked values - if (custom_isnan(value)) { - output[i] = this->nan_value; - // TODO: the test fail here because we pass in NaN for None? - // but of course only in debug mode - assert(this->nan_count > 0); - } else { - std::size_t hash = hasher_map_choice()(value); - size_t map_index = (hash % nmaps); - auto search = this->maps[map_index].find(value); - if (search == this->maps[map_index].end()) { - output[i] = -1; + } else { + for (int64_t i = 0; i < size; i++) { + const key_type &value = input[i]; + // the caller is responsible for finding masked values + if (custom_isnan(value)) { + output[i] = this->nan_value; + // TODO: the test fail here because we pass in NaN for None? + // but of course only in debug mode + assert(this->nan_count > 0); } else { - output[i] = search->second + offsets[map_index]; + std::size_t hash = hasher_map_choice()(value); + size_t map_index = (hash % nmaps); + auto search = this->maps[map_index].find(value); + if (search == this->maps[map_index].end()) { + output[i] = -1; + } else { + output[i] = search->second + offsets[map_index]; + } } } } diff --git a/packages/vaex-core/src/hash_string.hpp b/packages/vaex-core/src/hash_string.hpp index 40b6b56cad..65d4c9c457 100644 --- a/packages/vaex-core/src/hash_string.hpp +++ b/packages/vaex-core/src/hash_string.hpp @@ -555,19 +555,35 @@ class ordered_set : public hash_base, T, T, V> { return result; } auto output = result.template mutable_unchecked<1>(); - py::gil_scoped_release gil; size_t nmaps = this->maps.size(); auto offsets = this->offsets(); - if (nmaps == 1) { - auto &map0 = this->maps[0]; - // split slow and fast path - if (strings->has_null()) { - for (int64_t i = 0; i < size; i++) { - if (strings->is_null(i)) { - output(i) = this->null_value; - assert(this->null_count > 0); - } else { + { + py::gil_scoped_release gil; + if (nmaps == 1) { + auto &map0 = this->maps[0]; + // split slow and fast path + if (strings->has_null()) { + for (int64_t i = 0; i < size; i++) { + if (strings->is_null(i)) { + if(this->null_count > 0) { + output(i) = this->null_value; + } else { + output(i) = -1; + } + } else { + const string_view &key = strings->view(i); + auto search = map0.find(key); + auto end = map0.end(); + if (search == end) { + output(i) = -1; + } else { + output(i) = search->second; + } + } + } + } else { + for (int64_t i = 0; i < size; i++) { const string_view &key = strings->view(i); auto search = map0.find(key); auto end = map0.end(); @@ -579,27 +595,29 @@ class ordered_set : public hash_base, T, T, V> { } } } else { - for (int64_t i = 0; i < size; i++) { - const string_view &key = strings->view(i); - auto search = map0.find(key); - auto end = map0.end(); - if (search == end) { - output(i) = -1; - } else { - output(i) = search->second; + // split slow and fast path + if (strings->has_null()) { + for (int64_t i = 0; i < size; i++) { + if (strings->is_null(i)) { + output(i) = this->null_value; + assert(this->null_count > 0); + } else { + const string_view &key = strings->view(i); + size_t hash = hasher_map_choice()(key); + size_t map_index = (hash % nmaps); + auto search = this->maps[map_index].find(key, hash); + auto end = this->maps[map_index].end(); + if (search == end) { + output(i) = -1; + } else { + output(i) = search->second + offsets[map_index]; + } + } } - } - } - } else { - // split slow and fast path - if (strings->has_null()) { - for (int64_t i = 0; i < size; i++) { - if (strings->is_null(i)) { - output(i) = this->null_value; - assert(this->null_count > 0); - } else { + } else { + for (int64_t i = 0; i < size; i++) { const string_view &key = strings->view(i); - size_t hash = hasher_map_choice()(key); + std::size_t hash = hasher_map_choice()(key); size_t map_index = (hash % nmaps); auto search = this->maps[map_index].find(key, hash); auto end = this->maps[map_index].end(); @@ -610,19 +628,6 @@ class ordered_set : public hash_base, T, T, V> { } } } - } else { - for (int64_t i = 0; i < size; i++) { - const string_view &key = strings->view(i); - std::size_t hash = hasher_map_choice()(key); - size_t map_index = (hash % nmaps); - auto search = this->maps[map_index].find(key, hash); - auto end = this->maps[map_index].end(); - if (search == end) { - output(i) = -1; - } else { - output(i) = search->second + offsets[map_index]; - } - } } } return result;