From b696beea65bbffd31edac169ccf9708f46ab9652 Mon Sep 17 00:00:00 2001 From: Hein-Pieter van Braam Date: Wed, 15 Feb 2017 14:41:16 +0100 Subject: [PATCH] Correct hash behavior for floating point numbers This fixes HashMap where a key or part of a key is a floating point number. To fix this the following has been done: * HashMap now takes an extra template argument Comparator. This class gets used to compare keys. The default Comperator now works correctly for common types and floating point numbets. * Variant implements ::hash_compare() now. This function implements nan-safe comparison for all types with components that contain floating point numbers. * Variant now has a VariantComparator which uses Variant::hash_compare() safely compare floating point components of variant's types. * The hash functions for floating point numbers will now normalize NaN values so that all floating point numbers that are NaN hash to the same value. C++ module writers that want to use HashMap internally in their modules can now also safeguard against this crash by defining their on Comperator class that safely compares their types. GDScript users, or writers of modules that don't use HashMap internally in their modules don't need to do anything. This fixes #7354 and fixes #6947. --- core/class_db.cpp | 2 +- core/hash_map.h | 61 ++++--- core/hashfuncs.h | 42 ++++- core/math/math_funcs.h | 1 + core/object.h | 2 +- core/variant.cpp | 187 ++++++++++++++++++++- core/variant.h | 5 + drivers/gles2/shader_gles2.h | 2 +- drivers/gles3/shader_gles3.h | 2 +- modules/gdscript/gd_compiler.h | 2 +- modules/gdscript/gd_tokenizer.cpp | 2 +- scene/resources/packed_scene.cpp | 8 +- scene/resources/packed_scene.h | 4 +- tools/editor/plugins/baked_light_baker.cpp | 4 +- 14 files changed, 273 insertions(+), 51 deletions(-) diff --git a/core/class_db.cpp b/core/class_db.cpp index 76c2f0633a8..4bdae45fb9f 100644 --- a/core/class_db.cpp +++ b/core/class_db.cpp @@ -295,7 +295,7 @@ uint64_t ClassDB::get_api_hash(APIType p_api) { OBJTYPE_RLOCK; #ifdef DEBUG_METHODS_ENABLED - uint64_t hash = hash_djb2_one_64(HashMapHahserDefault::hash(VERSION_FULL_NAME)); + uint64_t hash = hash_djb2_one_64(HashMapHasherDefault::hash(VERSION_FULL_NAME)); List names; diff --git a/core/hash_map.h b/core/hash_map.h index 0d552069357..515fc6c4fe7 100644 --- a/core/hash_map.h +++ b/core/hash_map.h @@ -30,40 +30,45 @@ #define HASH_MAP_H #include "hashfuncs.h" +#include "math_funcs.h" #include "error_macros.h" #include "ustring.h" #include "os/memory.h" #include "list.h" - -class HashMapHahserDefault { -public: - +struct HashMapHasherDefault { static _FORCE_INLINE_ uint32_t hash(const String &p_string) { return p_string.hash(); } static _FORCE_INLINE_ uint32_t hash(const char *p_cstr) { return hash_djb2(p_cstr); } - static _FORCE_INLINE_ uint32_t hash(const uint64_t p_int) { - uint64_t v=p_int; - v = (~v) + (v << 18); // v = (v << 18) - v - 1; - v = v ^ (v >> 31); - v = v * 21; // v = (v + (v << 2)) + (v << 4); - v = v ^ (v >> 11); - v = v + (v << 6); - v = v ^ (v >> 22); - return (int) v; - } + static _FORCE_INLINE_ uint32_t hash(const uint64_t p_int) { return hash_one_uint64(p_int); } + static _FORCE_INLINE_ uint32_t hash(const int64_t p_int) { return hash(uint64_t(p_int)); } - - - static _FORCE_INLINE_ uint32_t hash(const uint32_t p_int) { return p_int; } + static _FORCE_INLINE_ uint32_t hash(const float p_float) { return hash_djb2_one_float(p_float); } + static _FORCE_INLINE_ uint32_t hash(const double p_double){ return hash_djb2_one_float(p_double); } + static _FORCE_INLINE_ uint32_t hash(const uint32_t p_int) { return p_int; } static _FORCE_INLINE_ uint32_t hash(const int32_t p_int) { return (uint32_t)p_int; } - static _FORCE_INLINE_ uint32_t hash(const uint16_t p_int) { return p_int; } + static _FORCE_INLINE_ uint32_t hash(const uint16_t p_int) { return p_int; } static _FORCE_INLINE_ uint32_t hash(const int16_t p_int) { return (uint32_t)p_int; } static _FORCE_INLINE_ uint32_t hash(const uint8_t p_int) { return p_int; } - static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return (uint32_t)p_int; } - static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar) { return (uint32_t)p_wchar; } + static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return (uint32_t)p_int; } + static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar){ return (uint32_t)p_wchar; } //static _FORCE_INLINE_ uint32_t hash(const void* p_ptr) { return uint32_t(uint64_t(p_ptr))*(0x9e3779b1L); } }; +template +struct HashMapComparatorDefault { + static bool compare(const T& p_lhs, const T& p_rhs) { + return p_lhs == p_rhs; + } + + bool compare(const float& p_lhs, const float& p_rhs) { + return (p_lhs == p_rhs) || (Math::is_nan(p_lhs) && Math::is_nan(p_rhs)); + } + + bool compare(const double& p_lhs, const double& p_rhs) { + return (p_lhs == p_rhs) || (Math::is_nan(p_lhs) && Math::is_nan(p_rhs)); + } +}; + /** * @class HashMap * @author Juan Linietsky @@ -74,13 +79,14 @@ public: * @param TKey Key, search is based on it, needs to be hasheable. It is unique in this container. * @param TData Data, data associated with the key * @param Hasher Hasher object, needs to provide a valid static hash function for TKey + * @param Comparator comparator object, needs to be able to safely compare two TKey values. It needs to ensure that x == x for any items inserted in the map. Bear in mind that nan != nan when implementing an equality check. * @param MIN_HASH_TABLE_POWER Miminum size of the hash table, as a power of two. You rarely need to change this parameter. * @param RELATIONSHIP Relationship at which the hash table is resized. if amount of elements is RELATIONSHIP * times bigger than the hash table, table is resized to solve this condition. if RELATIONSHIP is zero, table is always MIN_HASH_TABLE_POWER. * */ -template +template, uint8_t MIN_HASH_TABLE_POWER=3,uint8_t RELATIONSHIP=8> class HashMap { public: @@ -194,7 +200,6 @@ private: } - /* I want to have only one function.. */ _FORCE_INLINE_ const Entry * get_entry( const TKey& p_key ) const { @@ -206,7 +211,7 @@ private: while (e) { /* checking hash first avoids comparing key, which may take longer */ - if (e->hash == hash && e->pair.key == p_key ) { + if (e->hash == hash && Comparator::compare(e->pair.key,p_key) ) { /* the pair exists in this hashtable, so just update data */ return e; @@ -253,7 +258,6 @@ private: for (int i=0;i<( 1<hash == hash && e->pair.key == p_custom_key ) { + if (e->hash == hash && Comparator::compare(e->pair.key,p_custom_key) ) { /* the pair exists in this hashtable, so just update data */ return &e->pair.data; @@ -411,7 +415,7 @@ public: while (e) { /* checking hash first avoids comparing key, which may take longer */ - if (e->hash == hash && e->pair.key == p_custom_key ) { + if (e->hash == hash && Comparator::compare(e->pair.key,p_custom_key) ) { /* the pair exists in this hashtable, so just update data */ return &e->pair.data; @@ -442,7 +446,7 @@ public: while (e) { /* checking hash first avoids comparing key, which may take longer */ - if (e->hash == hash && e->pair.key == p_key ) { + if (e->hash == hash && Comparator::compare(e->pair.key,p_key) ) { if (p) { @@ -637,7 +641,8 @@ public: clear(); } - }; + + #endif diff --git a/core/hashfuncs.h b/core/hashfuncs.h index e9e57d8b42f..121d7e8c59a 100644 --- a/core/hashfuncs.h +++ b/core/hashfuncs.h @@ -29,7 +29,8 @@ #ifndef HASHFUNCS_H #define HASHFUNCS_H - +#include "math_funcs.h" +#include "math_defs.h" #include "typedefs.h" /** @@ -69,19 +70,52 @@ static inline uint32_t hash_djb2_one_32(uint32_t p_in,uint32_t p_prev=5381) { return ((p_prev<<5)+p_prev)+p_in; } +static inline uint32_t hash_one_uint64(const uint64_t p_int) { + uint64_t v=p_int; + v = (~v) + (v << 18); // v = (v << 18) - v - 1; + v = v ^ (v >> 31); + v = v * 21; // v = (v + (v << 2)) + (v << 4); + v = v ^ (v >> 11); + v = v + (v << 6); + v = v ^ (v >> 22); + return (int) v; +} + static inline uint32_t hash_djb2_one_float(float p_in,uint32_t p_prev=5381) { union { float f; uint32_t i; } u; - // handle -0 case - if (p_in==0.0f) u.f=0.0f; - else u.f=p_in; + // Normalize +/- 0.0 and NaN values so they hash the same. + if (p_in==0.0f) + u.f=0.0; + else if (Math::is_nan(p_in)) + u.f=Math_NAN; + else + u.f=p_in; return ((p_prev<<5)+p_prev)+u.i; } +// Overload for real_t size changes +static inline uint32_t hash_djb2_one_float(double p_in,uint32_t p_prev=5381) { + union { + double d; + uint64_t i; + } u; + + // Normalize +/- 0.0 and NaN values so they hash the same. + if (p_in==0.0f) + u.d=0.0; + else if (Math::is_nan(p_in)) + u.d=Math_NAN; + else + u.d=p_in; + + return ((p_prev<<5)+p_prev) + hash_one_uint64(u.i); +} + template static inline uint32_t make_uint32_t(T p_in) { diff --git a/core/math/math_funcs.h b/core/math/math_funcs.h index 511af91835f..51877891e35 100644 --- a/core/math/math_funcs.h +++ b/core/math/math_funcs.h @@ -39,6 +39,7 @@ #define Math_PI 3.14159265358979323846 #define Math_SQRT12 0.7071067811865475244008443621048490 #define Math_LN2 0.693147180559945309417 +#define Math_NAN NAN class Math { diff --git a/core/object.h b/core/object.h index ba4fa62e1a8..3fe31bee6b4 100644 --- a/core/object.h +++ b/core/object.h @@ -680,7 +680,7 @@ class ObjectDB { unsigned long i; } u; u.p=p_obj; - return HashMapHahserDefault::hash((uint64_t)u.i); + return HashMapHasherDefault::hash((uint64_t)u.i); } }; diff --git a/core/variant.cpp b/core/variant.cpp index 103c8f67460..132d7a1c882 100644 --- a/core/variant.cpp +++ b/core/variant.cpp @@ -28,6 +28,7 @@ /*************************************************************************/ #include "variant.h" +#include "math_funcs.h" #include "resource.h" #include "print_string.h" #include "scene/main/node.h" @@ -2674,14 +2675,10 @@ uint32_t Variant::hash() const { case INT: { return _data._int; - } break; case REAL: { - MarshallFloat mf; - mf.f=_data._real; - return mf.i; - + return hash_djb2_one_float(_data._real); } break; case STRING: { @@ -2921,6 +2918,186 @@ uint32_t Variant::hash() const { } +#define hash_compare_scalar(p_lhs, p_rhs)\ + ((p_lhs) == (p_rhs)) || (Math::is_nan(p_lhs) == Math::is_nan(p_rhs)) + +#define hash_compare_vector2(p_lhs, p_rhs)\ + (hash_compare_scalar((p_lhs).x, (p_rhs).x)) && \ + (hash_compare_scalar((p_lhs).y, (p_rhs).y)) + +#define hash_compare_vector3(p_lhs, p_rhs)\ + (hash_compare_scalar((p_lhs).x, (p_rhs).x)) && \ + (hash_compare_scalar((p_lhs).y, (p_rhs).y)) && \ + (hash_compare_scalar((p_lhs).z, (p_rhs).z)) + +#define hash_compare_quat(p_lhs, p_rhs)\ + (hash_compare_scalar((p_lhs).x, (p_rhs).x)) && \ + (hash_compare_scalar((p_lhs).y, (p_rhs).y)) && \ + (hash_compare_scalar((p_lhs).z, (p_rhs).z)) && \ + (hash_compare_scalar((p_lhs).w, (p_rhs).w)) + +#define hash_compare_color(p_lhs, p_rhs)\ + (hash_compare_scalar((p_lhs).r, (p_rhs).r)) && \ + (hash_compare_scalar((p_lhs).g, (p_rhs).g)) && \ + (hash_compare_scalar((p_lhs).b, (p_rhs).b)) && \ + (hash_compare_scalar((p_lhs).a, (p_rhs).a)) + +#define hash_compare_pool_array(p_lhs, p_rhs, p_type, p_compare_func)\ + const PoolVector& l = *reinterpret_cast*>(p_lhs);\ + const PoolVector& r = *reinterpret_cast*>(p_rhs);\ + \ + if(l.size() != r.size()) \ + return false; \ + \ + PoolVector::Read lr = l.read(); \ + PoolVector::Read rr = r.read(); \ + \ + for(int i = 0; i < l.size(); ++i) { \ + if(! p_compare_func((lr[0]), (rr[0]))) \ + return false; \ + }\ + \ + return true + +bool Variant::hash_compare(const Variant& p_variant) const { + if (type != p_variant.type) + return false; + + switch( type ) { + case REAL: { + return hash_compare_scalar(_data._real, p_variant._data._real); + } break; + + case VECTOR2: { + const Vector2* l = reinterpret_cast(_data._mem); + const Vector2* r = reinterpret_cast(p_variant._data._mem); + + return hash_compare_vector2(*l, *r); + } break; + + case RECT2: { + const Rect2* l = reinterpret_cast(_data._mem); + const Rect2* r = reinterpret_cast(p_variant._data._mem); + + return (hash_compare_vector2(l->pos, r->pos)) && + (hash_compare_vector2(l->size, r->size)); + } break; + + case TRANSFORM2D: { + Transform2D* l = _data._transform2d; + Transform2D* r = p_variant._data._transform2d; + + for(int i=0;i<3;i++) { + if (! (hash_compare_vector2(l->elements[i], r->elements[i]))) + return false; + } + + return true; + } break; + + case VECTOR3: { + const Vector3* l = reinterpret_cast(_data._mem); + const Vector3* r = reinterpret_cast(p_variant._data._mem); + + return hash_compare_vector3(*l, *r); + } break; + + case PLANE: { + const Plane* l = reinterpret_cast(_data._mem); + const Plane* r = reinterpret_cast(p_variant._data._mem); + + return (hash_compare_vector3(l->normal, r->normal)) && + (hash_compare_scalar(l->d, r->d)); + } break; + + case RECT3: { + const Rect3* l = _data._rect3; + const Rect3* r = p_variant._data._rect3; + + return (hash_compare_vector3(l->pos, r->pos) && + (hash_compare_vector3(l->size, r->size))); + + } break; + + case QUAT: { + const Quat* l = reinterpret_cast(_data._mem); + const Quat* r = reinterpret_cast(p_variant._data._mem); + + return hash_compare_quat(*l, *r); + } break; + + case BASIS: { + const Basis* l = _data._basis; + const Basis* r = p_variant._data._basis; + + for(int i=0;i<3;i++) { + if (! (hash_compare_vector3(l->elements[i], r->elements[i]))) + return false; + } + + return true; + } break; + + case TRANSFORM: { + const Transform* l = _data._transform; + const Transform* r = p_variant._data._transform; + + for(int i=0;i<3;i++) { + if (! (hash_compare_vector3(l->basis.elements[i], r->basis.elements[i]))) + return false; + } + + return hash_compare_vector3(l->origin, r->origin); + } break; + + case COLOR: { + const Color* l = reinterpret_cast(_data._mem); + const Color* r = reinterpret_cast(p_variant._data._mem); + + return hash_compare_color(*l, *r); + } break; + + case ARRAY: { + const Array& l = *(reinterpret_cast(_data._mem)); + const Array& r = *(reinterpret_cast(p_variant._data._mem)); + + if(l.size() != r.size()) + return false; + + for(int i = 0; i < l.size(); ++i) { + if(! l[0].hash_compare(r[0])) + return false; + } + + return true; + } break; + + case POOL_REAL_ARRAY: { + hash_compare_pool_array(_data._mem, p_variant._data._mem, real_t, hash_compare_scalar); + } break; + + case POOL_VECTOR2_ARRAY: { + hash_compare_pool_array(_data._mem, p_variant._data._mem, Vector2, hash_compare_vector2); + } break; + + case POOL_VECTOR3_ARRAY: { + hash_compare_pool_array(_data._mem, p_variant._data._mem, Vector3, hash_compare_vector3); + } break; + + case POOL_COLOR_ARRAY: { + hash_compare_pool_array(_data._mem, p_variant._data._mem, Color, hash_compare_color); + } break; + + default: + bool v; + Variant r; + evaluate(OP_EQUAL,*this,p_variant,r,v); + return r; + } + + return false; +} + bool Variant::is_ref() const { diff --git a/core/variant.h b/core/variant.h index 5936325c1b8..f9ceca1ca0d 100644 --- a/core/variant.h +++ b/core/variant.h @@ -421,6 +421,7 @@ public: bool operator<(const Variant& p_variant) const; uint32_t hash() const; + bool hash_compare(const Variant& p_variant) const; bool booleanize(bool &valid) const; void static_assign(const Variant& p_variant); @@ -459,6 +460,10 @@ struct VariantHasher { static _FORCE_INLINE_ uint32_t hash(const Variant &p_variant) { return p_variant.hash(); } }; +struct VariantComparator { + + static _FORCE_INLINE_ bool compare(const Variant &p_lhs, const Variant &p_rhs) { return p_lhs.hash_compare(p_rhs); } +}; Variant::ObjData& Variant::_get_obj() { diff --git a/drivers/gles2/shader_gles2.h b/drivers/gles2/shader_gles2.h index 509f9a82b4d..004d636c1ea 100644 --- a/drivers/gles2/shader_gles2.h +++ b/drivers/gles2/shader_gles2.h @@ -134,7 +134,7 @@ private: struct VersionKeyHash { - static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHahserDefault::hash(p_key.key); }; + static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHasherDefault::hash(p_key.key); }; }; //this should use a way more cachefriendly version.. diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index ee8db2ac8c9..c2d283d4922 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -149,7 +149,7 @@ private: struct VersionKeyHash { - static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHahserDefault::hash(p_key.key); }; + static _FORCE_INLINE_ uint32_t hash( const VersionKey& p_key) { return HashMapHasherDefault::hash(p_key.key); }; }; //this should use a way more cachefriendly version.. diff --git a/modules/gdscript/gd_compiler.h b/modules/gdscript/gd_compiler.h index dd211a852c7..eb6079e8e0d 100644 --- a/modules/gdscript/gd_compiler.h +++ b/modules/gdscript/gd_compiler.h @@ -93,7 +93,7 @@ class GDCompiler { //int get_identifier_pos(const StringName& p_dentifier) const; - HashMap constant_map; + HashMap constant_map; Map name_map; int get_name_map_pos(const StringName& p_identifier) { diff --git a/modules/gdscript/gd_tokenizer.cpp b/modules/gdscript/gd_tokenizer.cpp index 5be2a2beae9..477a1f1ac81 100644 --- a/modules/gdscript/gd_tokenizer.cpp +++ b/modules/gdscript/gd_tokenizer.cpp @@ -1169,7 +1169,7 @@ Vector GDTokenizerBuffer::parse_code_string(const String& p_code) { Map identifier_map; - HashMap constant_map; + HashMap constant_map; Map line_map; Vector token_array; diff --git a/scene/resources/packed_scene.cpp b/scene/resources/packed_scene.cpp index 17688c366a1..99734fd6565 100644 --- a/scene/resources/packed_scene.cpp +++ b/scene/resources/packed_scene.cpp @@ -361,7 +361,7 @@ static int _nm_get_string(const String& p_string, Map &name_map) return idx; } -static int _vm_get_variant(const Variant& p_variant, HashMap &variant_map) { +static int _vm_get_variant(const Variant& p_variant, HashMap &variant_map) { if (variant_map.has(p_variant)) return variant_map[p_variant]; @@ -371,7 +371,7 @@ static int _vm_get_variant(const Variant& p_variant, HashMap &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map) { +Error SceneState::_parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map) { // this function handles all the work related to properly packing scenes, be it @@ -747,7 +747,7 @@ Error SceneState::_parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map) { +Error SceneState::_parse_connections(Node *p_owner,Node *p_node, Map &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map) { if (p_node!=p_owner && p_node->get_owner() && p_node->get_owner()!=p_owner && !p_owner->is_editable_instance(p_node->get_owner())) return OK; @@ -952,7 +952,7 @@ Error SceneState::pack(Node *p_scene) { Node *scene = p_scene; Map name_map; - HashMap variant_map; + HashMap variant_map; Map node_map; Map nodepath_map; diff --git a/scene/resources/packed_scene.h b/scene/resources/packed_scene.h index 381b356b1e5..4a3841abe9f 100644 --- a/scene/resources/packed_scene.h +++ b/scene/resources/packed_scene.h @@ -91,8 +91,8 @@ class SceneState : public Reference { Vector connections; - Error _parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map); - Error _parse_connections(Node *p_owner,Node *p_node, Map &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map); + Error _parse_node(Node *p_owner,Node *p_node,int p_parent_idx, Map &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map); + Error _parse_connections(Node *p_owner,Node *p_node, Map &name_map,HashMap &variant_map,Map &node_map,Map &nodepath_map); String path; diff --git a/tools/editor/plugins/baked_light_baker.cpp b/tools/editor/plugins/baked_light_baker.cpp index 52220839d91..4219e791d18 100644 --- a/tools/editor/plugins/baked_light_baker.cpp +++ b/tools/editor/plugins/baked_light_baker.cpp @@ -1334,7 +1334,7 @@ void BakedLightBaker::_make_octree_texture() { base<<=16; base|=int((pos.z+cell_size*0.5)/cell_size); - uint32_t hash = HashMapHahserDefault::hash(base); + uint32_t hash = HashMapHasherDefault::hash(base); uint32_t idx = hash % hash_table_size; octhashptr[oct_idx].next=hashptr[idx]; octhashptr[oct_idx].hash=hash; @@ -1362,7 +1362,7 @@ void BakedLightBaker::_make_octree_texture() { base<<=16; base|=int((pos.z+cell_size*0.5)/cell_size); - uint32_t hash = HashMapHahserDefault::hash(base); + uint32_t hash = HashMapHasherDefault::hash(base); uint32_t idx = hash % hash_table_size; uint32_t bucket = hashptr[idx];