Merge pull request #44319 from reduz/integrate-meshoptimizer
Rework Mesh handling on scene importing.

commit fe49eaa544
@@ -3654,6 +3654,9 @@ void EditorNode::register_editor_types() {
 	ClassDB::register_class<ScriptCreateDialog>();
 	ClassDB::register_class<EditorFeatureProfile>();
 	ClassDB::register_class<EditorSpinSlider>();
+	ClassDB::register_class<EditorSceneImporterMesh>();
+	ClassDB::register_class<EditorSceneImporterMeshNode>();
+
 	ClassDB::register_virtual_class<FileSystemDock>();

 	// FIXME: Is this stuff obsolete, or should it be ported to new APIs?
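Note: registering the two new classes with ClassDB is what exposes them to scripting, the property system, and scene serialization. A rough standalone sketch of what a name-to-constructor registry buys (illustrative C++ only, not Godot's actual ClassDB internals):

```cpp
#include <functional>
#include <map>
#include <memory>
#include <string>

struct Object { virtual ~Object() = default; };
struct EditorSceneImporterMesh : Object {};

// Name -> factory map; serialized scenes can then instantiate
// "EditorSceneImporterMesh" from its class name string alone.
static std::map<std::string, std::function<std::unique_ptr<Object>()>> class_db;

template <typename T>
void register_class(const std::string &p_name) {
	class_db[p_name] = [] { return std::make_unique<T>(); };
}

int main() {
	register_class<EditorSceneImporterMesh>("EditorSceneImporterMesh");
	std::unique_ptr<Object> obj = class_db["EditorSceneImporterMesh"]();
	return obj ? 0 : 1;
}
```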
@@ -67,7 +67,7 @@ struct ColladaImport {
 	Map<String, NodeMap> node_map; //map from collada node to engine node
 	Map<String, String> node_name_map; //map from collada node to engine node
-	Map<String, Ref<ArrayMesh>> mesh_cache;
+	Map<String, Ref<EditorSceneImporterMesh>> mesh_cache;
 	Map<String, Ref<Curve3D>> curve_cache;
 	Map<String, Ref<Material>> material_cache;
 	Map<Collada::Node *, Skeleton3D *> skeleton_map;
@@ -83,7 +83,7 @@ struct ColladaImport {
 	Error _create_scene(Collada::Node *p_node, Node3D *p_parent);
 	Error _create_resources(Collada::Node *p_node, bool p_use_compression);
 	Error _create_material(const String &p_target);
-	Error _create_mesh_surfaces(bool p_optimize, Ref<ArrayMesh> &p_mesh, const Map<String, Collada::NodeGeometry::Material> &p_material_map, const Collada::MeshData &meshdata, const Transform &p_local_xform, const Vector<int> &bone_remap, const Collada::SkinControllerData *p_skin_controller, const Collada::MorphControllerData *p_morph_data, Vector<Ref<ArrayMesh>> p_morph_meshes = Vector<Ref<ArrayMesh>>(), bool p_use_compression = false, bool p_use_mesh_material = false);
+	Error _create_mesh_surfaces(bool p_optimize, Ref<EditorSceneImporterMesh> &p_mesh, const Map<String, Collada::NodeGeometry::Material> &p_material_map, const Collada::MeshData &meshdata, const Transform &p_local_xform, const Vector<int> &bone_remap, const Collada::SkinControllerData *p_skin_controller, const Collada::MorphControllerData *p_morph_data, Vector<Ref<EditorSceneImporterMesh>> p_morph_meshes = Vector<Ref<EditorSceneImporterMesh>>(), bool p_use_compression = false, bool p_use_mesh_material = false);
 	Error load(const String &p_path, int p_flags, bool p_force_make_tangents = false, bool p_use_compression = false);
 	void _fix_param_animation_tracks();
 	void create_animation(int p_clip, bool p_make_tracks_in_all_bones, bool p_import_value_tracks);
@@ -278,8 +278,8 @@ Error ColladaImport::_create_scene(Collada::Node *p_node, Node3D *p_parent) {
 				node = memnew(Path3D);
 			} else {
 				//mesh since nothing else
-				node = memnew(MeshInstance3D);
-				//Object::cast_to<MeshInstance3D>(node)->set_flag(GeometryInstance3D::FLAG_USE_BAKED_LIGHT, true);
+				node = memnew(EditorSceneImporterMeshNode);
+				//Object::cast_to<EditorSceneImporterMeshNode>(node)->set_flag(GeometryInstance3D::FLAG_USE_BAKED_LIGHT, true);
 			}
 		} break;
 		case Collada::Node::TYPE_SKELETON: {
@@ -440,7 +440,7 @@ Error ColladaImport::_create_material(const String &p_target) {
 	return OK;
 }

-Error ColladaImport::_create_mesh_surfaces(bool p_optimize, Ref<ArrayMesh> &p_mesh, const Map<String, Collada::NodeGeometry::Material> &p_material_map, const Collada::MeshData &meshdata, const Transform &p_local_xform, const Vector<int> &bone_remap, const Collada::SkinControllerData *p_skin_controller, const Collada::MorphControllerData *p_morph_data, Vector<Ref<ArrayMesh>> p_morph_meshes, bool p_use_compression, bool p_use_mesh_material) {
+Error ColladaImport::_create_mesh_surfaces(bool p_optimize, Ref<EditorSceneImporterMesh> &p_mesh, const Map<String, Collada::NodeGeometry::Material> &p_material_map, const Collada::MeshData &meshdata, const Transform &p_local_xform, const Vector<int> &bone_remap, const Collada::SkinControllerData *p_skin_controller, const Collada::MorphControllerData *p_morph_data, Vector<Ref<EditorSceneImporterMesh>> p_morph_meshes, bool p_use_compression, bool p_use_mesh_material) {
 	bool local_xform_mirror = p_local_xform.basis.determinant() < 0;

 	if (p_morph_data) {
@@ -457,9 +457,9 @@ Error ColladaImport::_create_mesh_surfaces(bool p_optimize, Ref<ArrayMesh> &p_me
 			p_mesh->add_blend_shape(name);
 		}
 		if (p_morph_data->mode == "RELATIVE") {
-			p_mesh->set_blend_shape_mode(ArrayMesh::BLEND_SHAPE_MODE_RELATIVE);
+			p_mesh->set_blend_shape_mode(Mesh::BLEND_SHAPE_MODE_RELATIVE);
 		} else if (p_morph_data->mode == "NORMALIZED") {
-			p_mesh->set_blend_shape_mode(ArrayMesh::BLEND_SHAPE_MODE_NORMALIZED);
+			p_mesh->set_blend_shape_mode(Mesh::BLEND_SHAPE_MODE_NORMALIZED);
 		}
 	}

@@ -897,7 +897,7 @@ Error ColladaImport::_create_mesh_surfaces(bool p_optimize, Ref<ArrayMesh> &p_me
 		////////////////////////////

 		for (int mi = 0; mi < p_morph_meshes.size(); mi++) {
-			Array a = p_morph_meshes[mi]->surface_get_arrays(surface);
+			Array a = p_morph_meshes[mi]->get_surface_arrays(surface);
 			//add valid weight and bone arrays if they exist, TODO check if they are unique to shape (generally not)

 			if (has_weights) {
@@ -910,14 +910,15 @@ Error ColladaImport::_create_mesh_surfaces(bool p_optimize, Ref<ArrayMesh> &p_me
 				mr.push_back(a);
 			}

-			p_mesh->add_surface_from_arrays(Mesh::PRIMITIVE_TRIANGLES, d, mr, Dictionary(), 0);
-
+			String surface_name;
+			Ref<Material> mat;
 			if (material.is_valid()) {
 				if (p_use_mesh_material) {
-					p_mesh->surface_set_material(surface, material);
+					mat = material;
 				}
-				p_mesh->surface_set_name(surface, material->get_name());
+				surface_name = material->get_name();
 			}
+			p_mesh->add_surface(Mesh::PRIMITIVE_TRIANGLES, d, mr, Dictionary(), mat, surface_name);
 		}

 		/*****************/
@@ -1002,10 +1003,10 @@ Error ColladaImport::_create_resources(Collada::Node *p_node, bool p_use_compres
 			}
 		}

-		if (Object::cast_to<MeshInstance3D>(node)) {
+		if (Object::cast_to<EditorSceneImporterMeshNode>(node)) {
 			Collada::NodeGeometry *ng2 = static_cast<Collada::NodeGeometry *>(p_node);

-			MeshInstance3D *mi = Object::cast_to<MeshInstance3D>(node);
+			EditorSceneImporterMeshNode *mi = Object::cast_to<EditorSceneImporterMeshNode>(node);

 			ERR_FAIL_COND_V(!mi, ERR_BUG);

@@ -1014,7 +1015,7 @@ Error ColladaImport::_create_resources(Collada::Node *p_node, bool p_use_compres
 			String meshid;
 			Transform apply_xform;
 			Vector<int> bone_remap;
-			Vector<Ref<ArrayMesh>> morphs;
+			Vector<Ref<EditorSceneImporterMesh>> morphs;

 			if (ng2->controller) {
 				String ngsource = ng2->source;
@@ -1083,10 +1084,10 @@ Error ColladaImport::_create_resources(Collada::Node *p_node, bool p_use_compres
 						for (int i = 0; i < names.size(); i++) {
 							String meshid2 = names[i];
 							if (collada.state.mesh_data_map.has(meshid2)) {
-								Ref<ArrayMesh> mesh = Ref<ArrayMesh>(memnew(ArrayMesh));
+								Ref<EditorSceneImporterMesh> mesh = Ref<EditorSceneImporterMesh>(memnew(EditorSceneImporterMesh));
 								const Collada::MeshData &meshdata = collada.state.mesh_data_map[meshid2];
 								mesh->set_name(meshdata.name);
-								Error err = _create_mesh_surfaces(false, mesh, ng2->material_map, meshdata, apply_xform, bone_remap, skin, nullptr, Vector<Ref<ArrayMesh>>(), false);
+								Error err = _create_mesh_surfaces(false, mesh, ng2->material_map, meshdata, apply_xform, bone_remap, skin, nullptr, Vector<Ref<EditorSceneImporterMesh>>(), false);
 								ERR_FAIL_COND_V(err, err);

 								morphs.push_back(mesh);
@@ -1109,7 +1110,7 @@ Error ColladaImport::_create_resources(Collada::Node *p_node, bool p_use_compres
 				meshid = ng2->source;
 			}

-			Ref<ArrayMesh> mesh;
+			Ref<EditorSceneImporterMesh> mesh;
 			if (mesh_cache.has(meshid)) {
 				mesh = mesh_cache[meshid];
 			} else {
@@ -1117,7 +1118,7 @@ Error ColladaImport::_create_resources(Collada::Node *p_node, bool p_use_compres
 				//bleh, must ignore invalid

 				ERR_FAIL_COND_V(!collada.state.mesh_data_map.has(meshid), ERR_INVALID_DATA);
-				mesh = Ref<ArrayMesh>(memnew(ArrayMesh));
+				mesh = Ref<EditorSceneImporterMesh>(memnew(EditorSceneImporterMesh));
 				const Collada::MeshData &meshdata = collada.state.mesh_data_map[meshid];
 				mesh->set_name(meshdata.name);
 				Error err = _create_mesh_surfaces(morphs.size() == 0, mesh, ng2->material_map, meshdata, apply_xform, bone_remap, skin, morph, morphs, p_use_compression, use_mesh_builtin_materials);
@@ -970,8 +970,6 @@ Error EditorSceneImporterGLTF::_parse_meshes(GLTFState &state) {
 		return OK;
 	}

-	uint32_t mesh_flags = 0;
-
 	Array meshes = state.json["meshes"];
 	for (GLTFMeshIndex i = 0; i < meshes.size(); i++) {
 		print_verbose("glTF: Parsing mesh: " + itos(i));
@@ -979,6 +977,7 @@ Error EditorSceneImporterGLTF::_parse_meshes(GLTFState &state) {

 		GLTFMesh mesh;
 		mesh.mesh.instance();
+		bool has_vertex_color = false;

 		ERR_FAIL_COND_V(!d.has("primitives"), ERR_PARSE_ERROR);

@@ -1034,6 +1033,7 @@ Error EditorSceneImporterGLTF::_parse_meshes(GLTFState &state) {
 			}
 			if (a.has("COLOR_0")) {
 				array[Mesh::ARRAY_COLOR] = _decode_accessor_as_color(state, a["COLOR_0"], true);
+				has_vertex_color = true;
 			}
 			if (a.has("JOINTS_0")) {
 				array[Mesh::ARRAY_BONES] = _decode_accessor_as_ints(state, a["JOINTS_0"], true);
@@ -1112,7 +1112,7 @@ Error EditorSceneImporterGLTF::_parse_meshes(GLTFState &state) {

 				//ideally BLEND_SHAPE_MODE_RELATIVE since gltf2 stores in displacement
 				//but it could require a larger refactor?
-				mesh.mesh->set_blend_shape_mode(ArrayMesh::BLEND_SHAPE_MODE_NORMALIZED);
+				mesh.mesh->set_blend_shape_mode(Mesh::BLEND_SHAPE_MODE_NORMALIZED);

 				if (j == 0) {
 					const Array &target_names = extras.has("targetNames") ? (Array)extras["targetNames"] : Array();
@@ -1226,21 +1226,25 @@ Error EditorSceneImporterGLTF::_parse_meshes(GLTFState &state) {
 		}

-		//just add it
-		mesh.mesh->add_surface_from_arrays(primitive, array, morphs, Dictionary(), mesh_flags);
-
+		Ref<Material> mat;
 		if (p.has("material")) {
 			const int material = p["material"];
 			ERR_FAIL_INDEX_V(material, state.materials.size(), ERR_FILE_CORRUPT);
-			const Ref<Material> &mat = state.materials[material];
-
-			mesh.mesh->surface_set_material(mesh.mesh->get_surface_count() - 1, mat);
-		} else {
-			Ref<StandardMaterial3D> mat;
-			mat.instance();
-			mat->set_flag(StandardMaterial3D::FLAG_ALBEDO_FROM_VERTEX_COLOR, true);
-
-			mesh.mesh->surface_set_material(mesh.mesh->get_surface_count() - 1, mat);
+			Ref<StandardMaterial3D> mat3d = state.materials[material];
+			if (has_vertex_color) {
+				mat3d->set_flag(StandardMaterial3D::FLAG_ALBEDO_FROM_VERTEX_COLOR, true);
+			}
+			mat = mat3d;
+		} else if (has_vertex_color) {
+			Ref<StandardMaterial3D> mat3d;
+			mat3d.instance();
+			mat3d->set_flag(StandardMaterial3D::FLAG_ALBEDO_FROM_VERTEX_COLOR, true);
+			mat = mat3d;
 		}
+
+		mesh.mesh->add_surface(primitive, array, morphs, Dictionary(), mat);
 	}

 	mesh.blend_weights.resize(mesh.mesh->get_blend_shape_count());
@@ -1440,7 +1444,8 @@ Error EditorSceneImporterGLTF::_parse_materials(GLTFState &state) {
 		if (d.has("name")) {
 			material->set_name(d["name"]);
 		}
-		material->set_flag(StandardMaterial3D::FLAG_ALBEDO_FROM_VERTEX_COLOR, true);
+		//don't do this here only if vertex color exists
+		//material->set_flag(StandardMaterial3D::FLAG_ALBEDO_FROM_VERTEX_COLOR, true);

 		if (d.has("pbrMetallicRoughness")) {
 			const Dictionary &mr = d["pbrMetallicRoughness"];
@@ -2586,12 +2591,12 @@ BoneAttachment3D *EditorSceneImporterGLTF::_generate_bone_attachment(GLTFState &
 	return bone_attachment;
 }

-MeshInstance3D *EditorSceneImporterGLTF::_generate_mesh_instance(GLTFState &state, Node *scene_parent, const GLTFNodeIndex node_index) {
+EditorSceneImporterMeshNode *EditorSceneImporterGLTF::_generate_mesh_instance(GLTFState &state, Node *scene_parent, const GLTFNodeIndex node_index) {
 	const GLTFNode *gltf_node = state.nodes[node_index];

 	ERR_FAIL_INDEX_V(gltf_node->mesh, state.meshes.size(), nullptr);

-	MeshInstance3D *mi = memnew(MeshInstance3D);
+	EditorSceneImporterMeshNode *mi = memnew(EditorSceneImporterMeshNode);
 	print_verbose("glTF: Creating mesh for: " + gltf_node->name);

 	GLTFMesh &mesh = state.meshes.write[gltf_node->mesh];
@@ -3058,7 +3063,7 @@ void EditorSceneImporterGLTF::_process_mesh_instances(GLTFState &state, Node3D *
 		const GLTFSkinIndex skin_i = node->skin;

 		Map<GLTFNodeIndex, Node *>::Element *mi_element = state.scene_nodes.find(node_i);
-		MeshInstance3D *mi = Object::cast_to<MeshInstance3D>(mi_element->get());
+		EditorSceneImporterMeshNode *mi = Object::cast_to<EditorSceneImporterMeshNode>(mi_element->get());
 		ERR_FAIL_COND(mi == nullptr);

 		const GLTFSkeletonIndex skel_i = state.skins[node->skin].skeleton;
@@ -38,7 +38,7 @@

 class AnimationPlayer;
 class BoneAttachment3D;
-class MeshInstance3D;
+class EditorSceneImporterMeshNode;

 class EditorSceneImporterGLTF : public EditorSceneImporter {
 	GDCLASS(EditorSceneImporterGLTF, EditorSceneImporter);
@@ -199,7 +199,7 @@ class EditorSceneImporterGLTF : public EditorSceneImporter {
 	};

 	struct GLTFMesh {
-		Ref<ArrayMesh> mesh;
+		Ref<EditorSceneImporterMesh> mesh;
 		Vector<float> blend_weights;
 	};

@@ -262,7 +262,7 @@ class EditorSceneImporterGLTF : public EditorSceneImporter {
 		Vector<GLTFAccessor> accessors;

 		Vector<GLTFMesh> meshes; //meshes are loaded directly, no reason not to.
-		Vector<Ref<Material>> materials;
+		Vector<Ref<StandardMaterial3D>> materials;

 		String scene_name;
 		Vector<int> root_nodes;
@@ -355,7 +355,7 @@ class EditorSceneImporterGLTF : public EditorSceneImporter {
 	Error _parse_animations(GLTFState &state);

 	BoneAttachment3D *_generate_bone_attachment(GLTFState &state, Skeleton3D *skeleton, const GLTFNodeIndex node_index);
-	MeshInstance3D *_generate_mesh_instance(GLTFState &state, Node *scene_parent, const GLTFNodeIndex node_index);
+	EditorSceneImporterMeshNode *_generate_mesh_instance(GLTFState &state, Node *scene_parent, const GLTFNodeIndex node_index);
 	Camera3D *_generate_camera(GLTFState &state, Node *scene_parent, const GLTFNodeIndex node_index);
 	Light3D *_generate_light(GLTFState &state, Node *scene_parent, const GLTFNodeIndex node_index);
 	Node3D *_generate_spatial(GLTFState &state, Node *scene_parent, const GLTFNodeIndex node_index);
@@ -225,6 +225,8 @@ static Error _parse_obj(const String &p_path, List<Ref<Mesh>> &r_meshes, bool p_
 	String current_material_library;
 	String current_material;
 	String current_group;
+	uint32_t smooth_group = 0;
+	bool smoothing = true;

 	while (true) {
 		String l = f->get_line().strip_edges();
@@ -315,6 +317,10 @@ static Error _parse_obj(const String &p_path, List<Ref<Mesh>> &r_meshes, bool p_
 					Vector3 vertex = vertices[vtx];
 					//if (weld_vertices)
 					//	vertex.snap(Vector3(weld_tolerance, weld_tolerance, weld_tolerance));
+					if (!smoothing) {
+						smooth_group++;
+					}
+					surf_tool->set_smooth_group(smooth_group);
 					surf_tool->add_vertex(vertex);
 				}

@@ -322,10 +328,15 @@ static Error _parse_obj(const String &p_path, List<Ref<Mesh>> &r_meshes, bool p_
 			}
 		} else if (l.begins_with("s ")) { //smoothing
 			String what = l.substr(2, l.length()).strip_edges();
+			bool do_smooth;
 			if (what == "off") {
-				surf_tool->add_smooth_group(false);
+				do_smooth = false;
 			} else {
-				surf_tool->add_smooth_group(true);
+				do_smooth = true;
 			}
+			if (do_smooth != smoothing) {
+				smooth_group++;
+				smoothing = do_smooth;
+			}
 		} else if (/*l.begins_with("g ") ||*/ l.begins_with("usemtl ") || (l.begins_with("o ") || f->eof_reached())) { //commit group to mesh
 			//groups are too annoying
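Note: the OBJ hunks above replace the old per-position smoothing map with a monotonically increasing group id: the id only bumps when an `s` line actually flips the state, so consecutive faces under the same setting share a group. A standalone sketch of the bookkeeping (hypothetical toy parser, not the importer itself):

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
	uint32_t smooth_group = 0;
	bool smoothing = true;
	const std::vector<std::string> lines = { "f 1 2 3", "s off", "f 4 5 6", "s 1", "f 7 8 9" };
	for (const std::string &l : lines) {
		if (l.rfind("s ", 0) == 0) {
			const bool do_smooth = (l.substr(2) != "off");
			if (do_smooth != smoothing) { // the id changes only on a flip
				smooth_group++;
				smoothing = do_smooth;
			}
		} else {
			std::cout << l << " -> smooth group " << smooth_group << "\n";
		}
	}
	return 0;
}
```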
@@ -426,8 +437,15 @@ Node *EditorOBJImporter::import_scene(const String &p_path, uint32_t p_flags, in
 	Node3D *scene = memnew(Node3D);

 	for (List<Ref<Mesh>>::Element *E = meshes.front(); E; E = E->next()) {
-		MeshInstance3D *mi = memnew(MeshInstance3D);
-		mi->set_mesh(E->get());
+		Ref<EditorSceneImporterMesh> mesh;
+		mesh.instance();
+		Ref<Mesh> m = E->get();
+		for (int i = 0; i < m->get_surface_count(); i++) {
+			mesh->add_surface(m->surface_get_primitive_type(i), m->surface_get_arrays(i), Array(), Dictionary(), m->surface_get_material(i));
+		}
+
+		EditorSceneImporterMeshNode *mi = memnew(EditorSceneImporterMeshNode);
+		mi->set_mesh(mesh);
 		mi->set_name(E->get()->get_name());
 		scene->add_child(mi);
 		mi->set_owner(scene);
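Note: the OBJ importer now re-packages each parsed Mesh surface-by-surface into an EditorSceneImporterMesh. The Array passed to add_surface() uses the usual Mesh::ARRAY_MAX-slot layout that surface_get_arrays() returns, so the smallest hand-built payload looks roughly like this (engine-side sketch; `mesh` stands for any valid Ref<EditorSceneImporterMesh>):

```cpp
Array arrays;
arrays.resize(Mesh::ARRAY_MAX); // unused slots stay as empty Variants

Vector<Vector3> verts;
verts.push_back(Vector3(0, 0, 0));
verts.push_back(Vector3(1, 0, 0));
verts.push_back(Vector3(0, 1, 0));
arrays[Mesh::ARRAY_VERTEX] = verts;

// Blend shapes, LODs, material and name all have defaults, so a bare
// triangle surface needs only the primitive and the arrays.
mesh->add_surface(Mesh::PRIMITIVE_TRIANGLES, arrays);
```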
@@ -119,6 +119,304 @@ void EditorSceneImporter::_bind_methods() {
 	BIND_CONSTANT(IMPORT_USE_COMPRESSION);
 }

+////////////////////////////////////////////////
+
+void EditorSceneImporterMesh::add_blend_shape(const String &p_name) {
+	ERR_FAIL_COND(surfaces.size() > 0);
+	blend_shapes.push_back(p_name);
+}
+
+int EditorSceneImporterMesh::get_blend_shape_count() const {
+	return blend_shapes.size();
+}
+
+String EditorSceneImporterMesh::get_blend_shape_name(int p_blend_shape) const {
+	ERR_FAIL_INDEX_V(p_blend_shape, blend_shapes.size(), String());
+	return blend_shapes[p_blend_shape];
+}
+
+void EditorSceneImporterMesh::set_blend_shape_mode(Mesh::BlendShapeMode p_blend_shape_mode) {
+	blend_shape_mode = p_blend_shape_mode;
+}
+Mesh::BlendShapeMode EditorSceneImporterMesh::get_blend_shape_mode() const {
+	return blend_shape_mode;
+}
+
+void EditorSceneImporterMesh::add_surface(Mesh::PrimitiveType p_primitive, const Array &p_arrays, const Array &p_blend_shapes, const Dictionary &p_lods, const Ref<Material> &p_material, const String &p_name) {
+	ERR_FAIL_COND(p_blend_shapes.size() != blend_shapes.size());
+	ERR_FAIL_COND(p_arrays.size() != Mesh::ARRAY_MAX);
+	Surface s;
+	s.primitive = p_primitive;
+	s.arrays = p_arrays;
+	s.name = p_name;
+
+	for (int i = 0; i < blend_shapes.size(); i++) {
+		Array bsdata = p_blend_shapes[i];
+		ERR_FAIL_COND(bsdata.size() != Mesh::ARRAY_MAX);
+		Surface::BlendShape bs;
+		bs.arrays = bsdata;
+		s.blend_shape_data.push_back(bs);
+	}
+
+	List<Variant> lods;
+	p_lods.get_key_list(&lods);
+	for (List<Variant>::Element *E = lods.front(); E; E = E->next()) {
+		ERR_CONTINUE(!E->get().is_num());
+		Surface::LOD lod;
+		lod.distance = E->get();
+		lod.indices = p_lods[E->get()];
+		ERR_CONTINUE(lod.indices.size() == 0);
+		s.lods.push_back(lod);
+	}
+
+	s.material = p_material;
+
+	surfaces.push_back(s);
+	mesh.unref();
+}
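Note: add_surface() above defines the LOD contract for the new resource: p_lods maps a numeric distance to an alternate index buffer over the same vertex arrays, and non-numeric keys or empty index arrays are skipped. A hedged usage sketch (engine-side; the index values and names are made up):

```cpp
Dictionary lods;
Vector<int> coarse_indices;
coarse_indices.push_back(0);
coarse_indices.push_back(1);
coarse_indices.push_back(2); // a simplifier (e.g. meshoptimizer) would produce these
lods[25.0] = coarse_indices; // beyond ~25 units, draw with the coarse index buffer

mesh->add_surface(Mesh::PRIMITIVE_TRIANGLES, arrays, Array(), lods, material, "body");
```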
+int EditorSceneImporterMesh::get_surface_count() const {
+	return surfaces.size();
+}
+
+Mesh::PrimitiveType EditorSceneImporterMesh::get_surface_primitive_type(int p_surface) {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), Mesh::PRIMITIVE_MAX);
+	return surfaces[p_surface].primitive;
+}
+Array EditorSceneImporterMesh::get_surface_arrays(int p_surface) const {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), Array());
+	return surfaces[p_surface].arrays;
+}
+String EditorSceneImporterMesh::get_surface_name(int p_surface) const {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), String());
+	return surfaces[p_surface].name;
+}
+Array EditorSceneImporterMesh::get_surface_blend_shape_arrays(int p_surface, int p_blend_shape) const {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), Array());
+	ERR_FAIL_INDEX_V(p_blend_shape, surfaces[p_surface].blend_shape_data.size(), Array());
+	return surfaces[p_surface].blend_shape_data[p_blend_shape].arrays;
+}
+int EditorSceneImporterMesh::get_surface_lod_count(int p_surface) const {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), 0);
+	return surfaces[p_surface].lods.size();
+}
+Vector<int> EditorSceneImporterMesh::get_surface_lod_indices(int p_surface, int p_lod) const {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), Vector<int>());
+	ERR_FAIL_INDEX_V(p_lod, surfaces[p_surface].lods.size(), Vector<int>());
+
+	return surfaces[p_surface].lods[p_lod].indices;
+}
+
+float EditorSceneImporterMesh::get_surface_lod_size(int p_surface, int p_lod) const {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), 0);
+	ERR_FAIL_INDEX_V(p_lod, surfaces[p_surface].lods.size(), 0);
+	return surfaces[p_surface].lods[p_lod].distance;
+}
+
+Ref<Material> EditorSceneImporterMesh::get_surface_material(int p_surface) const {
+	ERR_FAIL_INDEX_V(p_surface, surfaces.size(), Ref<Material>());
+	return surfaces[p_surface].material;
+}
+
+bool EditorSceneImporterMesh::has_mesh() const {
+	return mesh.is_valid();
+}
+
+Ref<ArrayMesh> EditorSceneImporterMesh::get_mesh() {
+	ERR_FAIL_COND_V(surfaces.size() == 0, Ref<ArrayMesh>());
+
+	if (mesh.is_null()) {
+		mesh.instance();
+		for (int i = 0; i < blend_shapes.size(); i++) {
+			mesh->add_blend_shape(blend_shapes[i]);
+		}
+		mesh->set_blend_shape_mode(blend_shape_mode);
+		for (int i = 0; i < surfaces.size(); i++) {
+			Array bs_data;
+			if (surfaces[i].blend_shape_data.size()) {
+				for (int j = 0; j < surfaces[i].blend_shape_data.size(); j++) {
+					bs_data.push_back(surfaces[i].blend_shape_data[j].arrays);
+				}
+			}
+			Dictionary lods;
+			if (surfaces[i].lods.size()) {
+				for (int j = 0; j < surfaces[i].lods.size(); j++) {
+					lods[surfaces[i].lods[j].distance] = surfaces[i].lods[j].indices;
+				}
+			}
+
+			mesh->add_surface_from_arrays(surfaces[i].primitive, surfaces[i].arrays, bs_data, lods);
+			if (surfaces[i].material.is_valid()) {
+				mesh->surface_set_material(mesh->get_surface_count() - 1, surfaces[i].material);
+			}
+			if (surfaces[i].name != String()) {
+				mesh->surface_set_name(mesh->get_surface_count() - 1, surfaces[i].name);
+			}
+		}
+	}
+
+	return mesh;
+}
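Note: get_mesh() is the single point where the editor-side data is finally registered with the rendering backend: the ArrayMesh is built lazily on first call and cached, and add_surface() calls mesh.unref() so any later edit invalidates that cache. Sketch of the resulting call order (engine-side; `importer_mesh` and `arrays` are assumed):

```cpp
Ref<ArrayMesh> baked = importer_mesh->get_mesh();  // builds and caches the ArrayMesh
Ref<ArrayMesh> again = importer_mesh->get_mesh();  // cheap: returns the cached one
importer_mesh->add_surface(Mesh::PRIMITIVE_TRIANGLES, arrays); // unrefs the cache
Ref<ArrayMesh> rebuilt = importer_mesh->get_mesh(); // rebuilt from scratch
```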
+
+void EditorSceneImporterMesh::clear() {
+	surfaces.clear();
+	blend_shapes.clear();
+	mesh.unref();
+}
+
+void EditorSceneImporterMesh::_set_data(const Dictionary &p_data) {
+	clear();
+	if (p_data.has("blend_shape_names")) {
+		blend_shapes = p_data["blend_shape_names"];
+	}
+	if (p_data.has("surfaces")) {
+		Array surface_arr = p_data["surfaces"];
+		for (int i = 0; i < surface_arr.size(); i++) {
+			Dictionary s = surface_arr[i];
+			ERR_CONTINUE(!s.has("primitive"));
+			ERR_CONTINUE(!s.has("arrays"));
+			Mesh::PrimitiveType prim = Mesh::PrimitiveType(int(s["primitive"]));
+			ERR_CONTINUE(prim >= Mesh::PRIMITIVE_MAX);
+			Array arr = s["arrays"];
+			Dictionary lods;
+			String name;
+			if (s.has("name")) {
+				name = s["name"];
+			}
+			if (s.has("lods")) {
+				lods = s["lods"];
+			}
+			Array blend_shapes;
+			if (s.has("blend_shapes")) {
+				blend_shapes = s["blend_shapes"];
+			}
+			Ref<Material> material;
+			if (s.has("material")) {
+				material = s["material"];
+			}
+			add_surface(prim, arr, blend_shapes, lods, material, name);
+		}
+	}
+}
+Dictionary EditorSceneImporterMesh::_get_data() const {
+	Dictionary data;
+	if (blend_shapes.size()) {
+		data["blend_shape_names"] = blend_shapes;
+	}
+	Array surface_arr;
+	for (int i = 0; i < surfaces.size(); i++) {
+		Dictionary d;
+		d["primitive"] = surfaces[i].primitive;
+		d["arrays"] = surfaces[i].arrays;
+		if (surfaces[i].blend_shape_data.size()) {
+			Array bs_data;
+			for (int j = 0; j < surfaces[i].blend_shape_data.size(); j++) {
+				bs_data.push_back(surfaces[i].blend_shape_data[j].arrays);
+			}
+			d["blend_shapes"] = bs_data;
+		}
+		if (surfaces[i].lods.size()) {
+			Dictionary lods;
+			for (int j = 0; j < surfaces[i].lods.size(); j++) {
+				lods[surfaces[i].lods[j].distance] = surfaces[i].lods[j].indices;
+			}
+			d["lods"] = lods;
+		}
+
+		if (surfaces[i].material.is_valid()) {
+			d["material"] = surfaces[i].material;
+		}
+
+		if (surfaces[i].name != String()) {
+			d["name"] = surfaces[i].name;
+		}
+
+		surface_arr.push_back(d);
+	}
+	data["surfaces"] = surface_arr;
+	return data;
+}
+
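Note: _set_data()/_get_data() make the whole resource round-trip through one Dictionary, which is what the `_data` property bound below stores. The shape it produces, spelled out (field names come straight from the code above; the sample values are illustrative):

```cpp
// {
//   "blend_shape_names": [ "smile", ... ],          // only if any exist
//   "surfaces": [
//     {
//       "primitive": int,                           // Mesh::PrimitiveType
//       "arrays": Array,                            // Mesh::ARRAY_MAX slots
//       "blend_shapes": [ Array, ... ],             // optional
//       "lods": { float distance: Vector<int> },    // optional
//       "material": Ref<Material>,                  // optional
//       "name": String                              // optional
//     },
//     ...
//   ]
// }
```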
+void EditorSceneImporterMesh::_bind_methods() {
+	ClassDB::bind_method(D_METHOD("add_blend_shape", "name"), &EditorSceneImporterMesh::add_blend_shape);
+	ClassDB::bind_method(D_METHOD("get_blend_shape_count"), &EditorSceneImporterMesh::get_blend_shape_count);
+	ClassDB::bind_method(D_METHOD("get_blend_shape_name", "blend_shape_idx"), &EditorSceneImporterMesh::get_blend_shape_name);
+
+	ClassDB::bind_method(D_METHOD("set_blend_shape_mode", "mode"), &EditorSceneImporterMesh::set_blend_shape_mode);
+	ClassDB::bind_method(D_METHOD("get_blend_shape_mode"), &EditorSceneImporterMesh::get_blend_shape_mode);
+
+	ClassDB::bind_method(D_METHOD("add_surface", "primitive", "arrays", "blend_shapes", "lods", "material"), &EditorSceneImporterMesh::add_surface, DEFVAL(Array()), DEFVAL(Dictionary()), DEFVAL(Ref<Material>()), DEFVAL(String()));
+
+	ClassDB::bind_method(D_METHOD("get_surface_count"), &EditorSceneImporterMesh::get_surface_count);
+	ClassDB::bind_method(D_METHOD("get_surface_primitive_type", "surface_idx"), &EditorSceneImporterMesh::get_surface_primitive_type);
+	ClassDB::bind_method(D_METHOD("get_surface_name", "surface_idx"), &EditorSceneImporterMesh::get_surface_name);
+	ClassDB::bind_method(D_METHOD("get_surface_arrays", "surface_idx"), &EditorSceneImporterMesh::get_surface_arrays);
+	ClassDB::bind_method(D_METHOD("get_surface_blend_shape_arrays", "surface_idx", "blend_shape_idx"), &EditorSceneImporterMesh::get_surface_blend_shape_arrays);
+	ClassDB::bind_method(D_METHOD("get_surface_lod_count", "surface_idx"), &EditorSceneImporterMesh::get_surface_lod_count);
+	ClassDB::bind_method(D_METHOD("get_surface_lod_size", "surface_idx", "lod_idx"), &EditorSceneImporterMesh::get_surface_lod_size);
+	ClassDB::bind_method(D_METHOD("get_surface_lod_indices", "surface_idx", "lod_idx"), &EditorSceneImporterMesh::get_surface_lod_indices);
+	ClassDB::bind_method(D_METHOD("get_surface_material", "surface_idx"), &EditorSceneImporterMesh::get_surface_material);
+
+	ClassDB::bind_method(D_METHOD("get_mesh"), &EditorSceneImporterMesh::get_mesh);
+	ClassDB::bind_method(D_METHOD("clear"), &EditorSceneImporterMesh::clear);
+
+	ClassDB::bind_method(D_METHOD("_set_data", "data"), &EditorSceneImporterMesh::_set_data);
+	ClassDB::bind_method(D_METHOD("_get_data"), &EditorSceneImporterMesh::_get_data);
+
+	ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "_data", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR), "_set_data", "_get_data");
+}
+
+void EditorSceneImporterMeshNode::set_mesh(const Ref<EditorSceneImporterMesh> &p_mesh) {
+	mesh = p_mesh;
+}
+Ref<EditorSceneImporterMesh> EditorSceneImporterMeshNode::get_mesh() const {
+	return mesh;
+}
+
+void EditorSceneImporterMeshNode::set_skin(const Ref<Skin> &p_skin) {
+	skin = p_skin;
+}
+Ref<Skin> EditorSceneImporterMeshNode::get_skin() const {
+	return skin;
+}
+
+void EditorSceneImporterMeshNode::set_surface_material(int p_idx, const Ref<Material> &p_material) {
+	ERR_FAIL_COND(p_idx < 0);
+	if (p_idx >= surface_materials.size()) {
+		surface_materials.resize(p_idx + 1);
+	}
+
+	surface_materials.write[p_idx] = p_material;
+}
+Ref<Material> EditorSceneImporterMeshNode::get_surface_material(int p_idx) const {
+	ERR_FAIL_COND_V(p_idx < 0, Ref<Material>());
+	if (p_idx >= surface_materials.size()) {
+		return Ref<Material>();
+	}
+	return surface_materials[p_idx];
+}
+
+void EditorSceneImporterMeshNode::set_skeleton_path(const NodePath &p_path) {
+	skeleton_path = p_path;
+}
+NodePath EditorSceneImporterMeshNode::get_skeleton_path() const {
+	return skeleton_path;
+}
+
+void EditorSceneImporterMeshNode::_bind_methods() {
+	ClassDB::bind_method(D_METHOD("set_mesh", "mesh"), &EditorSceneImporterMeshNode::set_mesh);
+	ClassDB::bind_method(D_METHOD("get_mesh"), &EditorSceneImporterMeshNode::get_mesh);
+
+	ClassDB::bind_method(D_METHOD("set_skin", "skin"), &EditorSceneImporterMeshNode::set_skin);
+	ClassDB::bind_method(D_METHOD("get_skin"), &EditorSceneImporterMeshNode::get_skin);
+
+	ClassDB::bind_method(D_METHOD("set_skeleton_path", "skeleton_path"), &EditorSceneImporterMeshNode::set_skeleton_path);
+	ClassDB::bind_method(D_METHOD("get_skeleton_path"), &EditorSceneImporterMeshNode::get_skeleton_path);
+
+	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "mesh", PROPERTY_HINT_RESOURCE_TYPE, "EditorSceneImporterMesh"), "set_mesh", "get_mesh");
+	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "skin", PROPERTY_HINT_RESOURCE_TYPE, "Skin"), "set_skin", "get_skin");
+	ADD_PROPERTY(PropertyInfo(Variant::NODE_PATH, "skeleton_path", PROPERTY_HINT_NODE_PATH_VALID_TYPES, "Skeleton"), "set_skeleton_path", "get_skeleton_path");
+}
+
 /////////////////////////////////
 void EditorScenePostImport::_bind_methods() {
 	BIND_VMETHOD(MethodInfo(Variant::OBJECT, "post_import", PropertyInfo(Variant::OBJECT, "scene")));
@@ -1219,6 +1517,34 @@ Ref<Animation> ResourceImporterScene::import_animation_from_other_importer(Edito
 	return importer->import_animation(p_path, p_flags, p_bake_fps);
 }

+void ResourceImporterScene::_generate_meshes(Node *p_node) {
+	EditorSceneImporterMeshNode *src_mesh = Object::cast_to<EditorSceneImporterMeshNode>(p_node);
+	if (src_mesh != nullptr) {
+		//is mesh
+		MeshInstance3D *mesh_node = memnew(MeshInstance3D);
+		mesh_node->set_transform(src_mesh->get_transform());
+		mesh_node->set_skin(src_mesh->get_skin());
+		mesh_node->set_skeleton_path(src_mesh->get_skeleton_path());
+
+		Ref<ArrayMesh> mesh;
+		if (!src_mesh->get_mesh()->has_mesh()) {
+			//do mesh processing
+		}
+		mesh = src_mesh->get_mesh()->get_mesh();
+		mesh_node->set_mesh(mesh);
+		for (int i = 0; i < mesh->get_surface_count(); i++) {
+			mesh_node->set_surface_material(i, src_mesh->get_surface_material(i));
+		}
+
+		p_node->replace_by(mesh_node);
+		memdelete(p_node);
+		p_node = mesh_node;
+	}
+
+	for (int i = 0; i < p_node->get_child_count(); i++) {
+		_generate_meshes(p_node->get_child(i));
+	}
+}
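Note: _generate_meshes() runs at the end of import and swaps every placeholder node for a real MeshInstance3D before saving; the `//do mesh processing` branch is where per-import mesh work (such as the meshoptimizer-based processing this PR sets up) can slot in later. The traversal pattern in isolation (standalone sketch, not engine code):

```cpp
#include <memory>
#include <vector>

struct Node {
	bool needs_baking = false;
	std::vector<std::unique_ptr<Node>> children;
};

// Swap the node first, then recurse, so children are visited on the
// final (post-replacement) tree -- the same order _generate_meshes() uses.
void generate_meshes(Node *p_node) {
	if (p_node->needs_baking) {
		p_node->needs_baking = false; // stands in for replace_by() + memdelete()
	}
	for (std::unique_ptr<Node> &child : p_node->children) {
		generate_meshes(child.get());
	}
}
```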
 Error ResourceImporterScene::import(const String &p_source_file, const String &p_save_path, const Map<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) {
 	const String &src_path = p_source_file;

@@ -1315,6 +1641,8 @@ Error ResourceImporterScene::import(const String &p_source_file, const String &p
 		scene->set_name(p_save_path.get_file().get_basename());
 	}

+	_generate_meshes(scene);
+
 	err = OK;

 	String animation_filter = String(p_options["animation/filter_script"]).strip_edges();
@@ -32,9 +32,11 @@
 #define RESOURCEIMPORTERSCENE_H

 #include "core/io/resource_importer.h"
+#include "scene/3d/node_3d.h"
 #include "scene/resources/animation.h"
 #include "scene/resources/mesh.h"
 #include "scene/resources/shape_3d.h"
+#include "scene/resources/skin.h"

 class Material;
@@ -88,6 +90,90 @@ public:
 	EditorScenePostImport();
 };

+// The following classes are used by importers instead of ArrayMesh and MeshInstance3D,
+// so the data is not registered (hence, no quality loss), importing happens faster and
+// it's easier to modify before saving.
+
+class EditorSceneImporterMesh : public Resource {
+	GDCLASS(EditorSceneImporterMesh, Resource)
+
+	struct Surface {
+		Mesh::PrimitiveType primitive;
+		Array arrays;
+		struct BlendShape {
+			Array arrays;
+		};
+		Vector<BlendShape> blend_shape_data;
+		struct LOD {
+			Vector<int> indices;
+			float distance;
+		};
+		Vector<LOD> lods;
+		Ref<Material> material;
+		String name;
+	};
+	Vector<Surface> surfaces;
+	Vector<String> blend_shapes;
+	Mesh::BlendShapeMode blend_shape_mode = Mesh::BLEND_SHAPE_MODE_NORMALIZED;
+
+	Ref<ArrayMesh> mesh;
+
+protected:
+	void _set_data(const Dictionary &p_data);
+	Dictionary _get_data() const;
+
+	static void _bind_methods();
+
+public:
+	void add_blend_shape(const String &p_name);
+	int get_blend_shape_count() const;
+	String get_blend_shape_name(int p_blend_shape) const;
+
+	void add_surface(Mesh::PrimitiveType p_primitive, const Array &p_arrays, const Array &p_blend_shapes = Array(), const Dictionary &p_lods = Dictionary(), const Ref<Material> &p_material = Ref<Material>(), const String &p_name = String());
+	int get_surface_count() const;
+
+	void set_blend_shape_mode(Mesh::BlendShapeMode p_blend_shape_mode);
+	Mesh::BlendShapeMode get_blend_shape_mode() const;
+
+	Mesh::PrimitiveType get_surface_primitive_type(int p_surface);
+	String get_surface_name(int p_surface) const;
+	Array get_surface_arrays(int p_surface) const;
+	Array get_surface_blend_shape_arrays(int p_surface, int p_blend_shape) const;
+	int get_surface_lod_count(int p_surface) const;
+	Vector<int> get_surface_lod_indices(int p_surface, int p_lod) const;
+	float get_surface_lod_size(int p_surface, int p_lod) const;
+	Ref<Material> get_surface_material(int p_surface) const;
+
+	bool has_mesh() const;
+	Ref<ArrayMesh> get_mesh();
+	void clear();
+};
+
+class EditorSceneImporterMeshNode : public Node3D {
+	GDCLASS(EditorSceneImporterMeshNode, Node3D)
+
+	Ref<EditorSceneImporterMesh> mesh;
+	Ref<Skin> skin;
+	NodePath skeleton_path;
+	Vector<Ref<Material>> surface_materials;
+
+protected:
+	static void _bind_methods();
+
+public:
+	void set_mesh(const Ref<EditorSceneImporterMesh> &p_mesh);
+	Ref<EditorSceneImporterMesh> get_mesh() const;
+
+	void set_skin(const Ref<Skin> &p_skin);
+	Ref<Skin> get_skin() const;
+
+	void set_surface_material(int p_idx, const Ref<Material> &p_material);
+	Ref<Material> get_surface_material(int p_idx) const;
+
+	void set_skeleton_path(const NodePath &p_path);
+	NodePath get_skeleton_path() const;
+};
+
 class ResourceImporterScene : public ResourceImporter {
 	GDCLASS(ResourceImporterScene, ResourceImporter);

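Note: the header comment above states the design: these two classes hold imported mesh data without touching the RenderingServer, so the importer can keep mutating it cheaply. The intended lifecycle, condensed (engine-side sketch; `arrays` stands for any valid surface payload):

```cpp
Ref<EditorSceneImporterMesh> import_mesh;
import_mesh.instance();
import_mesh->add_surface(Mesh::PRIMITIVE_TRIANGLES, arrays); // no server upload yet
// ...importer-side edits stay cheap here: add LODs, swap materials, rename...
Ref<ArrayMesh> final_mesh = import_mesh->get_mesh(); // registered only at this point
```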
@@ -119,6 +205,7 @@ class ResourceImporterScene : public ResourceImporter {
 	};

 	void _replace_owner(Node *p_node, Node *p_scene, Node *p_new_owner);
+	void _generate_meshes(Node *p_node);

 public:
 	static ResourceImporterScene *get_singleton() { return singleton; }
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+Import("env")
+Import("env_modules")
+
+env_meshoptimizer = env_modules.Clone()
+
+# Thirdparty source files
+thirdparty_dir = "#thirdparty/meshoptimizer/"
+thirdparty_sources = [
+    "allocator.cpp",
+    "clusterizer.cpp",
+    "indexcodec.cpp",
+    "indexgenerator.cpp",
+    "overdrawanalyzer.cpp",
+    "overdrawoptimizer.cpp",
+    "simplifier.cpp",
+    "spatialorder.cpp",
+    "stripifier.cpp",
+    "vcacheanalyzer.cpp",
+    "vcacheoptimizer.cpp",
+    "vertexcodec.cpp",
+    "vertexfilter.cpp",
+    "vfetchanalyzer.cpp",
+    "vfetchoptimizer.cpp",
+]
+thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+
+env_thirdparty = env_meshoptimizer.Clone()
+env_thirdparty.disable_warnings()
+env_thirdparty.add_source_files(env.modules_sources, thirdparty_sources)
+
+env_modules.add_source_files(env.modules_sources, ["register_types.cpp"])
@@ -0,0 +1,7 @@
+def can_build(env, platform):
+    # Having this on release by default, it's small and a lot of users like to do procedural stuff
+    return True
+
+
+def configure(env):
+    pass
@@ -0,0 +1,43 @@
+/*************************************************************************/
+/*  register_types.cpp                                                   */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "register_types.h"
+#include "scene/resources/surface_tool.h"
+#include "thirdparty/meshoptimizer/meshoptimizer.h"
+
+void register_meshoptimizer_types() {
+	SurfaceTool::optimize_vertex_cache_func = meshopt_optimizeVertexCache;
+	SurfaceTool::simplify_func = meshopt_simplify;
+}
+
+void unregister_meshoptimizer_types() {
+	SurfaceTool::optimize_vertex_cache_func = nullptr;
+	SurfaceTool::simplify_func = nullptr;
+}
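Note: register_meshoptimizer_types() wires the module in through plain function pointers, so core SurfaceTool code never links against the thirdparty library and keeps working (minus the optimization) when the module is compiled out. The seam in isolation (standalone sketch with a stand-in optimizer; the real meshopt signatures are not reproduced here):

```cpp
#include <cstddef>
#include <cstdio>

using OptimizeVertexCacheFunc = void (*)(unsigned int *r_dst, const unsigned int *p_src,
		size_t p_index_count, size_t p_vertex_count);

struct SurfaceToolLike {
	static OptimizeVertexCacheFunc optimize_vertex_cache_func; // null when module absent
};
OptimizeVertexCacheFunc SurfaceToolLike::optimize_vertex_cache_func = nullptr;

static void stand_in_optimizer(unsigned int *r_dst, const unsigned int *p_src,
		size_t p_index_count, size_t /*p_vertex_count*/) {
	for (size_t i = 0; i < p_index_count; i++) {
		r_dst[i] = p_src[i]; // identity; a real backend reorders for the vertex cache
	}
}

int main() {
	SurfaceToolLike::optimize_vertex_cache_func = stand_in_optimizer; // register_*() does this
	unsigned int idx[3] = { 0, 1, 2 };
	if (SurfaceToolLike::optimize_vertex_cache_func) { // callers must handle the null case
		SurfaceToolLike::optimize_vertex_cache_func(idx, idx, 3, 3);
	}
	std::printf("%u %u %u\n", idx[0], idx[1], idx[2]);
	return 0;
}
```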
@@ -0,0 +1,37 @@
+/*************************************************************************/
+/*  register_types.h                                                     */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef MESHOPTIMIZER_REGISTER_TYPES_H
+#define MESHOPTIMIZER_REGISTER_TYPES_H
+
+void register_meshoptimizer_types();
+void unregister_meshoptimizer_types();
+
+#endif // MESHOPTIMIZER_REGISTER_TYPES_H
@@ -533,6 +533,9 @@ void Mesh::_bind_methods() {
 	BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_2D_VERTICES);
 	BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_DYNAMIC_UPDATE);
 	BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_8_BONE_WEIGHTS);
+
+	BIND_ENUM_CONSTANT(BLEND_SHAPE_MODE_NORMALIZED);
+	BIND_ENUM_CONSTANT(BLEND_SHAPE_MODE_RELATIVE);
 }

 void Mesh::clear_cache() const {
@@ -1384,7 +1387,7 @@ bool (*array_mesh_lightmap_unwrap_callback)(float p_texel_size, const float *p_v

 struct ArrayMeshLightmapSurface {
 	Ref<Material> material;
-	Vector<SurfaceTool::Vertex> vertices;
+	LocalVector<SurfaceTool::Vertex> vertices;
 	Mesh::PrimitiveType primitive;
 	uint32_t format;
 };
@@ -1425,7 +1428,7 @@ Error ArrayMesh::lightmap_unwrap_cached(int *&r_cache_data, unsigned int &r_cach

 		Array arrays = surface_get_arrays(i);
 		s.material = surface_get_material(i);
-		s.vertices = SurfaceTool::create_vertex_array_from_triangle_arrays(arrays);
+		SurfaceTool::create_vertex_array_from_triangle_arrays(arrays, s.vertices);

 		Vector<Vector3> rvertices = arrays[Mesh::ARRAY_VERTEX];
 		int vc = rvertices.size();
@@ -1612,9 +1615,6 @@ void ArrayMesh::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::ARRAY, "_surfaces", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "_set_surfaces", "_get_surfaces");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "blend_shape_mode", PROPERTY_HINT_ENUM, "Normalized,Relative"), "set_blend_shape_mode", "get_blend_shape_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::AABB, "custom_aabb", PROPERTY_HINT_NONE, ""), "set_custom_aabb", "get_custom_aabb");
-
-	BIND_ENUM_CONSTANT(BLEND_SHAPE_MODE_NORMALIZED);
-	BIND_ENUM_CONSTANT(BLEND_SHAPE_MODE_RELATIVE);
 }

 void ArrayMesh::reload_from_file() {
@@ -53,7 +53,10 @@ public:
 		NO_INDEX_ARRAY = RenderingServer::NO_INDEX_ARRAY,
 		ARRAY_WEIGHTS_SIZE = RenderingServer::ARRAY_WEIGHTS_SIZE
 	};

+	enum BlendShapeMode {
+		BLEND_SHAPE_MODE_NORMALIZED = RS::BLEND_SHAPE_MODE_NORMALIZED,
+		BLEND_SHAPE_MODE_RELATIVE = RS::BLEND_SHAPE_MODE_RELATIVE,
+	};
 	enum ArrayType {
 		ARRAY_VERTEX = RenderingServer::ARRAY_VERTEX,
 		ARRAY_NORMAL = RenderingServer::ARRAY_NORMAL,
@@ -171,12 +174,6 @@ class ArrayMesh : public Mesh {
 	Array _get_surfaces() const;
 	void _set_surfaces(const Array &p_data);

-public:
-	enum BlendShapeMode {
-		BLEND_SHAPE_MODE_NORMALIZED = RS::BLEND_SHAPE_MODE_NORMALIZED,
-		BLEND_SHAPE_MODE_RELATIVE = RS::BLEND_SHAPE_MODE_RELATIVE,
-	};
-
-private:
 	struct Surface {
 		uint32_t format;
@@ -268,6 +265,6 @@ VARIANT_ENUM_CAST(Mesh::ArrayType);
 VARIANT_ENUM_CAST(Mesh::ArrayFormat);
 VARIANT_ENUM_CAST(Mesh::ArrayCustomFormat);
 VARIANT_ENUM_CAST(Mesh::PrimitiveType);
-VARIANT_ENUM_CAST(ArrayMesh::BlendShapeMode);
+VARIANT_ENUM_CAST(Mesh::BlendShapeMode);

 #endif
@@ -33,6 +33,9 @@
 #define _VERTEX_SNAP 0.0001
 #define EQ_VERTEX_DIST 0.00001

+SurfaceTool::OptimizeVertexCacheFunc SurfaceTool::optimize_vertex_cache_func = nullptr;
+SurfaceTool::SimplifyFunc SurfaceTool::simplify_func = nullptr;
+
 bool SurfaceTool::Vertex::operator==(const Vertex &p_vertex) const {
 	if (vertex != p_vertex.vertex) {
 		return false;
@@ -80,6 +83,10 @@ bool SurfaceTool::Vertex::operator==(const Vertex &p_vertex) const {
 		}
 	}

+	if (smooth_group != p_vertex.smooth_group) {
+		return false;
+	}
+
 	return true;
 }

@@ -94,6 +101,7 @@ uint32_t SurfaceTool::VertexHasher::hash(const Vertex &p_vtx) {
 	h = hash_djb2_buffer((const uint8_t *)p_vtx.bones.ptr(), p_vtx.bones.size() * sizeof(int), h);
 	h = hash_djb2_buffer((const uint8_t *)p_vtx.weights.ptr(), p_vtx.weights.size() * sizeof(float), h);
 	h = hash_djb2_buffer((const uint8_t *)&p_vtx.custom[0], sizeof(Color) * RS::ARRAY_CUSTOM_COUNT, h);
+	h = hash_djb2_one_32(p_vtx.smooth_group, h);
 	return h;
 }

@@ -118,6 +126,8 @@ void SurfaceTool::add_vertex(const Vector3 &p_vertex) {
 	vtx.bones = last_bones;
 	vtx.tangent = last_tangent.normal;
 	vtx.binormal = last_normal.cross(last_tangent.normal).normalized() * last_tangent.d;
+	vtx.smooth_group = last_smooth_group;
+
 	for (int i = 0; i < RS::ARRAY_CUSTOM_COUNT; i++) {
 		vtx.custom[i] = last_custom[i];
 	}
@@ -252,13 +262,8 @@ void SurfaceTool::set_weights(const Vector<float> &p_weights) {
 	last_weights = p_weights;
 }

-void SurfaceTool::add_smooth_group(bool p_smooth) {
-	ERR_FAIL_COND(!begun);
-	if (index_array.size()) {
-		smooth_groups[index_array.size()] = p_smooth;
-	} else {
-		smooth_groups[vertex_array.size()] = p_smooth;
-	}
+void SurfaceTool::set_smooth_group(uint32_t p_group) {
+	last_smooth_group = p_group;
 }

 void SurfaceTool::add_triangle_fan(const Vector<Vector3> &p_vertices, const Vector<Vector2> &p_uvs, const Vector<Color> &p_colors, const Vector<Vector2> &p_uv2s, const Vector<Vector3> &p_normals, const Vector<Plane> &p_tangents) {
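Note: add_smooth_group() used to record on/off flips in a side map keyed by element counts; set_smooth_group() instead makes the group id part of the per-vertex state that add_vertex() captures, just like set_normal() or set_uv(). Call pattern (engine-side sketch; `st` is any begun SurfaceTool):

```cpp
st->set_smooth_group(0);
st->add_vertex(Vector3(0, 0, 0)); // this vertex carries group 0
st->set_smooth_group(1);
st->add_vertex(Vector3(0, 0, 0)); // same position, different group:
                                  // operator== and the hasher keep them apart,
                                  // so index() will not weld the hard edge away
```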
@@ -315,9 +320,8 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len);
 				Vector3 *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					switch (i) {
 						case Mesh::ARRAY_VERTEX: {
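Note: every commit_to_arrays() hunk from here down is the same mechanical rewrite: vertex_array moved from List<Vertex> to LocalVector<Vertex>, so pointer-chasing element iteration becomes contiguous indexed iteration. Side by side in standard C++ (standalone sketch; std::list/std::vector stand in for the engine containers):

```cpp
#include <list>
#include <vector>

struct Vertex { float x = 0.0f; };

float sum_list(const std::list<Vertex> &p_va) {
	float s = 0.0f;
	for (std::list<Vertex>::const_iterator E = p_va.begin(); E != p_va.end(); ++E) {
		s += E->x; // each step dereferences a separately allocated node
	}
	return s;
}

float sum_vector(const std::vector<Vertex> &p_va) {
	float s = 0.0f;
	for (size_t idx = 0; idx < p_va.size(); idx++) {
		s += p_va[idx].x; // contiguous storage: cache-friendly and indexable
	}
	return s;
}
```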
@@ -339,9 +343,8 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len);
 				Vector2 *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					switch (i) {
 						case Mesh::ARRAY_TEX_UV: {
@@ -360,9 +363,8 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 4);
 				float *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx += 4) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					w[idx + 0] = v.tangent.x;
 					w[idx + 1] = v.tangent.y;
@@ -381,9 +383,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len);
 				Color *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					w[idx] = v.color;
 				}

@@ -400,9 +402,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 4);
 				uint8_t *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx * 4 + 0] = CLAMP(int32_t(c.r * 255.0), 0, 255);
 					w[idx * 4 + 1] = CLAMP(int32_t(c.g * 255.0), 0, 255);
@@ -417,9 +419,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 4);
 				uint8_t *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx * 4 + 0] = uint8_t(int8_t(CLAMP(int32_t(c.r * 127.0), -128, 127)));
 					w[idx * 4 + 1] = uint8_t(int8_t(CLAMP(int32_t(c.g * 127.0), -128, 127)));
@@ -434,9 +436,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 4);
 				uint16_t *w = (uint16_t *)array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx * 2 + 0] = Math::make_half_float(c.r);
 					w[idx * 2 + 1] = Math::make_half_float(c.g);
@@ -449,9 +451,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 8);
 				uint16_t *w = (uint16_t *)array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx * 4 + 0] = Math::make_half_float(c.r);
 					w[idx * 4 + 1] = Math::make_half_float(c.g);
@@ -466,9 +468,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len);
 				float *w = (float *)array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx] = c.r;
 				}
@@ -480,9 +482,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 2);
 				float *w = (float *)array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx * 2 + 0] = c.r;
 					w[idx * 2 + 1] = c.g;
@@ -495,9 +497,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 3);
 				float *w = (float *)array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx * 3 + 0] = c.r;
 					w[idx * 3 + 1] = c.g;
@@ -511,9 +513,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * 4);
 				float *w = (float *)array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx++) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					const Color &c = v.custom[idx];
 					w[idx * 4 + 0] = c.r;
 					w[idx * 4 + 1] = c.g;
@@ -533,9 +535,8 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * count);
 				int *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx += count) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					ERR_CONTINUE(v.bones.size() != count);

@@ -554,9 +555,9 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(varr_len * count);
 				float *w = array.ptrw();

-				int idx = 0;
-				for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next(), idx += count) {
-					const Vertex &v = E->get();
+				for (uint32_t idx = 0; idx < vertex_array.size(); idx++) {
+					const Vertex &v = vertex_array[idx];

 					ERR_CONTINUE(v.weights.size() != count);

 					for (int j = 0; j < count; j++) {
@@ -574,9 +575,8 @@ Array SurfaceTool::commit_to_arrays() {
 				array.resize(index_array.size());
 				int *w = array.ptrw();

-				int idx = 0;
-				for (List<int>::Element *E = index_array.front(); E; E = E->next(), idx++) {
-					w[idx] = E->get();
+				for (uint32_t idx = 0; idx < index_array.size(); idx++) {
+					w[idx] = index_array[idx];
 				}

 				a[i] = array;
@@ -623,15 +623,16 @@ void SurfaceTool::index() {
 	}

 	HashMap<Vertex, int, VertexHasher> indices;
-	List<Vertex> new_vertices;
+	LocalVector<Vertex> old_vertex_array = vertex_array;
+	vertex_array.clear();

-	for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next()) {
-		int *idxptr = indices.getptr(E->get());
+	for (uint32_t i = 0; i < old_vertex_array.size(); i++) {
+		int *idxptr = indices.getptr(old_vertex_array[i]);
 		int idx;
 		if (!idxptr) {
 			idx = indices.size();
-			new_vertices.push_back(E->get());
-			indices[E->get()] = idx;
+			vertex_array.push_back(old_vertex_array[i]);
+			indices[old_vertex_array[i]] = idx;
 		} else {
 			idx = *idxptr;
 		}

@@ -639,9 +640,6 @@ void SurfaceTool::index() {
 		index_array.push_back(idx);
 	}

-	vertex_array.clear();
-	vertex_array = new_vertices;
-
 	format |= Mesh::ARRAY_FORMAT_INDEX;
 }

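Note: the reworked index() is a hash-dedup pass: each vertex hashes to a running id, duplicates reuse the id, and the vertex array shrinks to the unique set while the index array grows to reference it. Standalone version of the same strategy (sketch; std containers instead of HashMap/LocalVector):

```cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

struct Vtx {
	float x = 0, y = 0, z = 0;
	uint32_t smooth_group = 0; // differing groups must not merge
	bool operator==(const Vtx &p_o) const {
		return x == p_o.x && y == p_o.y && z == p_o.z && smooth_group == p_o.smooth_group;
	}
};
struct VtxHash {
	size_t operator()(const Vtx &p_v) const {
		// crude combine; the engine hashes every vertex attribute with hash_djb2
		size_t h = std::hash<float>()(p_v.x);
		h = h * 31 + std::hash<float>()(p_v.y);
		h = h * 31 + std::hash<float>()(p_v.z);
		h = h * 31 + p_v.smooth_group;
		return h;
	}
};

void index_vertices(std::vector<Vtx> &r_verts, std::vector<int> &r_indices) {
	std::unordered_map<Vtx, int, VtxHash> seen;
	std::vector<Vtx> unique;
	for (const Vtx &v : r_verts) {
		auto it = seen.find(v);
		int idx;
		if (it == seen.end()) {
			idx = (int)unique.size();
			seen.emplace(v, idx);
			unique.push_back(v);
		} else {
			idx = it->second;
		}
		r_indices.push_back(idx);
	}
	r_verts = unique; // vertex buffer now holds only unique vertices
}
```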
@@ -649,29 +647,26 @@ void SurfaceTool::deindex() {
 	if (index_array.size() == 0) {
 		return; //nothing to deindex
 	}
-	Vector<Vertex> varr;
-	varr.resize(vertex_array.size());
-	int idx = 0;
-	for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next()) {
-		varr.write[idx++] = E->get();
-	}

+	LocalVector<Vertex> old_vertex_array = vertex_array;
 	vertex_array.clear();
-	for (List<int>::Element *E = index_array.front(); E; E = E->next()) {
-		ERR_FAIL_INDEX(E->get(), varr.size());
-		vertex_array.push_back(varr[E->get()]);
+	for (uint32_t i = 0; i < index_array.size(); i++) {
+		uint32_t index = index_array[i];
+		ERR_FAIL_COND(index >= old_vertex_array.size());
+		vertex_array.push_back(old_vertex_array[index]);
 	}
 	format &= ~Mesh::ARRAY_FORMAT_INDEX;
 	index_array.clear();
 }

-void SurfaceTool::_create_list(const Ref<Mesh> &p_existing, int p_surface, List<Vertex> *r_vertex, List<int> *r_index, uint32_t &lformat) {
+void SurfaceTool::_create_list(const Ref<Mesh> &p_existing, int p_surface, LocalVector<Vertex> *r_vertex, LocalVector<int> *r_index, uint32_t &lformat) {
 	Array arr = p_existing->surface_get_arrays(p_surface);
 	ERR_FAIL_COND(arr.size() != RS::ARRAY_MAX);
 	_create_list_from_arrays(arr, r_vertex, r_index, lformat);
 }

-Vector<SurfaceTool::Vertex> SurfaceTool::create_vertex_array_from_triangle_arrays(const Array &p_arrays, uint32_t *r_format) {
-	Vector<SurfaceTool::Vertex> ret;
+void SurfaceTool::create_vertex_array_from_triangle_arrays(const Array &p_arrays, LocalVector<SurfaceTool::Vertex> &ret, uint32_t *r_format) {
+	ret.clear();

 	Vector<Vector3> varr = p_arrays[RS::ARRAY_VERTEX];
 	Vector<Vector3> narr = p_arrays[RS::ARRAY_NORMAL];
@ -688,7 +683,7 @@ Vector<SurfaceTool::Vertex> SurfaceTool::create_vertex_array_from_triangle_array
|
|||
if (r_format) {
|
||||
*r_format = 0;
|
||||
}
|
||||
return ret;
|
||||
return;
|
||||
}
|
||||
|
||||
int lformat = 0;
|
||||
|
@ -799,19 +794,14 @@ Vector<SurfaceTool::Vertex> SurfaceTool::create_vertex_array_from_triangle_array
|
|||
if (r_format) {
|
||||
*r_format = lformat;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void SurfaceTool::_create_list_from_arrays(Array arr, List<Vertex> *r_vertex, List<int> *r_index, uint32_t &lformat) {
|
||||
Vector<Vertex> arrays = create_vertex_array_from_triangle_arrays(arr, &lformat);
|
||||
ERR_FAIL_COND(arrays.size() == 0);
|
||||
|
||||
for (int i = 0; i < arrays.size(); i++) {
|
||||
r_vertex->push_back(arrays[i]);
|
||||
}
|
||||
void SurfaceTool::_create_list_from_arrays(Array arr, LocalVector<Vertex> *r_vertex, LocalVector<int> *r_index, uint32_t &lformat) {
|
||||
create_vertex_array_from_triangle_arrays(arr, *r_vertex, &lformat);
|
||||
ERR_FAIL_COND(r_vertex->size() == 0);
|
||||
|
||||
//indices
|
||||
r_index->clear();
|
||||
|
||||
Vector<int> idx = arr[RS::ARRAY_INDEX];
|
||||
int is = idx.size();
|
||||
|
@ -864,14 +854,14 @@ void SurfaceTool::append_from(const Ref<Mesh> &p_existing, int p_surface, const
|
|||
}
|
||||
|
||||
uint32_t nformat;
|
||||
List<Vertex> nvertices;
|
||||
List<int> nindices;
|
||||
LocalVector<Vertex> nvertices;
|
||||
LocalVector<int> nindices;
|
||||
_create_list(p_existing, p_surface, &nvertices, &nindices, nformat);
|
||||
format |= nformat;
|
||||
int vfrom = vertex_array.size();
|
||||
|
||||
for (List<Vertex>::Element *E = nvertices.front(); E; E = E->next()) {
|
||||
Vertex v = E->get();
|
||||
for (uint32_t vi = 0; vi < nvertices.size(); vi++) {
|
||||
Vertex v = nvertices[vi];
|
||||
v.vertex = p_xform.xform(v.vertex);
|
||||
if (nformat & RS::ARRAY_FORMAT_NORMAL) {
|
||||
v.normal = p_xform.basis.xform(v.normal);
|
||||
|
@ -884,8 +874,8 @@ void SurfaceTool::append_from(const Ref<Mesh> &p_existing, int p_surface, const
|
|||
vertex_array.push_back(v);
|
||||
}
|
||||
|
||||
for (List<int>::Element *E = nindices.front(); E; E = E->next()) {
|
||||
int dst_index = E->get() + vfrom;
|
||||
for (uint32_t i = 0; i < nindices.size(); i++) {
|
||||
int dst_index = nindices[i] + vfrom;
|
||||
index_array.push_back(dst_index);
|
||||
}
|
||||
if (index_array.size() % 3) {
|
||||
|
@ -896,18 +886,18 @@ void SurfaceTool::append_from(const Ref<Mesh> &p_existing, int p_surface, const
|
|||
//mikktspace callbacks
|
||||
namespace {
|
||||
struct TangentGenerationContextUserData {
|
||||
Vector<List<SurfaceTool::Vertex>::Element *> vertices;
|
||||
Vector<List<int>::Element *> indices;
|
||||
LocalVector<SurfaceTool::Vertex> *vertices;
|
||||
LocalVector<int> *indices;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
int SurfaceTool::mikktGetNumFaces(const SMikkTSpaceContext *pContext) {
|
||||
TangentGenerationContextUserData &triangle_data = *reinterpret_cast<TangentGenerationContextUserData *>(pContext->m_pUserData);
|
||||
|
||||
if (triangle_data.indices.size() > 0) {
|
||||
return triangle_data.indices.size() / 3;
|
||||
if (triangle_data.indices->size() > 0) {
|
||||
return triangle_data.indices->size() / 3;
|
||||
} else {
|
||||
return triangle_data.vertices.size() / 3;
|
||||
return triangle_data.vertices->size() / 3;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -918,13 +908,13 @@ int SurfaceTool::mikktGetNumVerticesOfFace(const SMikkTSpaceContext *pContext, c
|
|||
void SurfaceTool::mikktGetPosition(const SMikkTSpaceContext *pContext, float fvPosOut[], const int iFace, const int iVert) {
|
||||
TangentGenerationContextUserData &triangle_data = *reinterpret_cast<TangentGenerationContextUserData *>(pContext->m_pUserData);
|
||||
Vector3 v;
|
||||
if (triangle_data.indices.size() > 0) {
|
||||
int index = triangle_data.indices[iFace * 3 + iVert]->get();
|
||||
if (index < triangle_data.vertices.size()) {
|
||||
v = triangle_data.vertices[index]->get().vertex;
|
||||
if (triangle_data.indices->size() > 0) {
|
||||
uint32_t index = triangle_data.indices->operator[](iFace * 3 + iVert);
|
||||
if (index < triangle_data.vertices->size()) {
|
||||
v = triangle_data.vertices->operator[](index).vertex;
|
||||
}
|
||||
} else {
|
||||
v = triangle_data.vertices[iFace * 3 + iVert]->get().vertex;
|
||||
v = triangle_data.vertices->operator[](iFace * 3 + iVert).vertex;
|
||||
}
|
||||
|
||||
fvPosOut[0] = v.x;
|
||||
|
@ -935,13 +925,13 @@ void SurfaceTool::mikktGetPosition(const SMikkTSpaceContext *pContext, float fvP
|
|||
void SurfaceTool::mikktGetNormal(const SMikkTSpaceContext *pContext, float fvNormOut[], const int iFace, const int iVert) {
|
||||
TangentGenerationContextUserData &triangle_data = *reinterpret_cast<TangentGenerationContextUserData *>(pContext->m_pUserData);
|
||||
Vector3 v;
|
||||
if (triangle_data.indices.size() > 0) {
|
||||
int index = triangle_data.indices[iFace * 3 + iVert]->get();
|
||||
if (index < triangle_data.vertices.size()) {
|
||||
v = triangle_data.vertices[index]->get().normal;
|
||||
if (triangle_data.indices->size() > 0) {
|
||||
uint32_t index = triangle_data.indices->operator[](iFace * 3 + iVert);
|
||||
if (index < triangle_data.vertices->size()) {
|
||||
v = triangle_data.vertices->operator[](index).normal;
|
||||
}
|
||||
} else {
|
||||
v = triangle_data.vertices[iFace * 3 + iVert]->get().normal;
|
||||
v = triangle_data.vertices->operator[](iFace * 3 + iVert).normal;
|
||||
}
|
||||
|
||||
fvNormOut[0] = v.x;
|
||||
|
@ -952,13 +942,13 @@ void SurfaceTool::mikktGetNormal(const SMikkTSpaceContext *pContext, float fvNor
|
|||
void SurfaceTool::mikktGetTexCoord(const SMikkTSpaceContext *pContext, float fvTexcOut[], const int iFace, const int iVert) {
|
||||
TangentGenerationContextUserData &triangle_data = *reinterpret_cast<TangentGenerationContextUserData *>(pContext->m_pUserData);
|
||||
Vector2 v;
|
||||
if (triangle_data.indices.size() > 0) {
|
||||
int index = triangle_data.indices[iFace * 3 + iVert]->get();
|
||||
if (index < triangle_data.vertices.size()) {
|
||||
v = triangle_data.vertices[index]->get().uv;
|
||||
if (triangle_data.indices->size() > 0) {
|
||||
uint32_t index = triangle_data.indices->operator[](iFace * 3 + iVert);
|
||||
if (index < triangle_data.vertices->size()) {
|
||||
v = triangle_data.vertices->operator[](index).uv;
|
||||
}
|
||||
} else {
|
||||
v = triangle_data.vertices[iFace * 3 + iVert]->get().uv;
|
||||
v = triangle_data.vertices->operator[](iFace * 3 + iVert).uv;
|
||||
}
|
||||
|
||||
fvTexcOut[0] = v.x;
|
||||
|
@ -969,13 +959,13 @@ void SurfaceTool::mikktSetTSpaceDefault(const SMikkTSpaceContext *pContext, cons
|
|||
const tbool bIsOrientationPreserving, const int iFace, const int iVert) {
|
||||
TangentGenerationContextUserData &triangle_data = *reinterpret_cast<TangentGenerationContextUserData *>(pContext->m_pUserData);
|
||||
Vertex *vtx = nullptr;
|
||||
if (triangle_data.indices.size() > 0) {
|
||||
int index = triangle_data.indices[iFace * 3 + iVert]->get();
|
||||
if (index < triangle_data.vertices.size()) {
|
||||
vtx = &triangle_data.vertices[index]->get();
|
||||
if (triangle_data.indices->size() > 0) {
|
||||
uint32_t index = triangle_data.indices->operator[](iFace * 3 + iVert);
|
||||
if (index < triangle_data.vertices->size()) {
|
||||
vtx = &triangle_data.vertices->operator[](index);
|
||||
}
|
||||
} else {
|
||||
vtx = &triangle_data.vertices[iFace * 3 + iVert]->get();
|
||||
vtx = &triangle_data.vertices->operator[](iFace * 3 + iVert);
|
||||
}
|
||||
|
||||
if (vtx != nullptr) {
|
||||
|
@ -1001,18 +991,12 @@ void SurfaceTool::generate_tangents() {
|
|||
msc.m_pInterface = &mkif;
|
||||
|
||||
TangentGenerationContextUserData triangle_data;
|
||||
triangle_data.vertices.resize(vertex_array.size());
|
||||
int idx = 0;
|
||||
for (List<Vertex>::Element *E = vertex_array.front(); E; E = E->next()) {
|
||||
triangle_data.vertices.write[idx++] = E;
|
||||
E->get().binormal = Vector3();
|
||||
E->get().tangent = Vector3();
|
||||
}
|
||||
triangle_data.indices.resize(index_array.size());
|
||||
idx = 0;
|
||||
for (List<int>::Element *E = index_array.front(); E; E = E->next()) {
|
||||
triangle_data.indices.write[idx++] = E;
|
||||
triangle_data.vertices = &vertex_array;
|
||||
for (uint32_t i = 0; i < vertex_array.size(); i++) {
|
||||
vertex_array[i].binormal = Vector3();
|
||||
vertex_array[i].tangent = Vector3();
|
||||
}
|
||||
triangle_data.indices = &index_array;
|
||||
msc.m_pUserData = &triangle_data;
|
||||
|
||||
bool res = genTangSpaceDefault(&msc);
|
||||
|
@ -1028,66 +1012,36 @@ void SurfaceTool::generate_normals(bool p_flip) {
|
|||
|
||||
deindex();
|
||||
|
||||
ERR_FAIL_COND((vertex_array.size() % 3) != 0);
|
||||
|
||||
HashMap<Vertex, Vector3, VertexHasher> vertex_hash;
|
||||
|
||||
int count = 0;
|
||||
bool smooth = false;
|
||||
if (smooth_groups.has(0)) {
|
||||
smooth = smooth_groups[0];
|
||||
}
|
||||
|
||||
List<Vertex>::Element *B = vertex_array.front();
|
||||
for (List<Vertex>::Element *E = B; E;) {
|
||||
List<Vertex>::Element *v[3];
|
||||
v[0] = E;
|
||||
v[1] = v[0]->next();
|
||||
ERR_FAIL_COND(!v[1]);
|
||||
v[2] = v[1]->next();
|
||||
ERR_FAIL_COND(!v[2]);
|
||||
E = v[2]->next();
|
||||
for (uint32_t vi = 0; vi < vertex_array.size(); vi += 3) {
|
||||
Vertex *v = &vertex_array[vi];
|
||||
|
||||
Vector3 normal;
|
||||
if (!p_flip) {
|
||||
normal = Plane(v[0]->get().vertex, v[1]->get().vertex, v[2]->get().vertex).normal;
|
||||
normal = Plane(v[0].vertex, v[1].vertex, v[2].vertex).normal;
|
||||
} else {
|
||||
normal = Plane(v[2]->get().vertex, v[1]->get().vertex, v[0]->get().vertex).normal;
|
||||
normal = Plane(v[2].vertex, v[1].vertex, v[0].vertex).normal;
|
||||
}
|
||||
|
||||
if (smooth) {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
Vector3 *lv = vertex_hash.getptr(v[i]->get());
|
||||
if (!lv) {
|
||||
vertex_hash.set(v[i]->get(), normal);
|
||||
} else {
|
||||
(*lv) += normal;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
v[i]->get().normal = normal;
|
||||
}
|
||||
}
|
||||
count += 3;
|
||||
|
||||
if (smooth_groups.has(count) || !E) {
|
||||
if (vertex_hash.size()) {
|
||||
while (B != E) {
|
||||
Vector3 *lv = vertex_hash.getptr(B->get());
|
||||
if (lv) {
|
||||
B->get().normal = lv->normalized();
|
||||
}
|
||||
|
||||
B = B->next();
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
Vector3 *lv = vertex_hash.getptr(v[i]);
|
||||
if (!lv) {
|
||||
vertex_hash.set(v[i], normal);
|
||||
} else {
|
||||
B = E;
|
||||
(*lv) += normal;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vertex_hash.clear();
|
||||
if (E) {
|
||||
smooth = smooth_groups[count];
|
||||
}
|
||||
for (uint32_t vi = 0; vi < vertex_array.size(); vi++) {
|
||||
Vector3 *lv = vertex_hash.getptr(vertex_array[vi]);
|
||||
if (!lv) {
|
||||
vertex_array[vi].normal = Vector3();
|
||||
} else {
|
||||
vertex_array[vi].normal = lv->normalized();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1095,7 +1049,6 @@ void SurfaceTool::generate_normals(bool p_flip) {
|
|||
|
||||
if (was_indexed) {
|
||||
index();
|
||||
smooth_groups.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1111,8 +1064,8 @@ void SurfaceTool::clear() {
|
|||
last_weights.clear();
|
||||
index_array.clear();
|
||||
vertex_array.clear();
|
||||
smooth_groups.clear();
|
||||
material.unref();
|
||||
last_smooth_group = 0;
|
||||
for (int i = 0; i < RS::ARRAY_CUSTOM_COUNT; i++) {
|
||||
last_custom_format[i] = CUSTOM_MAX;
|
||||
}
|
||||
|
@ -1136,6 +1089,51 @@ SurfaceTool::CustomFormat SurfaceTool::get_custom_format(int p_index) const {
|
|||
ERR_FAIL_INDEX_V(p_index, RS::ARRAY_CUSTOM_COUNT, CUSTOM_MAX);
|
||||
return last_custom_format[p_index];
|
||||
}
|
||||
void SurfaceTool::optimize_indices_for_cache() {
|
||||
ERR_FAIL_COND(optimize_vertex_cache_func == nullptr);
|
||||
ERR_FAIL_COND(index_array.size() == 0);
|
||||
|
||||
LocalVector old_index_array = index_array;
|
||||
zeromem(index_array.ptr(), index_array.size() * sizeof(int));
|
||||
optimize_vertex_cache_func((unsigned int *)index_array.ptr(), (unsigned int *)old_index_array.ptr(), old_index_array.size(), vertex_array.size());
|
||||
}
|
||||
|
||||
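// Illustrative aside (not part of this diff): optimize_indices_for_cache()
// carries no cache-optimization logic itself; it forwards to whatever was
// installed in optimize_vertex_cache_func. The typedefs in surface_tool.h
// match meshopt_optimizeVertexCache and meshopt_simplify from the bundled
// meshoptimizer, so hooking the library up presumably reduces to two
// assignments along these lines, done wherever the module registers itself:
//
//     SurfaceTool::optimize_vertex_cache_func = meshopt_optimizeVertexCache;
//     SurfaceTool::simplify_func = meshopt_simplify;
//
// which keeps the engine core free of a hard dependency on the library.
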
float SurfaceTool::get_max_axis_length() const {
    ERR_FAIL_COND_V(vertex_array.size() == 0, 0);

    AABB aabb;
    for (uint32_t i = 0; i < vertex_array.size(); i++) {
        if (i == 0) {
            aabb.position = vertex_array[i].vertex;
        } else {
            aabb.expand_to(vertex_array[i].vertex);
        }
    }

    return aabb.get_longest_axis_size();
}
Vector<int> SurfaceTool::generate_lod(float p_threshold, int p_target_index_count) {
    Vector<int> lod;

    ERR_FAIL_COND_V(simplify_func == nullptr, lod);
    ERR_FAIL_COND_V(vertex_array.size() == 0, lod);
    ERR_FAIL_COND_V(index_array.size() == 0, lod);

    lod.resize(index_array.size());
    LocalVector<float> vertices; //uses floats
    vertices.resize(vertex_array.size() * 3);
    for (uint32_t i = 0; i < vertex_array.size(); i++) {
        vertices[i * 3 + 0] = vertex_array[i].vertex.x;
        vertices[i * 3 + 1] = vertex_array[i].vertex.y;
        vertices[i * 3 + 2] = vertex_array[i].vertex.z;
    }

    uint32_t index_count = simplify_func((unsigned int *)lod.ptrw(), (unsigned int *)index_array.ptr(), index_array.size(), vertices.ptr(), vertex_array.size(), sizeof(float) * 3, p_target_index_count, p_threshold);
    ERR_FAIL_COND_V(index_count == 0, lod);
    lod.resize(index_count);

    return lod;
}

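// Illustrative usage of the two entry points above (not part of this diff;
// "source_mesh" is an assumed Ref<Mesh> with at least one surface):
//
//     Ref<SurfaceTool> st;
//     st.instance();
//     st->create_from(source_mesh, 0); // pull surface 0 into the tool
//     st->optimize_indices_for_cache();
//     // ask for roughly 3000 indices, with a 0.05 simplification error threshold
//     Vector<int> lod = st->generate_lod(0.05f, 3000);
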
void SurfaceTool::_bind_methods() {
    ClassDB::bind_method(D_METHOD("set_skin_weight_count", "count"), &SurfaceTool::set_skin_weight_count);

@@ -1155,7 +1153,7 @@ void SurfaceTool::_bind_methods() {
    ClassDB::bind_method(D_METHOD("set_bones", "bones"), &SurfaceTool::set_bones);
    ClassDB::bind_method(D_METHOD("set_weights", "weights"), &SurfaceTool::set_weights);
    ClassDB::bind_method(D_METHOD("set_custom", "index", "custom"), &SurfaceTool::set_custom);
    ClassDB::bind_method(D_METHOD("add_smooth_group", "smooth"), &SurfaceTool::add_smooth_group);
    ClassDB::bind_method(D_METHOD("set_smooth_group", "index"), &SurfaceTool::set_smooth_group);

    ClassDB::bind_method(D_METHOD("add_triangle_fan", "vertices", "uvs", "colors", "uv2s", "normals", "tangents"), &SurfaceTool::add_triangle_fan, DEFVAL(Vector<Vector2>()), DEFVAL(Vector<Color>()), DEFVAL(Vector<Vector2>()), DEFVAL(Vector<Vector3>()), DEFVAL(Vector<Plane>()));

@@ -1166,6 +1164,11 @@ void SurfaceTool::_bind_methods() {
    ClassDB::bind_method(D_METHOD("generate_normals", "flip"), &SurfaceTool::generate_normals, DEFVAL(false));
    ClassDB::bind_method(D_METHOD("generate_tangents"), &SurfaceTool::generate_tangents);

    ClassDB::bind_method(D_METHOD("optimize_indices_for_cache"), &SurfaceTool::optimize_indices_for_cache);

    ClassDB::bind_method(D_METHOD("get_max_axis_length"), &SurfaceTool::get_max_axis_length);
    ClassDB::bind_method(D_METHOD("generate_lod", "nd_threshold", "target_index_count"), &SurfaceTool::generate_lod, DEFVAL(3));

    ClassDB::bind_method(D_METHOD("set_material", "material"), &SurfaceTool::set_material);

    ClassDB::bind_method(D_METHOD("clear"), &SurfaceTool::clear);

@@ -31,8 +31,8 @@
#ifndef SURFACE_TOOL_H
#define SURFACE_TOOL_H

#include "core/templates/local_vector.h"
#include "scene/resources/mesh.h"

#include "thirdparty/misc/mikktspace.h"

class SurfaceTool : public Reference {

@@ -50,6 +50,7 @@ public:
    Vector<int> bones;
    Vector<float> weights;
    Color custom[RS::ARRAY_CUSTOM_COUNT];
    uint32_t smooth_group = 0;

    bool operator==(const Vertex &p_vertex) const;

@@ -73,6 +74,11 @@ public:
        SKIN_8_WEIGHTS
    };

    typedef void (*OptimizeVertexCacheFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
    static OptimizeVertexCacheFunc optimize_vertex_cache_func;
    typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
    static SimplifyFunc simplify_func;

private:
    struct VertexHasher {
        static _FORCE_INLINE_ uint32_t hash(const Vertex &p_vtx);

@@ -92,9 +98,8 @@ private:
    uint32_t format;
    Ref<Material> material;
    //arrays
    List<Vertex> vertex_array;
    List<int> index_array;
    Map<int, bool> smooth_groups;
    LocalVector<Vertex> vertex_array;
    LocalVector<int> index_array;

    //memory
    Color last_color;

@@ -104,6 +109,7 @@ private:
    Vector<int> last_bones;
    Vector<float> last_weights;
    Plane last_tangent;
    uint32_t last_smooth_group = 0;

    SkinWeightCount skin_weights;

@@ -111,8 +117,8 @@ private:

    CustomFormat last_custom_format[RS::ARRAY_CUSTOM_COUNT];

    void _create_list_from_arrays(Array arr, List<Vertex> *r_vertex, List<int> *r_index, uint32_t &lformat);
    void _create_list(const Ref<Mesh> &p_existing, int p_surface, List<Vertex> *r_vertex, List<int> *r_index, uint32_t &lformat);
    void _create_list_from_arrays(Array arr, LocalVector<Vertex> *r_vertex, LocalVector<int> *r_index, uint32_t &lformat);
    void _create_list(const Ref<Mesh> &p_existing, int p_surface, LocalVector<Vertex> *r_vertex, LocalVector<int> *r_index, uint32_t &lformat);

    //mikktspace callbacks
    static int mikktGetNumFaces(const SMikkTSpaceContext *pContext);

@@ -143,10 +149,10 @@ public:
    void set_custom(int p_index, const Color &p_custom);
    void set_bones(const Vector<int> &p_bones);
    void set_weights(const Vector<float> &p_weights);
    void set_smooth_group(uint32_t p_group);

    void add_vertex(const Vector3 &p_vertex);

    void add_smooth_group(bool p_smooth);
    void add_triangle_fan(const Vector<Vector3> &p_vertices, const Vector<Vector2> &p_uvs = Vector<Vector2>(), const Vector<Color> &p_colors = Vector<Color>(), const Vector<Vector2> &p_uv2s = Vector<Vector2>(), const Vector<Vector3> &p_normals = Vector<Vector3>(), const Vector<Plane> &p_tangents = Vector<Plane>());

    void add_index(int p_index);

@@ -156,14 +162,18 @@ public:
    void generate_normals(bool p_flip = false);
    void generate_tangents();

    void optimize_indices_for_cache();
    float get_max_axis_length() const;
    Vector<int> generate_lod(float p_threshold, int p_target_index_count = 3);

    void set_material(const Ref<Material> &p_material);

    void clear();

    List<Vertex> &get_vertex_array() { return vertex_array; }
    LocalVector<Vertex> &get_vertex_array() { return vertex_array; }

    void create_from_triangle_arrays(const Array &p_arrays);
    static Vector<Vertex> create_vertex_array_from_triangle_arrays(const Array &p_arrays, uint32_t *r_format = nullptr);
    static void create_vertex_array_from_triangle_arrays(const Array &p_arrays, LocalVector<Vertex> &ret, uint32_t *r_format = nullptr);
    Array commit_to_arrays();
    void create_from(const Ref<Mesh> &p_existing, int p_surface);
    void create_from_blend_shape(const Ref<Mesh> &p_existing, int p_surface, const String &p_blend_shape_name);

@@ -357,6 +357,16 @@ File extracted from upstream release tarball:
- Added 2 files `godot_core_mbedtls_platform.{c,h}` providing configuration
  for light bundling with core.

## meshoptimizer

- Upstream: https://github.com/zeux/meshoptimizer
- Version: 0.15 (2020)
- License: MIT

File extracted from upstream release tarball:

- Files in src/ go to thirdparty/meshoptimizer


## miniupnpc

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2016-2020 Arseny Kapoulkine

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,8 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*))
{
    meshopt_Allocator::Storage::allocate = allocate;
    meshopt_Allocator::Storage::deallocate = deallocate;
}

@@ -0,0 +1,351 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <math.h>
#include <string.h>

// This work is based on:
// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016
// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016
// Jack Ritter. An Efficient Bounding Sphere. 1990
namespace meshopt
{

static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
{
    assert(count > 0);

    // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates
    size_t pmin[3] = {0, 0, 0};
    size_t pmax[3] = {0, 0, 0};

    for (size_t i = 0; i < count; ++i)
    {
        const float* p = points[i];

        for (int axis = 0; axis < 3; ++axis)
        {
            pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis];
            pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis];
        }
    }

    // find the pair of points with largest distance
    float paxisd2 = 0;
    int paxis = 0;

    for (int axis = 0; axis < 3; ++axis)
    {
        const float* p1 = points[pmin[axis]];
        const float* p2 = points[pmax[axis]];

        float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]);

        if (d2 > paxisd2)
        {
            paxisd2 = d2;
            paxis = axis;
        }
    }

    // use the longest segment as the initial sphere diameter
    const float* p1 = points[pmin[paxis]];
    const float* p2 = points[pmax[paxis]];

    float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2};
    float radius = sqrtf(paxisd2) / 2;

    // iteratively adjust the sphere up until all points fit
    for (size_t i = 0; i < count; ++i)
    {
        const float* p = points[i];
        float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);

        if (d2 > radius * radius)
        {
            float d = sqrtf(d2);
            assert(d > 0);

            float k = 0.5f + (radius / d) / 2;

            center[0] = center[0] * k + p[0] * (1 - k);
            center[1] = center[1] * k + p[1] * (1 - k);
            center[2] = center[2] * k + p[2] * (1 - k);
            radius = (radius + d) / 2;
        }
    }

    result[0] = center[0];
    result[1] = center[1];
    result[2] = center[2];
    result[3] = radius;
}

} // namespace meshopt

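// Illustrative aside (not part of the upstream file): the routine above is
// Ritter's two-pass construction - seed the sphere with the farthest-apart
// pair of axis extrema, then grow it until every point fits. A tiny worked
// check, assuming points = {(0,0,0), (2,0,0)}:
//   the extreme pair along x gives d2 = 4, so center = (1,0,0), radius = 1;
//   the growth pass finds no point outside, so the sphere is already final.
// For less convenient inputs the result encloses all points but may be
// slightly larger than the optimal bounding sphere.
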
size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
{
    assert(index_count % 3 == 0);
    assert(max_vertices >= 3);
    assert(max_triangles >= 1);

    // meshlet construction is limited by max vertices and max triangles per meshlet
    // the worst case is that the input is an unindexed stream since this equally stresses both limits
    // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle
    size_t max_vertices_conservative = max_vertices - 2;
    size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative;
    size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles;

    return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
}

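// Worked example for the bound above (illustrative, not upstream code):
// index_count = 3000 (1000 triangles), max_vertices = 64, max_triangles = 126:
//   vertex-limited:   (3000 + 61) / 62   = 49 meshlets
//   triangle-limited: (1000 + 125) / 126 = 8 meshlets
// so the function returns max(49, 8) = 49.
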
size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
    assert(index_count % 3 == 0);
    assert(max_vertices >= 3);
    assert(max_triangles >= 1);

    meshopt_Allocator allocator;

    meshopt_Meshlet meshlet;
    memset(&meshlet, 0, sizeof(meshlet));

    assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0]));
    assert(max_triangles <= sizeof(meshlet.indices) / 3);

    // index of the vertex in the meshlet, 0xff if the vertex isn't used
    unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
    memset(used, -1, vertex_count);

    size_t offset = 0;

    for (size_t i = 0; i < index_count; i += 3)
    {
        unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
        assert(a < vertex_count && b < vertex_count && c < vertex_count);

        unsigned char& av = used[a];
        unsigned char& bv = used[b];
        unsigned char& cv = used[c];

        unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);

        if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
        {
            destination[offset++] = meshlet;

            for (size_t j = 0; j < meshlet.vertex_count; ++j)
                used[meshlet.vertices[j]] = 0xff;

            memset(&meshlet, 0, sizeof(meshlet));
        }

        if (av == 0xff)
        {
            av = meshlet.vertex_count;
            meshlet.vertices[meshlet.vertex_count++] = a;
        }

        if (bv == 0xff)
        {
            bv = meshlet.vertex_count;
            meshlet.vertices[meshlet.vertex_count++] = b;
        }

        if (cv == 0xff)
        {
            cv = meshlet.vertex_count;
            meshlet.vertices[meshlet.vertex_count++] = c;
        }

        meshlet.indices[meshlet.triangle_count][0] = av;
        meshlet.indices[meshlet.triangle_count][1] = bv;
        meshlet.indices[meshlet.triangle_count][2] = cv;
        meshlet.triangle_count++;
    }

    if (meshlet.triangle_count)
        destination[offset++] = meshlet;

    assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));

    return offset;
}

meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    using namespace meshopt;

    assert(index_count % 3 == 0);
    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
    assert(vertex_positions_stride % sizeof(float) == 0);

    assert(index_count / 3 <= 256);

    (void)vertex_count;

    size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

    // compute triangle normals and gather triangle corners
    float normals[256][3];
    float corners[256][3][3];
    size_t triangles = 0;

    for (size_t i = 0; i < index_count; i += 3)
    {
        unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
        assert(a < vertex_count && b < vertex_count && c < vertex_count);

        const float* p0 = vertex_positions + vertex_stride_float * a;
        const float* p1 = vertex_positions + vertex_stride_float * b;
        const float* p2 = vertex_positions + vertex_stride_float * c;

        float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
        float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};

        float normalx = p10[1] * p20[2] - p10[2] * p20[1];
        float normaly = p10[2] * p20[0] - p10[0] * p20[2];
        float normalz = p10[0] * p20[1] - p10[1] * p20[0];

        float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

        // no need to include degenerate triangles - they will be invisible anyway
        if (area == 0.f)
            continue;

        // record triangle normals & corners for future use; normal and corner 0 define a plane equation
        normals[triangles][0] = normalx / area;
        normals[triangles][1] = normaly / area;
        normals[triangles][2] = normalz / area;
        memcpy(corners[triangles][0], p0, 3 * sizeof(float));
        memcpy(corners[triangles][1], p1, 3 * sizeof(float));
        memcpy(corners[triangles][2], p2, 3 * sizeof(float));
        triangles++;
    }

    meshopt_Bounds bounds = {};

    // degenerate cluster, no valid triangles => trivial reject (cone data is 0)
    if (triangles == 0)
        return bounds;

    // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well
    float psphere[4] = {};
    computeBoundingSphere(psphere, corners[0], triangles * 3);

    float center[3] = {psphere[0], psphere[1], psphere[2]};

    // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis
    float nsphere[4] = {};
    computeBoundingSphere(nsphere, normals, triangles);

    float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};
    float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
    float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength;

    axis[0] *= invaxislength;
    axis[1] *= invaxislength;
    axis[2] *= invaxislength;

    // compute a tight cone around all normals, mindp = cos(angle/2)
    float mindp = 1.f;

    for (size_t i = 0; i < triangles; ++i)
    {
        float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2];

        mindp = (dp < mindp) ? dp : mindp;
    }

    // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones
    bounds.center[0] = center[0];
    bounds.center[1] = center[1];
    bounds.center[2] = center[2];
    bounds.radius = psphere[3];

    // degenerate cluster, normal cone is larger than a hemisphere => trivial accept
    // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable
    // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful
    if (mindp <= 0.1f)
    {
        bounds.cone_cutoff = 1;
        bounds.cone_cutoff_s8 = 127;
        return bounds;
    }

    float maxt = 0;

    // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles
    for (size_t i = 0; i < triangles; ++i)
    {
        // dot(center-t*axis-corner, trinormal) = 0
        // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0
        float cx = center[0] - corners[i][0][0];
        float cy = center[1] - corners[i][0][1];
        float cz = center[2] - corners[i][0][2];

        float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2];
        float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2];

        // dn should be larger than mindp cutoff above
        assert(dn > 0.f);
        float t = dc / dn;

        maxt = (t > maxt) ? t : maxt;
    }

    // cone apex should be in the negative half-space of all cluster triangles by construction
    bounds.cone_apex[0] = center[0] - axis[0] * maxt;
    bounds.cone_apex[1] = center[1] - axis[1] * maxt;
    bounds.cone_apex[2] = center[2] - axis[2] * maxt;

    // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis
    bounds.cone_axis[0] = axis[0];
    bounds.cone_axis[1] = axis[1];
    bounds.cone_axis[2] = axis[2];

    // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone
    // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a))
    bounds.cone_cutoff = sqrtf(1 - mindp * mindp);

    // quantize axis & cutoff to 8-bit SNORM format
    bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8));
    bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8));
    bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8));

    // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error
    float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]);
    float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]);
    float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]);

    // note that we need to round this up instead of rounding to nearest, hence +1
    int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1);

    bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8);

    return bounds;
}

meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
    assert(vertex_positions_stride % sizeof(float) == 0);

    unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])];

    for (size_t i = 0; i < meshlet->triangle_count; ++i)
    {
        unsigned int a = meshlet->vertices[meshlet->indices[i][0]];
        unsigned int b = meshlet->vertices[meshlet->indices[i][1]];
        unsigned int c = meshlet->vertices[meshlet->indices[i][2]];

        assert(a < vertex_count && b < vertex_count && c < vertex_count);

        indices[i * 3 + 0] = a;
        indices[i * 3 + 1] = b;
        indices[i * 3 + 2] = c;
    }

    return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
}

@@ -0,0 +1,752 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

#ifndef TRACE
#define TRACE 0
#endif

#if TRACE
#include <stdio.h>
#endif

// This work is based on:
// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
namespace meshopt
{

const unsigned char kIndexHeader = 0xe0;
const unsigned char kSequenceHeader = 0xd0;

static int gEncodeIndexVersion = 0;

typedef unsigned int VertexFifo[16];
typedef unsigned int EdgeFifo[16][2];

static const unsigned int kTriangleIndexOrder[3][3] = {
    {0, 1, 2},
    {1, 2, 0},
    {2, 0, 1},
};

static const unsigned char kCodeAuxEncodingTable[16] = {
    0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
    0, 0, // last two entries aren't used for encoding
};

static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
{
    (void)a;

    return (b == next) ? 1 : (c == next) ? 2 : 0;
}

static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
{
    for (int i = 0; i < 16; ++i)
    {
        size_t index = (offset - 1 - i) & 15;

        unsigned int e0 = fifo[index][0];
        unsigned int e1 = fifo[index][1];

        if (e0 == a && e1 == b)
            return (i << 2) | 0;
        if (e0 == b && e1 == c)
            return (i << 2) | 1;
        if (e0 == c && e1 == a)
            return (i << 2) | 2;
    }

    return -1;
}

static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
{
    fifo[offset][0] = a;
    fifo[offset][1] = b;
    offset = (offset + 1) & 15;
}

static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
{
    for (int i = 0; i < 16; ++i)
    {
        size_t index = (offset - 1 - i) & 15;

        if (fifo[index] == v)
            return i;
    }

    return -1;
}

static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
{
    fifo[offset] = v;
    offset = (offset + cond) & 15;
}

static void encodeVByte(unsigned char*& data, unsigned int v)
{
    // encode 32-bit value in up to 5 7-bit groups
    do
    {
        *data++ = (v & 127) | (v > 127 ? 128 : 0);
        v >>= 7;
    } while (v);
}

static unsigned int decodeVByte(const unsigned char*& data)
{
    unsigned char lead = *data++;

    // fast path: single byte
    if (lead < 128)
        return lead;

    // slow path: up to 4 extra bytes
    // note that this loop always terminates, which is important for malformed data
    unsigned int result = lead & 127;
    unsigned int shift = 7;

    for (int i = 0; i < 4; ++i)
    {
        unsigned char group = *data++;
        result |= (group & 127) << shift;
        shift += 7;

        if (group < 128)
            break;
    }

    return result;
}

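// Illustrative trace of the varint pair above (not upstream code), v = 300:
//   encodeVByte: byte 0 = (300 & 127) | 128 = 0xac, byte 1 = 300 >> 7 = 0x02
//   decodeVByte: lead 0xac has the continuation bit set, so result = 0x2c (44),
//   then group 0x02 contributes 2 << 7 = 256, giving 44 + 256 = 300.
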
static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last)
{
    unsigned int d = index - last;
    unsigned int v = (d << 1) ^ (int(d) >> 31);

    encodeVByte(data, v);
}

static unsigned int decodeIndex(const unsigned char*& data, unsigned int last)
{
    unsigned int v = decodeVByte(data);
    unsigned int d = (v >> 1) ^ -int(v & 1);

    return last + d;
}

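// Illustrative note (not upstream code): the pair above delta-encodes indices
// against 'last' and zigzag-folds the signed delta so that small jumps in
// either direction become small unsigned varints:
//   d:                         0, -1,  1, -2,  2, ...
//   (d << 1) ^ (int(d) >> 31): 0,  1,  2,  3,  4, ...
// e.g. last = 10, index = 8 gives d = -2, v = 3 - a single encoded byte.
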
static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
{
    for (int i = 0; i < 16; ++i)
        if (table[i] == v)
            return i;

    return -1;
}

static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
{
    if (index_size == 2)
    {
        static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a);
        static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b);
        static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c);
    }
    else
    {
        static_cast<unsigned int*>(destination)[offset + 0] = a;
        static_cast<unsigned int*>(destination)[offset + 1] = b;
        static_cast<unsigned int*>(destination)[offset + 2] = c;
    }
}

#if TRACE
static size_t sortTop16(unsigned char dest[16], size_t stats[256])
{
    size_t destsize = 0;

    for (size_t i = 0; i < 256; ++i)
    {
        size_t j = 0;
        for (; j < destsize; ++j)
        {
            if (stats[i] >= stats[dest[j]])
            {
                if (destsize < 16)
                    destsize++;

                memmove(&dest[j + 1], &dest[j], destsize - 1 - j);
                dest[j] = (unsigned char)i;
                break;
            }
        }

        if (j == destsize && destsize < 16)
        {
            dest[destsize] = (unsigned char)i;
            destsize++;
        }
    }

    return destsize;
}
#endif

} // namespace meshopt

size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
{
    using namespace meshopt;

    assert(index_count % 3 == 0);

#if TRACE
    size_t codestats[256] = {};
    size_t codeauxstats[256] = {};
#endif

    // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
    if (buffer_size < 1 + index_count / 3 + 16)
        return 0;

    int version = gEncodeIndexVersion;

    buffer[0] = (unsigned char)(kIndexHeader | version);

    EdgeFifo edgefifo;
    memset(edgefifo, -1, sizeof(edgefifo));

    VertexFifo vertexfifo;
    memset(vertexfifo, -1, sizeof(vertexfifo));

    size_t edgefifooffset = 0;
    size_t vertexfifooffset = 0;

    unsigned int next = 0;
    unsigned int last = 0;

    unsigned char* code = buffer + 1;
    unsigned char* data = code + index_count / 3;
    unsigned char* data_safe_end = buffer + buffer_size - 16;

    int fecmax = version >= 1 ? 13 : 15;

    // use static encoding table; it's possible to pack the result and then build an optimal table and repack
    // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
    const unsigned char* codeaux_table = kCodeAuxEncodingTable;

    for (size_t i = 0; i < index_count; i += 3)
    {
        // make sure we have enough space to write a triangle
        // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
        // after this we can be sure we can write without extra bounds checks
        if (data > data_safe_end)
            return 0;

        int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);

        if (fer >= 0 && (fer >> 2) < 15)
        {
            const unsigned int* order = kTriangleIndexOrder[fer & 3];

            unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];

            // encode edge index and vertex fifo index, next or free index
            int fe = fer >> 2;
            int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);

            int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15;

            if (fec == 15 && version >= 1)
            {
                // encode last-1 and last+1 to optimize strip-like sequences
                if (c + 1 == last)
                    fec = 13, last = c;
                if (c == last + 1)
                    fec = 14, last = c;
            }

            *code++ = (unsigned char)((fe << 4) | fec);

#if TRACE
            codestats[code[-1]]++;
#endif

            // note that we need to update the last index since free indices are delta-encoded
            if (fec == 15)
                encodeIndex(data, c, last), last = c;

            // we only need to push third vertex since first two are likely already in the vertex fifo
            if (fec == 0 || fec >= fecmax)
                pushVertexFifo(vertexfifo, c, vertexfifooffset);

            // we only need to push two new edges to edge fifo since the third one is already there
            pushEdgeFifo(edgefifo, c, b, edgefifooffset);
            pushEdgeFifo(edgefifo, a, c, edgefifooffset);
        }
        else
        {
            int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
            const unsigned int* order = kTriangleIndexOrder[rotation];

            unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];

            // if a/b/c are 0/1/2, we emit a reset code
            bool reset = false;

            if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1)
            {
                reset = true;
                next = 0;

                // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future
                // this makes sure next continues to get incremented instead of being stuck
                memset(vertexfifo, -1, sizeof(vertexfifo));
            }

            int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
            int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);

            // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
            int fea = (a == next) ? (next++, 0) : 15;
            int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15;
            int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15;

            // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
            unsigned char codeaux = (unsigned char)((feb << 4) | fec);
            int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);

            // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
            if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset)
            {
                *code++ = (unsigned char)((15 << 4) | codeauxindex);
            }
            else
            {
                *code++ = (unsigned char)((15 << 4) | 14 | fea);
                *data++ = codeaux;
            }

#if TRACE
            codestats[code[-1]]++;
            codeauxstats[codeaux]++;
#endif

            // note that we need to update the last index since free indices are delta-encoded
            if (fea == 15)
                encodeIndex(data, a, last), last = a;

            if (feb == 15)
                encodeIndex(data, b, last), last = b;

            if (fec == 15)
                encodeIndex(data, c, last), last = c;

            // only push vertices that weren't already in fifo
            if (fea == 0 || fea == 15)
                pushVertexFifo(vertexfifo, a, vertexfifooffset);

            if (feb == 0 || feb == 15)
                pushVertexFifo(vertexfifo, b, vertexfifooffset);

            if (fec == 0 || fec == 15)
                pushVertexFifo(vertexfifo, c, vertexfifooffset);

            // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles
            pushEdgeFifo(edgefifo, b, a, edgefifooffset);
            pushEdgeFifo(edgefifo, c, b, edgefifooffset);
            pushEdgeFifo(edgefifo, a, c, edgefifooffset);
        }
    }

    // make sure we have enough space to write codeaux table
    if (data > data_safe_end)
        return 0;

    // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
    // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
    // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
    for (size_t i = 0; i < 16; ++i)
    {
        // decoder assumes that table entries never refer to separately encoded indices
        assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);

        *data++ = codeaux_table[i];
    }

    // since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference
    assert(codeaux_table[0] == 0);

    assert(data >= buffer + index_count / 3 + 16);
    assert(data <= buffer + buffer_size);

#if TRACE
    unsigned char codetop[16], codeauxtop[16];
    size_t codetopsize = sortTop16(codetop, codestats);
    size_t codeauxtopsize = sortTop16(codeauxtop, codeauxstats);

    size_t sumcode = 0, sumcodeaux = 0;
    for (size_t i = 0; i < 256; ++i)
        sumcode += codestats[i], sumcodeaux += codeauxstats[i];

    size_t acccode = 0, acccodeaux = 0;

    printf("code\t\t\t\t\tcodeaux\n");

    for (size_t i = 0; i < codetopsize && i < codeauxtopsize; ++i)
    {
        acccode += codestats[codetop[i]];
        acccodeaux += codeauxstats[codeauxtop[i]];

        printf("%2d: %02x = %d (%.1f%% ..%.1f%%)\t\t%2d: %02x = %d (%.1f%% ..%.1f%%)\n",
            int(i), codetop[i], int(codestats[codetop[i]]), double(codestats[codetop[i]]) / double(sumcode) * 100, double(acccode) / double(sumcode) * 100,
            int(i), codeauxtop[i], int(codeauxstats[codeauxtop[i]]), double(codeauxstats[codeauxtop[i]]) / double(sumcodeaux) * 100, double(acccodeaux) / double(sumcodeaux) * 100);
    }
#endif

    return data - buffer;
}

size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
{
    assert(index_count % 3 == 0);

    // compute number of bits required for each index
    unsigned int vertex_bits = 1;

    while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
        vertex_bits++;

    // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas
    unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;

    return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16;
}

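// Worked example for the bound above (illustrative, not upstream code):
// vertex_count = 1000 needs vertex_bits = 10 (1 << 10 = 1024 >= 1000), so
// vertex_groups = (10 + 1 + 6) / 7 = 2; for index_count = 3000 the bound is
//   1 + (3000 / 3) * (2 + 3 * 2) + 16 = 1 + 8000 + 16 = 8017 bytes.
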
void meshopt_encodeIndexVersion(int version)
|
||||
{
|
||||
assert(unsigned(version) <= 1);
|
||||
|
||||
meshopt::gEncodeIndexVersion = version;
|
||||
}
|
||||
|
||||
int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
assert(index_size == 2 || index_size == 4);
|
||||
|
||||
// the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
|
||||
if (buffer_size < 1 + index_count / 3 + 16)
|
||||
return -2;
|
||||
|
||||
if ((buffer[0] & 0xf0) != kIndexHeader)
|
||||
return -1;
|
||||
|
||||
int version = buffer[0] & 0x0f;
|
||||
if (version > 1)
|
||||
return -1;
|
||||
|
||||
EdgeFifo edgefifo;
|
||||
memset(edgefifo, -1, sizeof(edgefifo));
|
||||
|
||||
VertexFifo vertexfifo;
|
||||
	memset(vertexfifo, -1, sizeof(vertexfifo));

	size_t edgefifooffset = 0;
	size_t vertexfifooffset = 0;

	unsigned int next = 0;
	unsigned int last = 0;

	int fecmax = version >= 1 ? 13 : 15;

	// since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
	const unsigned char* code = buffer + 1;
	const unsigned char* data = code + index_count / 3;
	const unsigned char* data_safe_end = buffer + buffer_size - 16;

	const unsigned char* codeaux_table = data_safe_end;

	for (size_t i = 0; i < index_count; i += 3)
	{
		// make sure we have enough data to read for a triangle
		// each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
		// after this we can be sure we can read without extra bounds checks
		if (data > data_safe_end)
			return -2;

		unsigned char codetri = *code++;

		if (codetri < 0xf0)
		{
			int fe = codetri >> 4;

			// fifo reads are wrapped around 16 entry buffer
			unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
			unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];

			int fec = codetri & 15;

			// note: this is the most common path in the entire decoder
			// inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
			if (fec < fecmax)
			{
				// fifo reads are wrapped around 16 entry buffer
				unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
				unsigned int c = (fec == 0) ? next : cf;

				int fec0 = fec == 0;
				next += fec0;

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);

				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
			else
			{
				unsigned int c = 0;

				// fec - (fec ^ 3) decodes 13, 14 into -1, 1
				// note that we need to update the last index since free indices are delta-encoded
				last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, c, vertexfifooffset);

				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
		}
		else
		{
			// fast path: read codeaux from the table
			if (codetri < 0xfe)
			{
				unsigned char codeaux = codeaux_table[codetri & 15];

				// note: table can't contain feb/fec=15
				int feb = codeaux >> 4;
				int fec = codeaux & 15;

				// fifo reads are wrapped around 16 entry buffer
				// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
				unsigned int a = next++;

				unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15];
				unsigned int b = (feb == 0) ? next : bf;

				int feb0 = feb == 0;
				next += feb0;

				unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
				unsigned int c = (fec == 0) ? next : cf;

				int fec0 = fec == 0;
				next += fec0;

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, a, vertexfifooffset);
				pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
				pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);

				pushEdgeFifo(edgefifo, b, a, edgefifooffset);
				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
			else
			{
				// slow path: read a full byte for codeaux instead of using a table lookup
				unsigned char codeaux = *data++;

				int fea = codetri == 0xfe ? 0 : 15;
				int feb = codeaux >> 4;
				int fec = codeaux & 15;

				// reset: codeaux is 0 but encoded as not-a-table
				if (codeaux == 0)
					next = 0;

				// fifo reads are wrapped around 16 entry buffer
				// also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
				unsigned int a = (fea == 0) ? next++ : 0;
				unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
				unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];

				// note that we need to update the last index since free indices are delta-encoded
				if (fea == 15)
					last = a = decodeIndex(data, last);

				if (feb == 15)
					last = b = decodeIndex(data, last);

				if (fec == 15)
					last = c = decodeIndex(data, last);

				// output triangle
				writeTriangle(destination, i, index_size, a, b, c);

				// push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
				pushVertexFifo(vertexfifo, a, vertexfifooffset);
				pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
				pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));

				pushEdgeFifo(edgefifo, b, a, edgefifooffset);
				pushEdgeFifo(edgefifo, c, b, edgefifooffset);
				pushEdgeFifo(edgefifo, a, c, edgefifooffset);
			}
		}
	}

	// we should've read all data bytes and stopped at the boundary between data and codeaux table
	if (data != data_safe_end)
		return -3;

	return 0;
}
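// Usage sketch (illustrative; std::vector and the `encoded` buffer are assumptions, not
// part of this file): decoding into a 32-bit index buffer and checking the result code.
//
//   std::vector<unsigned int> decoded(index_count);
//   int res = meshopt_decodeIndexBuffer(decoded.data(), index_count, sizeof(unsigned int), encoded.data(), encoded.size());
//   assert(res == 0); // nonzero return values indicate malformed input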

size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
{
	using namespace meshopt;

	// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
	if (buffer_size < 1 + index_count + 4)
		return 0;

	int version = gEncodeIndexVersion;

	buffer[0] = (unsigned char)(kSequenceHeader | version);

	unsigned int last[2] = {};
	unsigned int current = 0;

	unsigned char* data = buffer + 1;
	unsigned char* data_safe_end = buffer + buffer_size - 4;

	for (size_t i = 0; i < index_count; ++i)
	{
		// make sure we have enough data to write
		// each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end
		// after this we can be sure we can write without extra bounds checks
		if (data >= data_safe_end)
			return 0;

		unsigned int index = indices[i];

		// this is a heuristic that switches between baselines when the delta grows too large
		// we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index
		// for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily
		int cd = int(index - last[current]);
		current ^= ((cd < 0 ? -cd : cd) >= 30);

		// encode delta from the last index
		unsigned int d = index - last[current];
		unsigned int v = (d << 1) ^ (int(d) >> 31);

		// note: low bit encodes the index of the last baseline which will be used for reconstruction
		encodeVByte(data, (v << 1) | current);

		// update last for the next iteration that uses it
		last[current] = index;
	}

	// make sure we have enough space to write tail
	if (data > data_safe_end)
		return 0;

	for (int k = 0; k < 4; ++k)
		*data++ = 0;

	return data - buffer;
}
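// Worked example (illustrative, derived from the encoder above): for a delta d = -3, the
// zigzag step computes v = (d << 1) ^ (int(d) >> 31) = 0xfffffffa ^ 0xffffffff = 5, so
// small negative and positive deltas alike map to small unsigned values that fit into a
// single varint byte once the baseline bit is appended.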

size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
{
	// compute number of bits required for each index
	unsigned int vertex_bits = 1;

	while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
		vertex_bits++;

	// worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit
	unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7;

	return 1 + index_count * vertex_groups + 4;
}

int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
{
	using namespace meshopt;

	// the minimum valid encoding is header, 1 byte per index and a 4-byte tail
	if (buffer_size < 1 + index_count + 4)
		return -2;

	if ((buffer[0] & 0xf0) != kSequenceHeader)
		return -1;

	int version = buffer[0] & 0x0f;
	if (version > 1)
		return -1;

	const unsigned char* data = buffer + 1;
	const unsigned char* data_safe_end = buffer + buffer_size - 4;

	unsigned int last[2] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		// make sure we have enough data to read
		// each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end
		// after this we can be sure we can read without extra bounds checks
		if (data >= data_safe_end)
			return -2;

		unsigned int v = decodeVByte(data);

		// decode the index of the last baseline
		unsigned int current = v & 1;
		v >>= 1;

		// reconstruct index as a delta
		unsigned int d = (v >> 1) ^ -int(v & 1);
		unsigned int index = last[current] + d;

		// update last for the next iteration that uses it
		last[current] = index;

		if (index_size == 2)
		{
			static_cast<unsigned short*>(destination)[i] = (unsigned short)(index);
		}
		else
		{
			static_cast<unsigned int*>(destination)[i] = index;
		}
	}

	// we should've read all data bytes and stopped at the boundary between data and tail
	if (data != data_safe_end)
		return -3;

	return 0;
}
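// Roundtrip sketch (illustrative; the vectors are assumptions, not part of this file):
//
//   std::vector<unsigned char> buf(meshopt_encodeIndexSequenceBound(index_count, vertex_count));
//   buf.resize(meshopt_encodeIndexSequence(buf.data(), buf.size(), indices.data(), index_count));
//
//   std::vector<unsigned int> out(index_count);
//   int res = meshopt_decodeIndexSequence(out.data(), index_count, sizeof(unsigned int), buf.data(), buf.size());
//   assert(res == 0);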
@@ -0,0 +1,347 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

namespace meshopt
{

static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len)
{
	// MurmurHash2
	const unsigned int m = 0x5bd1e995;
	const int r = 24;

	while (len >= 4)
	{
		unsigned int k = *reinterpret_cast<const unsigned int*>(key);

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		key += 4;
		len -= 4;
	}

	return h;
}

struct VertexHasher
{
	const unsigned char* vertices;
	size_t vertex_size;
	size_t vertex_stride;

	size_t hash(unsigned int index) const
	{
		return hashUpdate4(0, vertices + index * vertex_stride, vertex_size);
	}

	bool equal(unsigned int lhs, unsigned int rhs) const
	{
		return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0;
	}
};

struct VertexStreamHasher
{
	const meshopt_Stream* streams;
	size_t stream_count;

	size_t hash(unsigned int index) const
	{
		unsigned int h = 0;

		for (size_t i = 0; i < stream_count; ++i)
		{
			const meshopt_Stream& s = streams[i];
			const unsigned char* data = static_cast<const unsigned char*>(s.data);

			h = hashUpdate4(h, data + index * s.stride, s.size);
		}

		return h;
	}

	bool equal(unsigned int lhs, unsigned int rhs) const
	{
		for (size_t i = 0; i < stream_count; ++i)
		{
			const meshopt_Stream& s = streams[i];
			const unsigned char* data = static_cast<const unsigned char*>(s.data);

			if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0)
				return false;
		}

		return true;
	}
};

static size_t hashBuckets(size_t count)
{
	size_t buckets = 1;
	while (buckets < count)
		buckets *= 2;

	return buckets;
}

template <typename T, typename Hash>
static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
{
	assert(buckets > 0);
	assert((buckets & (buckets - 1)) == 0);

	size_t hashmod = buckets - 1;
	size_t bucket = hash.hash(key) & hashmod;

	for (size_t probe = 0; probe <= hashmod; ++probe)
	{
		T& item = table[bucket];

		if (item == empty)
			return &item;

		if (hash.equal(item, key))
			return &item;

		// hash collision, quadratic probing
		bucket = (bucket + probe + 1) & hashmod;
	}

	assert(false && "Hash table is full"); // unreachable
	return 0;
}

} // namespace meshopt

size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(indices || index_count == vertex_count);
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	memset(destination, -1, vertex_count * sizeof(unsigned int));

	VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices ? indices[i] : unsigned(i);
		assert(index < vertex_count);

		if (destination[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			if (*entry == ~0u)
			{
				*entry = index;

				destination[index] = next_vertex++;
			}
			else
			{
				assert(destination[*entry] != ~0u);

				destination[index] = destination[*entry];
			}
		}
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}
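// Usage sketch (illustrative; the vectors and the `Vertex` type are assumptions, not part
// of this file): deduplicating an indexed mesh with interleaved vertices in-place.
//
//   std::vector<unsigned int> remap(vertex_count);
//   size_t unique = meshopt_generateVertexRemap(remap.data(), indices.data(), index_count, vertices.data(), vertex_count, sizeof(Vertex));
//
//   meshopt_remapIndexBuffer(indices.data(), indices.data(), index_count, remap.data());
//   meshopt_remapVertexBuffer(vertices.data(), vertices.data(), vertex_count, sizeof(Vertex), remap.data());
//   vertices.resize(unique);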

size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
{
	using namespace meshopt;

	assert(indices || index_count == vertex_count);
	assert(index_count % 3 == 0);
	assert(stream_count > 0 && stream_count <= 16);

	for (size_t i = 0; i < stream_count; ++i)
	{
		assert(streams[i].size > 0 && streams[i].size <= 256);
		assert(streams[i].size <= streams[i].stride);
	}

	meshopt_Allocator allocator;

	memset(destination, -1, vertex_count * sizeof(unsigned int));

	VertexStreamHasher hasher = {streams, stream_count};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices ? indices[i] : unsigned(i);
		assert(index < vertex_count);

		if (destination[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			if (*entry == ~0u)
			{
				*entry = index;

				destination[index] = next_vertex++;
			}
			else
			{
				assert(destination[*entry] != ~0u);

				destination[index] = destination[*entry];
			}
		}
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}
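// Usage sketch (illustrative; the positions/normals arrays are assumptions): deduplicating
// vertices stored as two separate attribute streams.
//
//   meshopt_Stream streams[] = {
//       {positions.data(), sizeof(float) * 3, sizeof(float) * 3},
//       {normals.data(), sizeof(float) * 3, sizeof(float) * 3},
//   };
//
//   std::vector<unsigned int> remap(vertex_count);
//   size_t unique = meshopt_generateVertexRemapMulti(remap.data(), indices.data(), index_count, vertex_count, streams, 2);
//   // each stream is then compacted separately with meshopt_remapVertexBuffer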

void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
{
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	// support in-place remap
	if (destination == vertices)
	{
		unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
		memcpy(vertices_copy, vertices, vertex_count * vertex_size);
		vertices = vertices_copy;
	}

	for (size_t i = 0; i < vertex_count; ++i)
	{
		if (remap[i] != ~0u)
		{
			assert(remap[i] < vertex_count);

			memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size);
		}
	}
}

void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap)
{
	assert(index_count % 3 == 0);

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices ? indices[i] : unsigned(i);
		assert(remap[index] != ~0u);

		destination[i] = remap[index];
	}
}

void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
{
	using namespace meshopt;

	assert(indices);
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);
	assert(vertex_size <= vertex_stride);

	meshopt_Allocator allocator;

	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	memset(remap, -1, vertex_count * sizeof(unsigned int));

	VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		if (remap[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			if (*entry == ~0u)
				*entry = index;

			remap[index] = *entry;
		}

		destination[i] = remap[index];
	}
}
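// Usage sketch (illustrative; a `Vertex` layout with a leading float3 position is an
// assumption): building a position-only index buffer for depth/shadow passes.
//
//   std::vector<unsigned int> shadow_indices(index_count);
//   meshopt_generateShadowIndexBuffer(shadow_indices.data(), indices.data(), index_count,
//       vertices.data(), vertex_count, sizeof(float) * 3, sizeof(Vertex));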

void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
{
	using namespace meshopt;

	assert(indices);
	assert(index_count % 3 == 0);
	assert(stream_count > 0 && stream_count <= 16);

	for (size_t i = 0; i < stream_count; ++i)
	{
		assert(streams[i].size > 0 && streams[i].size <= 256);
		assert(streams[i].size <= streams[i].stride);
	}

	meshopt_Allocator allocator;

	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	memset(remap, -1, vertex_count * sizeof(unsigned int));

	VertexStreamHasher hasher = {streams, stream_count};

	size_t table_size = hashBuckets(vertex_count);
	unsigned int* table = allocator.allocate<unsigned int>(table_size);
	memset(table, -1, table_size * sizeof(unsigned int));

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		if (remap[index] == ~0u)
		{
			unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);

			if (*entry == ~0u)
				*entry = index;

			remap[index] = *entry;
		}

		destination[i] = remap[index];
	}
}
@@ -0,0 +1,948 @@
/**
 * meshoptimizer - version 0.15
 *
 * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
 * Report bugs and download new versions at https://github.com/zeux/meshoptimizer
 *
 * This library is distributed under the MIT License. See notice at the end of this file.
 */
#pragma once

#include <assert.h>
#include <stddef.h>

/* Version macro; major * 1000 + minor * 10 + patch */
#define MESHOPTIMIZER_VERSION 150 /* 0.15 */

/* If no API is defined, assume default */
#ifndef MESHOPTIMIZER_API
#define MESHOPTIMIZER_API
#endif

/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */
#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API

/* C interface */
#ifdef __cplusplus
extern "C" {
#endif

/**
 * Vertex attribute stream, similar to glVertexPointer
 * Each element takes size bytes, with stride controlling the spacing between successive elements.
 */
struct meshopt_Stream
{
	const void* data;
	size_t size;
	size_t stride;
};

/**
 * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices
 * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
 * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
 * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
 *
 * destination must contain enough space for the resulting remap table (vertex_count elements)
 * indices can be NULL if the input is unindexed
 */
MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);

/**
 * Generates a vertex remap table from multiple vertex streams and an optional index buffer and returns number of unique vertices
 * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
 * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
 * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream.
 * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
 *
 * destination must contain enough space for the resulting remap table (vertex_count elements)
 * indices can be NULL if the input is unindexed
 */
MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);

/**
 * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap
 *
 * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by meshopt_generateVertexRemap)
 * vertex_count should be the initial vertex count and not the value returned by meshopt_generateVertexRemap
 */
MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap);

/**
 * Generate index buffer from the source index buffer and remap table generated by meshopt_generateVertexRemap
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 * indices can be NULL if the input is unindexed
 */
MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap);

/**
 * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
 * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer.
 * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
 * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 */
MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride);

/**
 * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
 * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer.
 * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
 * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 */
MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);

/**
 * Vertex transform cache optimizer
 * Reorders indices to reduce the number of GPU vertex shader invocations
 * If the index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually.
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 */
MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);

/**
 * Vertex transform cache optimizer for strip-like caches
 * Produces inferior results to meshopt_optimizeVertexCache from the GPU vertex cache perspective
 * However, the resulting index order is more optimal if the goal is to reduce the triangle strip length or improve compression efficiency
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 */
MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);

/**
 * Vertex transform cache optimizer for FIFO caches
 * Reorders indices to reduce the number of GPU vertex shader invocations
 * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache
 * If the index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually.
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 * cache_size should be less than the actual GPU cache size to avoid cache thrashing
 */
MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size);

/**
 * Overdraw optimizer
 * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw
 * If the index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually.
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently
 */
MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold);
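
/* Usage sketch (illustrative; `Vertex` and the buffers are assumptions, not part of this
 * header): running the cache optimizer first and feeding its output into the overdraw
 * optimizer, allowing up to 5% cache efficiency loss.
 *
 *   meshopt_optimizeVertexCache(indices, indices, index_count, vertex_count);
 *   meshopt_optimizeOverdraw(indices, indices, index_count, &vertices[0].px, vertex_count, sizeof(Vertex), 1.05f);
 */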

/**
 * Vertex fetch cache optimizer
 * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing
 * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused
 * This function works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream.
 *
 * destination must contain enough space for the resulting vertex buffer (vertex_count elements)
 * indices is used both as an input and as an output index buffer
 */
MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
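
/* Usage sketch (illustrative; the vectors and `Vertex` type are assumptions): vertex fetch
 * optimization is typically the last reordering step, after cache and overdraw optimization.
 *
 *   vertices.resize(meshopt_optimizeVertexFetch(vertices.data(), indices.data(), index_count,
 *       vertices.data(), vertex_count, sizeof(Vertex)));
 */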

/**
 * Vertex fetch cache optimizer
 * Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing
 * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused
 * The resulting remap table should be used to reorder vertex/index buffers using meshopt_remapVertexBuffer/meshopt_remapIndexBuffer
 *
 * destination must contain enough space for the resulting remap table (vertex_count elements)
 */
MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);

/**
 * Index buffer encoder
 * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 byte/triangle) compared to original.
 * Input index buffer must represent a triangle list.
 * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
 * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first.
 *
 * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to compute worst case size)
 */
MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count);
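
/* Usage sketch (illustrative; the vector is an assumption, not part of this header):
 * size the buffer with the worst-case bound, then shrink it to the actual encoded size.
 *
 *   std::vector<unsigned char> ibuf(meshopt_encodeIndexBufferBound(index_count, vertex_count));
 *   ibuf.resize(meshopt_encodeIndexBuffer(ibuf.data(), ibuf.size(), indices.data(), index_count));
 */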

/**
 * Experimental: Set index encoder format version
 * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+)
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeIndexVersion(int version);

/**
 * Index buffer decoder
 * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer
 * Returns 0 if decoding was successful, and an error code otherwise
 * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices).
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 */
MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);

/**
 * Experimental: Index sequence encoder
 * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original.
 * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better.
 * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
 *
 * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size)
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count);

/**
 * Index sequence decoder
 * Decodes index data from an array of bytes generated by meshopt_encodeIndexSequence
 * Returns 0 if decoding was successful, and an error code otherwise
 * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices).
 *
 * destination must contain enough space for the resulting index sequence (index_count elements)
 */
MESHOPTIMIZER_EXPERIMENTAL int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);

/**
 * Vertex buffer encoder
 * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original.
 * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
 * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream.
 * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized.
 *
 * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size)
 */
MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size);
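
/* Usage sketch (illustrative; the vector and `Vertex` type are assumptions):
 *
 *   std::vector<unsigned char> vbuf(meshopt_encodeVertexBufferBound(vertex_count, sizeof(Vertex)));
 *   vbuf.resize(meshopt_encodeVertexBuffer(vbuf.data(), vbuf.size(), vertices.data(), vertex_count, sizeof(Vertex)));
 */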

/**
 * Experimental: Set vertex encoder format version
 * version must specify the data format version to encode; valid values are 0 (decodable by all library versions)
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeVertexVersion(int version);

/**
 * Vertex buffer decoder
 * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer
 * Returns 0 if decoding was successful, and an error code otherwise
 * The decoder is safe to use for untrusted input, but it may produce garbage data.
 *
 * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes)
 */
MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size);

/**
 * Vertex buffer filters
 * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place.
 * count must be aligned by 4 and stride is fixed for each function to facilitate SIMD implementation.
 *
 * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f.
 * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
 *
 * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct.
 * Each component is stored as a 16-bit integer; stride must be equal to 8.
 *
 * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M.
 * Each 32-bit component is decoded in isolation; stride must be divisible by 4.
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size);

/**
 * Experimental: Mesh simplifier
 * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible
 * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
 * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification.
 * Returns the number of indices after simplification, with destination containing new index data
 * The resulting index buffer references vertices from the original vertex buffer.
 * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
 *
 * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
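
/* Usage sketch (illustrative; the vectors, `Vertex` type and 1e-2f error budget are
 * assumptions): simplifying to roughly half the original triangle count.
 *
 *   size_t target_index_count = (index_count / 3 / 2) * 3;
 *   std::vector<unsigned int> lod(index_count);
 *   lod.resize(meshopt_simplify(lod.data(), indices.data(), index_count,
 *       &vertices[0].px, vertex_count, sizeof(Vertex), target_index_count, 1e-2f));
 */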

/**
 * Experimental: Mesh simplifier (sloppy)
 * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
 * The algorithm doesn't preserve mesh topology but is always able to reach target triangle count.
 * Returns the number of indices after simplification, with destination containing new index data
 * The resulting index buffer references vertices from the original vertex buffer.
 * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
 *
 * destination must contain enough space for the target index buffer
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count);

/**
 * Experimental: Point cloud simplifier
 * Reduces the number of points in the cloud to reach the given target
 * Returns the number of points after simplification, with destination containing new index data
 * The resulting index buffer references vertices from the original vertex buffer.
 * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
 *
 * destination must contain enough space for the target index buffer
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count);

/**
 * Mesh stripifier
 * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles
 * Returns the number of indices in the resulting strip, with destination containing new index data
 * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
 * Using restart indices can result in ~10% smaller index buffers, but on some GPUs restart indices may result in decreased performance.
 *
 * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_stripifyBound
 * restart_index should be 0xffff or 0xffffffff depending on index size, or 0 to use degenerate triangles
 */
MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index);
MESHOPTIMIZER_API size_t meshopt_stripifyBound(size_t index_count);
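
/* Usage sketch (illustrative; the vector is an assumption): stitching strips with a
 * 32-bit restart index.
 *
 *   std::vector<unsigned int> strip(meshopt_stripifyBound(index_count));
 *   strip.resize(meshopt_stripify(strip.data(), indices.data(), index_count, vertex_count, 0xffffffff));
 */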

/**
 * Mesh unstripifier
 * Converts a triangle strip to a triangle list
 * Returns the number of indices in the resulting list, with destination containing new index data
 *
 * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_unstripifyBound
 */
MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index);
MESHOPTIMIZER_API size_t meshopt_unstripifyBound(size_t index_count);

struct meshopt_VertexCacheStatistics
{
	unsigned int vertices_transformed;
	unsigned int warps_executed;
	float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */
	float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */
};

/**
 * Vertex transform cache analyzer
 * Returns cache hit statistics using a simplified FIFO model
 * Results may not match actual GPU performance
 */
MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size);

struct meshopt_OverdrawStatistics
{
	unsigned int pixels_covered;
	unsigned int pixels_shaded;
	float overdraw; /* shaded pixels / covered pixels; best case 1.0 */
};

/**
 * Overdraw analyzer
 * Returns overdraw statistics using a software rasterizer
 * Results may not match actual GPU performance
 *
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 */
MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

struct meshopt_VertexFetchStatistics
{
	unsigned int bytes_fetched;
	float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */
};

/**
 * Vertex fetch cache analyzer
 * Returns cache hit statistics using a simplified direct mapped model
 * Results may not match actual GPU performance
 */
MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size);

struct meshopt_Meshlet
{
	unsigned int vertices[64];
	unsigned char indices[126][3];
	unsigned char triangle_count;
	unsigned char vertex_count;
};

/**
 * Experimental: Meshlet builder
 * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
 * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
 * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
 *
 * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
 * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126)
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
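
/* Usage sketch (illustrative; the vector is an assumption): building meshlets with the
 * maximum limits allowed by meshopt_Meshlet.
 *
 *   std::vector<meshopt_Meshlet> meshlets(meshopt_buildMeshletsBound(index_count, 64, 126));
 *   meshlets.resize(meshopt_buildMeshlets(meshlets.data(), indices.data(), index_count, vertex_count, 64, 126));
 */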

struct meshopt_Bounds
{
	/* bounding sphere, useful for frustum and occlusion culling */
	float center[3];
	float radius;

	/* normal cone, useful for backface culling */
	float cone_apex[3];
	float cone_axis[3];
	float cone_cutoff; /* = cos(angle/2) */

	/* normal cone axis and cutoff, stored in 8-bit SNORM format; decode using x/127.0 */
	signed char cone_axis_s8[3];
	signed char cone_cutoff_s8;
};

/**
 * Experimental: Cluster bounds generator
 * Creates bounding volumes that can be used for frustum, backface and occlusion culling.
 *
 * For backface culling with orthographic projection, use the following formula to reject backfacing clusters:
 *   dot(view, cone_axis) >= cone_cutoff
 *
 * For perspective projection, you can use the formula that needs cone apex in addition to axis & cutoff:
 *   dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff
 *
 * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead:
 *   dot(normalize(center - camera_position), cone_axis) >= cone_cutoff + radius / length(center - camera_position)
 * or an equivalent formula that doesn't have a singularity at center = camera_position:
 *   dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius
 *
 * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere
 * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable.
 *
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size)
 */
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

/**
 * Experimental: Spatial sorter
 * Generates a remap table that can be used to reorder points for spatial locality.
 * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer.
 *
 * destination must contain enough space for the resulting remap table (vertex_count elements)
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

/**
 * Experimental: Spatial sorter
 * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache.
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

/**
 * Set allocation callbacks
 * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library.
 * Note that all algorithms only allocate memory for temporary use.
 * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first.
 */
MESHOPTIMIZER_API void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*));

#ifdef __cplusplus
} /* extern "C" */
#endif

/* Quantization into commonly supported data formats */
#ifdef __cplusplus
/**
 * Quantize a float in [0..1] range into an N-bit fixed point unorm value
 * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point conversion
 * Maximum reconstruction error: 1/2^(N+1)
 */
inline int meshopt_quantizeUnorm(float v, int N);

/**
 * Quantize a float in [-1..1] range into an N-bit fixed point snorm value
 * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions)
 * Maximum reconstruction error: 1/2^N
 */
inline int meshopt_quantizeSnorm(float v, int N);

/**
 * Quantize a float into half-precision floating point value
 * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
 * Representable magnitude range: [6e-5; 65504]
 * Maximum relative reconstruction error: 5e-4
 */
inline unsigned short meshopt_quantizeHalf(float v);

/**
 * Quantize a float into a floating point value with a limited number of significant mantissa bits
 * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
 * Assumes N is in a valid mantissa precision range, which is 1..23
 */
inline float meshopt_quantizeFloat(float v, int N);
#endif

/**
 * C++ template interface
 *
 * These functions mirror the C interface the library provides, providing template-based overloads so that
 * the caller can use an arbitrary type for the index data, both for input and output.
 * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not,
 * the wrappers end up allocating memory and copying index data to convert from one type to another.
 */
#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS)
template <typename T>
inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
template <typename T>
inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
template <typename T>
inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap);
template <typename T>
inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride);
template <typename T>
inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size);
template <typename T>
inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold);
template <typename T>
inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
template <typename T>
inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count);
template <typename T>
inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
template <typename T>
inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count);
template <typename T>
inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
template <typename T>
inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
template <typename T>
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count);
template <typename T>
inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index);
template <typename T>
inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index);
template <typename T>
inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size);
template <typename T>
inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
template <typename T>
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
template <typename T>
inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
#endif

/* Inline implementation */
#ifdef __cplusplus
inline int meshopt_quantizeUnorm(float v, int N)
{
	const float scale = float((1 << N) - 1);

	v = (v >= 0) ? v : 0;
	v = (v <= 1) ? v : 1;

	return int(v * scale + 0.5f);
}
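/* Worked example (illustrative): meshopt_quantizeUnorm(0.5f, 8) computes
 * int(0.5f * 255 + 0.5f) = 128, which reconstructs as 128 / 255 ~= 0.502. */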
|
||||
|
||||
inline int meshopt_quantizeSnorm(float v, int N)
|
||||
{
|
||||
const float scale = float((1 << (N - 1)) - 1);
|
||||
|
||||
float round = (v >= 0 ? 0.5f : -0.5f);
|
||||
|
||||
v = (v >= -1) ? v : -1;
|
||||
v = (v <= +1) ? v : +1;
|
||||
|
||||
return int(v * scale + round);
|
||||
}
|
||||
|
||||
inline unsigned short meshopt_quantizeHalf(float v)
|
||||
{
|
||||
union { float f; unsigned int ui; } u = {v};
|
||||
unsigned int ui = u.ui;
|
||||
|
||||
int s = (ui >> 16) & 0x8000;
|
||||
int em = ui & 0x7fffffff;
|
||||
|
||||
/* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */
|
||||
int h = (em - (112 << 23) + (1 << 12)) >> 13;
|
||||
|
||||
/* underflow: flush to zero; 113 encodes exponent -14 */
|
||||
h = (em < (113 << 23)) ? 0 : h;
|
||||
|
||||
/* overflow: infinity; 143 encodes exponent 16 */
|
||||
h = (em >= (143 << 23)) ? 0x7c00 : h;
|
||||
|
||||
/* NaN; note that we convert all types of NaN to qNaN */
|
||||
h = (em > (255 << 23)) ? 0x7e00 : h;
|
||||
|
||||
return (unsigned short)(s | h);
|
||||
}
|
||||
|
||||
inline float meshopt_quantizeFloat(float v, int N)
|
||||
{
|
||||
union { float f; unsigned int ui; } u = {v};
|
||||
unsigned int ui = u.ui;
|
||||
|
||||
const int mask = (1 << (23 - N)) - 1;
|
||||
const int round = (1 << (23 - N)) >> 1;
|
||||
|
||||
int e = ui & 0x7f800000;
|
||||
unsigned int rui = (ui + round) & ~mask;
|
||||
|
||||
/* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */
|
||||
ui = e == 0x7f800000 ? ui : rui;
|
||||
|
||||
/* flush denormals to zero */
|
||||
ui = e == 0 ? 0 : ui;
|
||||
|
||||
u.ui = ui;
|
||||
return u.f;
|
||||
}
#endif

/* Internal implementation helpers */
#ifdef __cplusplus
class meshopt_Allocator
{
public:
    template <typename T>
    struct StorageT
    {
        static void* (*allocate)(size_t);
        static void (*deallocate)(void*);
    };

    typedef StorageT<void> Storage;

    meshopt_Allocator()
        : blocks()
        , count(0)
    {
    }

    ~meshopt_Allocator()
    {
        for (size_t i = count; i > 0; --i)
            Storage::deallocate(blocks[i - 1]);
    }

    template <typename T> T* allocate(size_t size)
    {
        assert(count < sizeof(blocks) / sizeof(blocks[0]));
        T* result = static_cast<T*>(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T)));
        blocks[count++] = result;
        return result;
    }

private:
    void* blocks[24];
    size_t count;
};

// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker
template <typename T> void* (*meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
template <typename T> void (*meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
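
// Usage sketch (an illustration, not from the diff): the allocator is a small
// arena - each allocate<T>() grabs one block via Storage::allocate, and all
// blocks are released in reverse order when the allocator leaves scope.
static void meshopt_allocatorExample(size_t vertex_count)
{
    meshopt_Allocator allocator;

    unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
    float* scratch = allocator.allocate<float>(vertex_count * 3);

    (void)remap;
    (void)scratch;
    // both blocks are freed here by ~meshopt_Allocator, newest first
}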
#endif

/* Inline implementation for C++ templated wrappers */
#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS)
template <typename T, bool ZeroCopy = sizeof(T) == sizeof(unsigned int)>
struct meshopt_IndexAdapter;

template <typename T>
struct meshopt_IndexAdapter<T, false>
{
    T* result;
    unsigned int* data;
    size_t count;

    meshopt_IndexAdapter(T* result_, const T* input, size_t count_)
        : result(result_)
        , data(0)
        , count(count_)
    {
        size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int);

        data = static_cast<unsigned int*>(meshopt_Allocator::Storage::allocate(size));

        if (input)
        {
            for (size_t i = 0; i < count; ++i)
                data[i] = input[i];
        }
    }

    ~meshopt_IndexAdapter()
    {
        if (result)
        {
            for (size_t i = 0; i < count; ++i)
                result[i] = T(data[i]);
        }

        meshopt_Allocator::Storage::deallocate(data);
    }
};

template <typename T>
struct meshopt_IndexAdapter<T, true>
{
    unsigned int* data;

    meshopt_IndexAdapter(T* result, const T* input, size_t)
        : data(reinterpret_cast<unsigned int*>(result ? result : const_cast<T*>(input)))
    {
    }
};
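
// What the two specializations above buy (illustrative sketch, not from the
// diff): 32-bit index types alias the caller's buffer directly (ZeroCopy),
// while narrower types are widened into a scratch unsigned int buffer and
// converted back on destruction.
static void meshopt_indexAdapterExample(unsigned short* indices16, unsigned int* indices32, size_t index_count)
{
    // widening path: copies indices16 into a temporary unsigned int array
    meshopt_IndexAdapter<unsigned short> in16(0, indices16, index_count);

    // zero-copy path: in32.data points straight at indices32
    meshopt_IndexAdapter<unsigned int> in32(0, indices32, index_count);

    (void)in16.data;
    (void)in32.data;
}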

template <typename T>
inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
    meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);

    return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size);
}

template <typename T>
inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
{
    meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);

    return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count);
}

template <typename T>
inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap)
{
    meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap);
}

template <typename T>
inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride);
}

template <typename T>
inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count);
}

template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count);
}

template <typename T>
inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count);
}

template <typename T>
inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size);
}

template <typename T>
inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold);
}

template <typename T>
inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count);
}

template <typename T>
inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
    meshopt_IndexAdapter<T> inout(indices, indices, index_count);

    return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size);
}

template <typename T>
inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count);
}

template <typename T>
inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size)
{
    // compile-time check: declaring an array of size -1 fails unless T is 2 or 4 bytes wide
    char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1];
    (void)index_size_valid;

    return meshopt_decodeIndexBuffer(destination, index_count, sizeof(T), buffer, buffer_size);
}

template <typename T>
inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count);
}

template <typename T>
inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size)
{
    // compile-time check: declaring an array of size -1 fails unless T is 2 or 4 bytes wide
    char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1];
    (void)index_size_valid;

    return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size);
}

template <typename T>
inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error);
}

template <typename T>
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, target_index_count);

    return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count);
}

template <typename T>
inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, (index_count / 3) * 5);

    return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index));
}

template <typename T>
inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, (index_count - 2) * 3);

    return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index));
}

template <typename T>
inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size);
}

template <typename T>
inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}

template <typename T>
inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size);
}

template <typename T>
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles);
}

template <typename T>
inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);

    return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}

template <typename T>
inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    meshopt_IndexAdapter<T> in(0, indices, index_count);
    meshopt_IndexAdapter<T> out(destination, 0, index_count);

    meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}
#endif

/**
 * Copyright (c) 2016-2020 Arseny Kapoulkine
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

@ -0,0 +1,230 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <float.h>
#include <string.h>

// This work is based on:
// Nicolas Capens. Advanced Rasterization. 2004
namespace meshopt
{

const int kViewport = 256;

struct OverdrawBuffer
{
    float z[kViewport][kViewport][2];
    unsigned int overdraw[kViewport][kViewport][2];
};

#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif

static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
{
    // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
    // z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1)
    // (x2-x1 y2-y1)(dzdx) = (z2-z1)
    // (x3-x1 y3-y1)(dzdy)   (z3-z1)
    // we'll solve it with Cramer's rule
    float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
    float invdet = (det == 0) ? 0 : 1 / det;

    dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
    dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;

    return det;
}
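
// Sanity check (illustrative, not part of the file): the triangle (0,0), (1,0),
// (0,1) with z = 0, 1, 2 lies on the plane z = x + 2y, so Cramer's rule above
// should report dzdx = 1, dzdy = 2 with det = 1; all values are exact in float.
static void computeDepthGradientsExample()
{
    float dzdx, dzdy;
    float det = computeDepthGradients(dzdx, dzdy, 0.f, 0.f, 0.f, 1.f, 0.f, 1.f, 0.f, 1.f, 2.f);

    assert(det == 1.f && dzdx == 1.f && dzdy == 2.f);
    (void)det;
}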

// half-space fixed point triangle rasterizer
static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z)
{
    // compute depth gradients
    float DZx, DZy;
    float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
    int sign = det > 0;

    // flip backfacing triangles to simplify rasterization logic
    if (sign)
    {
        // flipping v2 & v3 preserves depth gradients since they're based on v1
        float t;
        t = v2x, v2x = v3x, v3x = t;
        t = v2y, v2y = v3y, v3y = t;
        t = v2z, v2z = v3z, v3z = t;

        // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below
        v1z = kViewport - v1z;
        DZx = -DZx;
        DZy = -DZy;
    }

    // coordinates, 28.4 fixed point
    int X1 = int(16.0f * v1x + 0.5f);
    int X2 = int(16.0f * v2x + 0.5f);
    int X3 = int(16.0f * v3x + 0.5f);

    int Y1 = int(16.0f * v1y + 0.5f);
    int Y2 = int(16.0f * v2y + 0.5f);
    int Y3 = int(16.0f * v3y + 0.5f);

    // bounding rectangle, clipped against viewport
    // since we rasterize pixels with covered centers, min >0.5 should round up
    // as for max, due to top-left filling convention we will never rasterize right/bottom edges
    // so max >= 0.5 should round down
    int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0);
    int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport);
    int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0);
    int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport);

    // deltas, 28.4 fixed point
    int DX12 = X1 - X2;
    int DX23 = X2 - X3;
    int DX31 = X3 - X1;

    int DY12 = Y1 - Y2;
    int DY23 = Y2 - Y3;
    int DY31 = Y3 - Y1;

    // fill convention correction
    int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0);
    int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0);
    int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0);

    // half edge equations, 24.8 fixed point
    // note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers
    int FX = (minx << 4) + 8;
    int FY = (miny << 4) + 8;
    int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1;
    int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1;
    int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1;
    float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f);

    for (int y = miny; y < maxy; y++)
    {
        int CX1 = CY1;
        int CX2 = CY2;
        int CX3 = CY3;
        float ZX = ZY;

        for (int x = minx; x < maxx; x++)
        {
            // check if all CXn are non-negative
            if ((CX1 | CX2 | CX3) >= 0)
            {
                if (ZX >= buffer->z[y][x][sign])
                {
                    buffer->z[y][x][sign] = ZX;
                    buffer->overdraw[y][x][sign]++;
                }
            }

            // signed left shift is UB for negative numbers so use unsigned-signed casts
            CX1 -= int(unsigned(DY12) << 4);
            CX2 -= int(unsigned(DY23) << 4);
            CX3 -= int(unsigned(DY31) << 4);
            ZX += DZx;
        }

        // signed left shift is UB for negative numbers so use unsigned-signed casts
        CY1 += int(unsigned(DX12) << 4);
        CY2 += int(unsigned(DX23) << 4);
        CY3 += int(unsigned(DX31) << 4);
        ZY += DZy;
    }
}

} // namespace meshopt

meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    using namespace meshopt;

    assert(index_count % 3 == 0);
    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
    assert(vertex_positions_stride % sizeof(float) == 0);

    meshopt_Allocator allocator;

    size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

    meshopt_OverdrawStatistics result = {};

    float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
    float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};

    for (size_t i = 0; i < vertex_count; ++i)
    {
        const float* v = vertex_positions + i * vertex_stride_float;

        for (int j = 0; j < 3; ++j)
        {
            minv[j] = min(minv[j], v[j]);
            maxv[j] = max(maxv[j], v[j]);
        }
    }

    float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2]));
    float scale = kViewport / extent;

    float* triangles = allocator.allocate<float>(index_count * 3);

    for (size_t i = 0; i < index_count; ++i)
    {
        unsigned int index = indices[i];
        assert(index < vertex_count);

        const float* v = vertex_positions + index * vertex_stride_float;

        triangles[i * 3 + 0] = (v[0] - minv[0]) * scale;
        triangles[i * 3 + 1] = (v[1] - minv[1]) * scale;
        triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
    }

    OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);

    for (int axis = 0; axis < 3; ++axis)
    {
        memset(buffer, 0, sizeof(OverdrawBuffer));

        for (size_t i = 0; i < index_count; i += 3)
        {
            const float* vn0 = &triangles[3 * (i + 0)];
            const float* vn1 = &triangles[3 * (i + 1)];
            const float* vn2 = &triangles[3 * (i + 2)];

            switch (axis)
            {
            case 0:
                rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
                break;
            case 1:
                rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
                break;
            case 2:
                rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
                break;
            }
        }

        for (int y = 0; y < kViewport; ++y)
            for (int x = 0; x < kViewport; ++x)
                for (int s = 0; s < 2; ++s)
                {
                    unsigned int overdraw = buffer->overdraw[y][x][s];

                    result.pixels_covered += overdraw > 0;
                    result.pixels_shaded += overdraw;
                }
    }

    result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f;

    return result;
}
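
// Usage sketch (assumed caller code, not from the diff): measure overdraw for
// an indexed mesh with tightly packed float3 positions.
static void analyzeOverdrawExample(const unsigned int* indices, size_t index_count, const float* positions, size_t vertex_count)
{
    meshopt_OverdrawStatistics stats = meshopt_analyzeOverdraw(indices, index_count, positions, vertex_count, sizeof(float) * 3);

    // stats.overdraw = pixels_shaded / pixels_covered; 1.0 means every covered
    // pixel was shaded exactly once across the three axis-aligned views
    (void)stats;
}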

@ -0,0 +1,333 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <math.h>
#include <string.h>

// This work is based on:
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{

static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
    size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

    float mesh_centroid[3] = {};

    for (size_t i = 0; i < index_count; ++i)
    {
        const float* p = vertex_positions + vertex_stride_float * indices[i];

        mesh_centroid[0] += p[0];
        mesh_centroid[1] += p[1];
        mesh_centroid[2] += p[2];
    }

    mesh_centroid[0] /= index_count;
    mesh_centroid[1] /= index_count;
    mesh_centroid[2] /= index_count;

    for (size_t cluster = 0; cluster < cluster_count; ++cluster)
    {
        size_t cluster_begin = clusters[cluster] * 3;
        size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
        assert(cluster_begin < cluster_end);

        float cluster_area = 0;
        float cluster_centroid[3] = {};
        float cluster_normal[3] = {};

        for (size_t i = cluster_begin; i < cluster_end; i += 3)
        {
            const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0];
            const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1];
            const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2];

            float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
            float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};

            float normalx = p10[1] * p20[2] - p10[2] * p20[1];
            float normaly = p10[2] * p20[0] - p10[0] * p20[2];
            float normalz = p10[0] * p20[1] - p10[1] * p20[0];

            float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

            cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3);
            cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3);
            cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3);
            cluster_normal[0] += normalx;
            cluster_normal[1] += normaly;
            cluster_normal[2] += normalz;
            cluster_area += area;
        }

        float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;

        cluster_centroid[0] *= inv_cluster_area;
        cluster_centroid[1] *= inv_cluster_area;
        cluster_centroid[2] *= inv_cluster_area;

        float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
        float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;

        cluster_normal[0] *= inv_cluster_normal_length;
        cluster_normal[1] *= inv_cluster_normal_length;
        cluster_normal[2] *= inv_cluster_normal_length;

        float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};

        sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
    }
}

static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count)
{
    // compute sort data bounds and renormalize, using fixed point snorm
    float sort_data_max = 1e-3f;

    for (size_t i = 0; i < cluster_count; ++i)
    {
        float dpa = fabsf(sort_data[i]);

        sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max;
    }

    const int sort_bits = 11;

    for (size_t i = 0; i < cluster_count; ++i)
    {
        // note that we flip distribution since high dot product should come first
        float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max);

        sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1);
    }

    // fill histogram for counting sort
    unsigned int histogram[1 << sort_bits];
    memset(histogram, 0, sizeof(histogram));

    for (size_t i = 0; i < cluster_count; ++i)
    {
        histogram[sort_keys[i]]++;
    }

    // compute offsets based on histogram data
    size_t histogram_sum = 0;

    for (size_t i = 0; i < 1 << sort_bits; ++i)
    {
        size_t count = histogram[i];
        histogram[i] = unsigned(histogram_sum);
        histogram_sum += count;
    }

    assert(histogram_sum == cluster_count);

    // compute sort order based on offsets
    for (size_t i = 0; i < cluster_count; ++i)
    {
        sort_order[histogram[sort_keys[i]]++] = unsigned(i);
    }
}

static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
{
    unsigned int cache_misses = 0;

    // if vertex is not in cache, put it in cache
    if (timestamp - cache_timestamps[a] > cache_size)
    {
        cache_timestamps[a] = timestamp++;
        cache_misses++;
    }

    if (timestamp - cache_timestamps[b] > cache_size)
    {
        cache_timestamps[b] = timestamp++;
        cache_misses++;
    }

    if (timestamp - cache_timestamps[c] > cache_size)
    {
        cache_timestamps[c] = timestamp++;
        cache_misses++;
    }

    return cache_misses;
}

static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps)
{
    memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

    unsigned int timestamp = cache_size + 1;

    size_t face_count = index_count / 3;

    size_t result = 0;

    for (size_t i = 0; i < face_count; ++i)
    {
        unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

        // when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh
        // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently
        // suggests an inefficiency in the vertex cache optimization algorithm
        // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0
        if (i == 0 || m == 3)
        {
            destination[result++] = unsigned(i);
        }
    }

    assert(result <= index_count / 3);

    return result;
}

static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps)
{
    memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

    unsigned int timestamp = 0;

    size_t result = 0;

    for (size_t it = 0; it < cluster_count; ++it)
    {
        size_t start = clusters[it];
        size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3;
        assert(start < end);

        // reset cache
        timestamp += cache_size + 1;

        // measure cluster ACMR
        unsigned int cluster_misses = 0;

        for (size_t i = start; i < end; ++i)
        {
            unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

            cluster_misses += m;
        }

        float cluster_threshold = threshold * (float(cluster_misses) / float(end - start));

        // first cluster always starts from the hard cluster boundary
        destination[result++] = unsigned(start);

        // reset cache
        timestamp += cache_size + 1;

        unsigned int running_misses = 0;
        unsigned int running_faces = 0;

        for (size_t i = start; i < end; ++i)
        {
            unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

            running_misses += m;
            running_faces += 1;

            if (float(running_misses) / float(running_faces) <= cluster_threshold)
            {
                // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one
                // note that this may mean that we add 'end' to destination for the last triangle, which will imply that the last
                // cluster is empty; however, the 'pop_back' after the loop will clean it up
                destination[result++] = unsigned(i + 1);

                // reset cache
                timestamp += cache_size + 1;

                running_misses = 0;
                running_faces = 0;
            }
        }

        // each time we reach the target ACMR we flush the cluster
        // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles
        // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results
        // thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one
        // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end'
        // to the cluster boundary array which we need to remove anyway - this code will do that automatically
        if (destination[result - 1] != start)
        {
            result--;
        }
    }

    assert(result >= cluster_count);
    assert(result <= index_count / 3);

    return result;
}

} // namespace meshopt

void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
{
    using namespace meshopt;

    assert(index_count % 3 == 0);
    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
    assert(vertex_positions_stride % sizeof(float) == 0);

    meshopt_Allocator allocator;

    // guard for empty meshes
    if (index_count == 0 || vertex_count == 0)
        return;

    // support in-place optimization
    if (destination == indices)
    {
        unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
        memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
        indices = indices_copy;
    }

    unsigned int cache_size = 16;

    unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);

    // generate hard boundaries from full-triangle cache misses
    unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3);
    size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps);

    // generate soft boundaries
    unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1);
    size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps);

    const unsigned int* clusters = soft_clusters;
    size_t cluster_count = soft_cluster_count;

    // fill sort data
    float* sort_data = allocator.allocate<float>(cluster_count);
    calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);

    // sort clusters using sort data
    unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
    unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count);
    calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count);

    // fill output buffer
    size_t offset = 0;

    for (size_t it = 0; it < cluster_count; ++it)
    {
        unsigned int cluster = sort_order[it];
        assert(cluster < cluster_count);

        size_t cluster_begin = clusters[cluster] * 3;
        size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
        assert(cluster_begin < cluster_end);

        memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int));
        offset += cluster_end - cluster_begin;
    }

    assert(offset == index_count);
}
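
// Typical call (illustration, not from the diff): reorder indices for reduced
// overdraw after vertex cache optimization; a threshold of 1.05 lets the
// optimizer regress ACMR by up to 5% in exchange for better ordering.
static void optimizeOverdrawExample(unsigned int* indices, size_t index_count, const float* positions, size_t vertex_count)
{
    // destination may alias indices; the in-place path above makes a copy
    meshopt_optimizeOverdraw(indices, indices, index_count, positions, vertex_count, sizeof(float) * 3, 1.05f);
}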
File diff suppressed because it is too large

@ -0,0 +1,194 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <float.h>
#include <string.h>

// This work is based on:
// Fabian Giesen. Decoding Morton codes. 2009
namespace meshopt
{

// "Insert" two 0 bits after each of the 10 low bits of x
inline unsigned int part1By2(unsigned int x)
{
    x &= 0x000003ff;                  // x = ---- ---- ---- ---- ---- --98 7654 3210
    x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
    x = (x ^ (x << 8)) & 0x0300f00f;  // x = ---- --98 ---- ---- 7654 ---- ---- 3210
    x = (x ^ (x << 4)) & 0x030c30c3;  // x = ---- --98 ---- 76-- --54 ---- 32-- --10
    x = (x ^ (x << 2)) & 0x09249249;  // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
    return x;
}
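
// Worked example (illustrative, not part of the file): bit k of the input
// lands at bit 3k of the output, leaving two zero bits for the other axes.
static void part1By2Example()
{
    assert(part1By2(0x3ff) == 0x09249249); // all 10 bits spread apart
    assert(part1By2(0xb) == 0x209);        // 0b1011 -> 0b1000001001

    // a full 30-bit Morton code interleaves the three axes bit by bit
    unsigned int code = part1By2(1) | (part1By2(1) << 1) | (part1By2(1) << 2);
    assert(code == 0x7); // x=y=z=1 -> lowest three bits set
}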

static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
{
    size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

    float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
    float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};

    for (size_t i = 0; i < vertex_count; ++i)
    {
        const float* v = vertex_positions_data + i * vertex_stride_float;

        for (int j = 0; j < 3; ++j)
        {
            float vj = v[j];

            minv[j] = minv[j] > vj ? vj : minv[j];
            maxv[j] = maxv[j] < vj ? vj : maxv[j];
        }
    }

    float extent = 0.f;

    extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
    extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
    extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);

    float scale = extent == 0 ? 0.f : 1.f / extent;

    // generate Morton order based on the position inside a unit cube
    for (size_t i = 0; i < vertex_count; ++i)
    {
        const float* v = vertex_positions_data + i * vertex_stride_float;

        int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f);
        int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f);
        int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f);

        result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
    }
}

static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count)
{
    memset(hist, 0, sizeof(hist));

    // compute 3 10-bit histograms in parallel
    for (size_t i = 0; i < count; ++i)
    {
        unsigned int id = data[i];

        hist[(id >> 0) & 1023][0]++;
        hist[(id >> 10) & 1023][1]++;
        hist[(id >> 20) & 1023][2]++;
    }

    unsigned int sumx = 0, sumy = 0, sumz = 0;

    // replace histogram data with prefix histogram sums in-place
    for (int i = 0; i < 1024; ++i)
    {
        unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2];

        hist[i][0] = sumx;
        hist[i][1] = sumy;
        hist[i][2] = sumz;

        sumx += hx;
        sumy += hy;
        sumz += hz;
    }

    assert(sumx == count && sumy == count && sumz == count);
}

static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass)
{
    int bitoff = pass * 10;

    for (size_t i = 0; i < count; ++i)
    {
        unsigned int id = (keys[source[i]] >> bitoff) & 1023;

        destination[hist[id][pass]++] = source[i];
    }
}

} // namespace meshopt

void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    using namespace meshopt;

    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
    assert(vertex_positions_stride % sizeof(float) == 0);

    meshopt_Allocator allocator;

    unsigned int* keys = allocator.allocate<unsigned int>(vertex_count);
    computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride);

    unsigned int hist[1024][3];
    computeHistogram(hist, keys, vertex_count);

    unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count);

    for (size_t i = 0; i < vertex_count; ++i)
        destination[i] = unsigned(i);

    // 3-pass radix sort computes the resulting order into scratch
    radixPass(scratch, destination, keys, vertex_count, hist, 0);
    radixPass(destination, scratch, keys, vertex_count, hist, 1);
    radixPass(scratch, destination, keys, vertex_count, hist, 2);

    // since our remap table is mapping old=>new, we need to reverse it
    for (size_t i = 0; i < vertex_count; ++i)
        destination[scratch[i]] = unsigned(i);
}

void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
    using namespace meshopt;

    assert(index_count % 3 == 0);
    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
    assert(vertex_positions_stride % sizeof(float) == 0);

    (void)vertex_count;

    size_t face_count = index_count / 3;
    size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

    meshopt_Allocator allocator;

    float* centroids = allocator.allocate<float>(face_count * 3);

    for (size_t i = 0; i < face_count; ++i)
    {
        unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
        assert(a < vertex_count && b < vertex_count && c < vertex_count);

        const float* va = vertex_positions + a * vertex_stride_float;
        const float* vb = vertex_positions + b * vertex_stride_float;
        const float* vc = vertex_positions + c * vertex_stride_float;

        centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f;
        centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f;
        centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f;
    }

    unsigned int* remap = allocator.allocate<unsigned int>(face_count);

    meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3);

    // support in-order remap
    if (destination == indices)
    {
        unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
        memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
        indices = indices_copy;
    }

    for (size_t i = 0; i < face_count; ++i)
    {
        unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
        unsigned int r = remap[i];

        destination[r * 3 + 0] = a;
        destination[r * 3 + 1] = b;
        destination[r * 3 + 2] = c;
    }
}
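
// Usage sketch (assumed caller code): spatially reorder triangles so that
// nearby triangles end up adjacent in the index buffer, e.g. ahead of
// vertex cache optimization of transparent geometry.
static void spatialSortExample(unsigned int* indices, size_t index_count, const float* positions, size_t vertex_count)
{
    // destination may alias indices; the copy above handles that case
    meshopt_spatialSortTriangles(indices, indices, index_count, positions, vertex_count, sizeof(float) * 3);
}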

@ -0,0 +1,295 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <limits.h>
#include <string.h>

// This work is based on:
// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996
namespace meshopt
{

static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence)
{
    unsigned int index = 0;
    unsigned int iv = ~0u;

    for (size_t i = 0; i < buffer_size; ++i)
    {
        unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]];
        unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? vb : vc;

        if (v < iv)
        {
            index = unsigned(i);
            iv = v;
        }
    }

    return index;
}

static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1)
{
    for (size_t i = 0; i < buffer_size; ++i)
    {
        unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];

        if (e0 == a && e1 == b)
            return (int(i) << 2) | 2;
        else if (e0 == b && e1 == c)
            return (int(i) << 2) | 0;
        else if (e0 == c && e1 == a)
            return (int(i) << 2) | 1;
    }

    return -1;
}

} // namespace meshopt

size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index)
{
    assert(destination != indices);
    assert(index_count % 3 == 0);

    using namespace meshopt;

    meshopt_Allocator allocator;

    const size_t buffer_capacity = 8;

    unsigned int buffer[buffer_capacity][3] = {};
    unsigned int buffer_size = 0;

    size_t index_offset = 0;

    unsigned int strip[2] = {};
    unsigned int parity = 0;

    size_t strip_size = 0;

    // compute vertex valence; this is used to prioritize starting triangle for strips
    unsigned int* valence = allocator.allocate<unsigned int>(vertex_count);
    memset(valence, 0, vertex_count * sizeof(unsigned int));

    for (size_t i = 0; i < index_count; ++i)
    {
        unsigned int index = indices[i];
        assert(index < vertex_count);

        valence[index]++;
    }

    int next = -1;

    while (buffer_size > 0 || index_offset < index_count)
    {
        assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3));

        // fill triangle buffer
        while (buffer_size < buffer_capacity && index_offset < index_count)
        {
            buffer[buffer_size][0] = indices[index_offset + 0];
            buffer[buffer_size][1] = indices[index_offset + 1];
            buffer[buffer_size][2] = indices[index_offset + 2];

            buffer_size++;
            index_offset += 3;
        }

        assert(buffer_size > 0);

        if (next >= 0)
        {
            unsigned int i = next >> 2;
            unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
            unsigned int v = buffer[i][next & 3];

            // ordered removal from the buffer
            memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
            buffer_size--;

            // update vertex valences for strip start heuristic
            valence[a]--;
            valence[b]--;
            valence[c]--;

            // find next triangle (note that edge order flips on every iteration)
            // in some cases we need to perform a swap to pick a different outgoing triangle edge
            // for [a b c], the default strip edge is [b c], but we might want to use [a c]
            int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]);
            int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1;

            if (cont < 0 && swap >= 0)
            {
                // [a b c] => [a b a c]
                destination[strip_size++] = strip[0];
                destination[strip_size++] = v;

                // next strip has same winding
                // ? a b => b a v
                strip[1] = v;

                next = swap;
            }
            else
            {
                // emit the next vertex in the strip
                destination[strip_size++] = v;

                // next strip has flipped winding
                strip[0] = strip[1];
                strip[1] = v;
                parity ^= 1;

                next = cont;
            }
        }
        else
        {
            // if we didn't find anything, we need to find the next new triangle
            // we use a heuristic to maximize the strip length
            unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]);
            unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];

            // ordered removal from the buffer
            memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
            buffer_size--;

            // update vertex valences for strip start heuristic
            valence[a]--;
            valence[b]--;
            valence[c]--;

            // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration
            int ea = findStripNext(buffer, buffer_size, c, b);
            int eb = findStripNext(buffer, buffer_size, a, c);
            int ec = findStripNext(buffer, buffer_size, b, a);

            // in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest
            // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear
            // reasons - slightly improves the stripification efficiency
            int mine = INT_MAX;
            mine = (ea >= 0 && mine > ea) ? ea : mine;
            mine = (eb >= 0 && mine > eb) ? eb : mine;
            mine = (ec >= 0 && mine > ec) ? ec : mine;

            if (ea == mine)
            {
                // keep abc
                next = ea;
            }
            else if (eb == mine)
            {
                // abc -> bca
                unsigned int t = a;
                a = b, b = c, c = t;

                next = eb;
            }
            else if (ec == mine)
            {
                // abc -> cab
                unsigned int t = c;
                c = b, b = a, a = t;

                next = ec;
            }

            if (restart_index)
            {
                if (strip_size)
                    destination[strip_size++] = restart_index;

                destination[strip_size++] = a;
                destination[strip_size++] = b;
                destination[strip_size++] = c;

                // new strip always starts with the same edge winding
                strip[0] = b;
                strip[1] = c;
                parity = 1;
            }
            else
            {
                if (strip_size)
                {
                    // connect last strip using degenerate triangles
                    destination[strip_size++] = strip[1];
                    destination[strip_size++] = a;
                }

                // note that we may need to flip the emitted triangle based on parity
                // we always end up with outgoing edge "cb" in the end
                unsigned int e0 = parity ? c : b;
                unsigned int e1 = parity ? b : c;

                destination[strip_size++] = a;
                destination[strip_size++] = e0;
                destination[strip_size++] = e1;

                strip[0] = e0;
                strip[1] = e1;
                parity ^= 1;
            }
        }
    }

    return strip_size;
}

size_t meshopt_stripifyBound(size_t index_count)
{
    assert(index_count % 3 == 0);

    // worst case without restarts is 2 degenerate indices and 3 indices per triangle
    // worst case with restarts is 1 restart index and 3 indices per triangle
    return (index_count / 3) * 5;
}

size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index)
{
    assert(destination != indices);

    size_t offset = 0;
    size_t start = 0;

    for (size_t i = 0; i < index_count; ++i)
    {
        if (restart_index && indices[i] == restart_index)
        {
            start = i + 1;
        }
        else if (i - start >= 2)
        {
            unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i];

            // flip winding for odd triangles
            if ((i - start) & 1)
            {
                unsigned int t = a;
                a = b, b = t;
            }

            // although we use restart indices, strip swaps still produce degenerate triangles, so skip them
            if (a != b && a != c && b != c)
            {
                destination[offset + 0] = a;
                destination[offset + 1] = b;
                destination[offset + 2] = c;
                offset += 3;
            }
        }
    }

    return offset;
}

size_t meshopt_unstripifyBound(size_t index_count)
{
    assert(index_count == 0 || index_count >= 3);

    return (index_count == 0) ? 0 : (index_count - 2) * 3;
}
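
// Round-trip sketch (illustration, not from the diff): stripify a triangle
// list using ~0u as the restart index, then expand it back to a list; assumes
// <vector> is available and that no real index ever equals the restart value.
static void stripifyExample(const unsigned int* indices, size_t index_count, size_t vertex_count)
{
    std::vector<unsigned int> strip(meshopt_stripifyBound(index_count));
    size_t strip_size = meshopt_stripify(&strip[0], indices, index_count, vertex_count, ~0u);

    std::vector<unsigned int> list(meshopt_unstripifyBound(strip_size));
    size_t list_size = meshopt_unstripify(&list[0], &strip[0], strip_size, ~0u);

    assert(list_size % 3 == 0);
    (void)list_size;
}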

@ -0,0 +1,73 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size)
{
    assert(index_count % 3 == 0);
    assert(cache_size >= 3);
    assert(warp_size == 0 || warp_size >= 3);

    meshopt_Allocator allocator;

    meshopt_VertexCacheStatistics result = {};

    unsigned int warp_offset = 0;
    unsigned int primgroup_offset = 0;

    unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
    memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

    unsigned int timestamp = cache_size + 1;

    for (size_t i = 0; i < index_count; i += 3)
    {
        unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
        assert(a < vertex_count && b < vertex_count && c < vertex_count);

        bool ac = (timestamp - cache_timestamps[a]) > cache_size;
        bool bc = (timestamp - cache_timestamps[b]) > cache_size;
        bool cc = (timestamp - cache_timestamps[c]) > cache_size;

        // flush cache if triangle doesn't fit into warp or into the primitive buffer
        if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size))
        {
            result.warps_executed += warp_offset > 0;

            warp_offset = 0;
            primgroup_offset = 0;

            // reset cache
            timestamp += cache_size + 1;
        }

        // update cache and add vertices to warp
        for (int j = 0; j < 3; ++j)
        {
            unsigned int index = indices[i + j];

            if (timestamp - cache_timestamps[index] > cache_size)
            {
                cache_timestamps[index] = timestamp++;
                result.vertices_transformed++;
                warp_offset++;
            }
        }

        primgroup_offset++;
    }

    size_t unique_vertex_count = 0;

    for (size_t i = 0; i < vertex_count; ++i)
        unique_vertex_count += cache_timestamps[i] > 0;

    result.warps_executed += warp_offset > 0;

    result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3);
    result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count);

    return result;
}
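
// Usage sketch (assumed caller code): compute ACMR/ATVR against a 16-entry
// FIFO cache model, with warp and primitive group modeling disabled (0).
static void cacheStatsExample(const unsigned int* indices, size_t index_count, size_t vertex_count)
{
    meshopt_VertexCacheStatistics stats = meshopt_analyzeVertexCache(indices, index_count, vertex_count, 16, 0, 0);

    // acmr: transformed vertices per triangle; 3.0 means no reuse at all
    // atvr: transformed vertices per unique vertex; 1.0 is ideal
    (void)stats;
}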

@ -0,0 +1,473 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

// This work is based on:
// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{

const size_t kCacheSizeMax = 16;
const size_t kValenceMax = 8;

struct VertexScoreTable
{
    float cache[1 + kCacheSizeMax];
    float live[1 + kValenceMax];
};

// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
static const VertexScoreTable kVertexScoreTable = {
    {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
    {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
};

// Tuned to minimize the encoded index buffer size
static const VertexScoreTable kVertexScoreTableStrip = {
    {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
    {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
};

struct TriangleAdjacency
{
    unsigned int* counts;
    unsigned int* offsets;
    unsigned int* data;
};

static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
{
    size_t face_count = index_count / 3;

    // allocate arrays
    adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
    adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
    adjacency.data = allocator.allocate<unsigned int>(index_count);

    // fill triangle counts
    memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));

    for (size_t i = 0; i < index_count; ++i)
    {
        assert(indices[i] < vertex_count);

        adjacency.counts[indices[i]]++;
    }

    // fill offset table
    unsigned int offset = 0;

    for (size_t i = 0; i < vertex_count; ++i)
    {
        adjacency.offsets[i] = offset;
        offset += adjacency.counts[i];
    }

    assert(offset == index_count);

    // fill triangle data
    for (size_t i = 0; i < face_count; ++i)
    {
        unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];

        adjacency.data[adjacency.offsets[a]++] = unsigned(i);
        adjacency.data[adjacency.offsets[b]++] = unsigned(i);
        adjacency.data[adjacency.offsets[c]++] = unsigned(i);
    }

    // fix offsets that have been disturbed by the previous pass
    for (size_t i = 0; i < vertex_count; ++i)
    {
        assert(adjacency.offsets[i] >= adjacency.counts[i]);

        adjacency.offsets[i] -= adjacency.counts[i];
    }
}
|
||||
|
||||
static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
|
||||
{
|
||||
// check dead-end stack
|
||||
while (dead_end_top)
|
||||
{
|
||||
unsigned int vertex = dead_end[--dead_end_top];
|
||||
|
||||
if (live_triangles[vertex] > 0)
|
||||
return vertex;
|
||||
}
|
||||
|
||||
// input order
|
||||
while (input_cursor < vertex_count)
|
||||
{
|
||||
if (live_triangles[input_cursor] > 0)
|
||||
return input_cursor;
|
||||
|
||||
++input_cursor;
|
||||
}
|
||||
|
||||
return ~0u;
|
||||
}
|
||||
|
||||
static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
|
||||
{
|
||||
unsigned int best_candidate = ~0u;
|
||||
int best_priority = -1;
|
||||
|
||||
for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
|
||||
{
|
||||
unsigned int vertex = *next_candidate;
|
||||
|
||||
// otherwise we don't need to process it
|
||||
if (live_triangles[vertex] > 0)
|
||||
{
|
||||
int priority = 0;
|
||||
|
||||
// will it be in cache after fanning?
|
||||
if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
|
||||
{
|
||||
priority = timestamp - cache_timestamps[vertex]; // position in cache
|
||||
}
|
||||
|
||||
if (priority > best_priority)
|
||||
{
|
||||
best_candidate = vertex;
|
||||
best_priority = priority;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return best_candidate;
|
||||
}
|
||||
|
||||
static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
|
||||
{
|
||||
assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));
|
||||
|
||||
unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;
|
||||
|
||||
return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
|
||||
}
|
||||
|
||||
static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
|
||||
{
|
||||
// input order
|
||||
while (input_cursor < face_count)
|
||||
{
|
||||
if (!emitted_flags[input_cursor])
|
||||
return input_cursor;
|
||||
|
||||
++input_cursor;
|
||||
}
|
||||
|
||||
return ~0u;
|
||||
}
|
||||
|
||||
} // namespace meshopt
|
||||
|
||||
void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
// guard for empty meshes
|
||||
if (index_count == 0 || vertex_count == 0)
|
||||
return;
|
||||
|
||||
// support in-place optimization
|
||||
if (destination == indices)
|
||||
{
|
||||
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
|
||||
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
|
||||
indices = indices_copy;
|
||||
}
|
||||
|
||||
unsigned int cache_size = 16;
|
||||
assert(cache_size <= kCacheSizeMax);
|
||||
|
||||
size_t face_count = index_count / 3;
|
||||
|
||||
// build adjacency information
|
||||
TriangleAdjacency adjacency = {};
|
||||
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
|
||||
|
||||
// live triangle counts
|
||||
unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
|
||||
memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
|
||||
|
||||
// emitted flags
|
||||
unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
|
||||
memset(emitted_flags, 0, face_count);
|
||||
|
||||
// compute initial vertex scores
|
||||
float* vertex_scores = allocator.allocate<float>(vertex_count);
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
|
||||
|
||||
// compute triangle scores
|
||||
float* triangle_scores = allocator.allocate<float>(face_count);
|
||||
|
||||
for (size_t i = 0; i < face_count; ++i)
|
||||
{
|
||||
unsigned int a = indices[i * 3 + 0];
|
||||
unsigned int b = indices[i * 3 + 1];
|
||||
unsigned int c = indices[i * 3 + 2];
|
||||
|
||||
triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
|
||||
}
|
||||
|
||||
unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
|
||||
unsigned int* cache = cache_holder;
|
||||
unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
|
||||
size_t cache_count = 0;
|
||||
|
||||
unsigned int current_triangle = 0;
|
||||
unsigned int input_cursor = 1;
|
||||
|
||||
unsigned int output_triangle = 0;
|
||||
|
||||
while (current_triangle != ~0u)
|
||||
{
|
||||
assert(output_triangle < face_count);
|
||||
|
||||
unsigned int a = indices[current_triangle * 3 + 0];
|
||||
unsigned int b = indices[current_triangle * 3 + 1];
|
||||
unsigned int c = indices[current_triangle * 3 + 2];
|
||||
|
||||
// output indices
|
||||
destination[output_triangle * 3 + 0] = a;
|
||||
destination[output_triangle * 3 + 1] = b;
|
||||
destination[output_triangle * 3 + 2] = c;
|
||||
output_triangle++;
|
||||
|
||||
// update emitted flags
|
||||
emitted_flags[current_triangle] = true;
|
||||
triangle_scores[current_triangle] = 0;
|
||||
|
||||
// new triangle
|
||||
size_t cache_write = 0;
|
||||
cache_new[cache_write++] = a;
|
||||
cache_new[cache_write++] = b;
|
||||
cache_new[cache_write++] = c;
|
||||
|
||||
// old triangles
|
||||
for (size_t i = 0; i < cache_count; ++i)
|
||||
{
|
||||
unsigned int index = cache[i];
|
||||
|
||||
if (index != a && index != b && index != c)
|
||||
{
|
||||
cache_new[cache_write++] = index;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int* cache_temp = cache;
|
||||
cache = cache_new, cache_new = cache_temp;
|
||||
cache_count = cache_write > cache_size ? cache_size : cache_write;
|
||||
|
||||
// update live triangle counts
|
||||
live_triangles[a]--;
|
||||
live_triangles[b]--;
|
||||
live_triangles[c]--;
|
||||
|
||||
// remove emitted triangle from adjacency data
|
||||
// this makes sure that we spend less time traversing these lists on subsequent iterations
|
||||
for (size_t k = 0; k < 3; ++k)
|
||||
{
|
||||
unsigned int index = indices[current_triangle * 3 + k];
|
||||
|
||||
unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
|
||||
size_t neighbours_size = adjacency.counts[index];
|
||||
|
||||
for (size_t i = 0; i < neighbours_size; ++i)
|
||||
{
|
||||
unsigned int tri = neighbours[i];
|
||||
|
||||
if (tri == current_triangle)
|
||||
{
|
||||
neighbours[i] = neighbours[neighbours_size - 1];
|
||||
adjacency.counts[index]--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int best_triangle = ~0u;
|
||||
float best_score = 0;
|
||||
|
||||
// update cache positions, vertex scores and triangle scores, and find next best triangle
|
||||
for (size_t i = 0; i < cache_write; ++i)
|
||||
{
|
||||
unsigned int index = cache[i];
|
||||
|
||||
int cache_position = i >= cache_size ? -1 : int(i);
|
||||
|
||||
// update vertex score
|
||||
float score = vertexScore(table, cache_position, live_triangles[index]);
|
||||
float score_diff = score - vertex_scores[index];
|
||||
|
||||
vertex_scores[index] = score;
|
||||
|
||||
// update scores of vertex triangles
|
||||
const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
|
||||
const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
|
||||
|
||||
for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
|
||||
{
|
||||
unsigned int tri = *it;
|
||||
assert(!emitted_flags[tri]);
|
||||
|
||||
float tri_score = triangle_scores[tri] + score_diff;
|
||||
assert(tri_score > 0);
|
||||
|
||||
if (best_score < tri_score)
|
||||
{
|
||||
best_triangle = tri;
|
||||
best_score = tri_score;
|
||||
}
|
||||
|
||||
triangle_scores[tri] = tri_score;
|
||||
}
|
||||
}
|
||||
|
||||
// step through input triangles in order if we hit a dead-end
|
||||
current_triangle = best_triangle;
|
||||
|
||||
if (current_triangle == ~0u)
|
||||
{
|
||||
current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
|
||||
}
|
||||
}
|
||||
|
||||
assert(input_cursor == face_count);
|
||||
assert(output_triangle == face_count);
|
||||
}
|
||||
|
||||
void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
|
||||
{
|
||||
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
|
||||
}
|
||||
|
||||
void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
|
||||
{
|
||||
meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
|
||||
}
|
||||
|
||||
void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(index_count % 3 == 0);
|
||||
assert(cache_size >= 3);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
|
||||
// guard for empty meshes
|
||||
if (index_count == 0 || vertex_count == 0)
|
||||
return;
|
||||
|
||||
// support in-place optimization
|
||||
if (destination == indices)
|
||||
{
|
||||
unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
|
||||
memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
|
||||
indices = indices_copy;
|
||||
}
|
||||
|
||||
size_t face_count = index_count / 3;
|
||||
|
||||
// build adjacency information
|
||||
TriangleAdjacency adjacency = {};
|
||||
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
|
||||
|
||||
// live triangle counts
|
||||
unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
|
||||
memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
|
||||
|
||||
// cache time stamps
|
||||
unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
|
||||
memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
|
||||
|
||||
// dead-end stack
|
||||
unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
|
||||
unsigned int dead_end_top = 0;
|
||||
|
||||
// emitted flags
|
||||
unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
|
||||
memset(emitted_flags, 0, face_count);
|
||||
|
||||
unsigned int current_vertex = 0;
|
||||
|
||||
unsigned int timestamp = cache_size + 1;
|
||||
unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
|
||||
|
||||
unsigned int output_triangle = 0;
|
||||
|
||||
while (current_vertex != ~0u)
|
||||
{
|
||||
const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
|
||||
|
||||
// emit all vertex neighbours
|
||||
const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
|
||||
const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];
|
||||
|
||||
for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
|
||||
{
|
||||
unsigned int triangle = *it;
|
||||
|
||||
if (!emitted_flags[triangle])
|
||||
{
|
||||
unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
|
||||
|
||||
// output indices
|
||||
destination[output_triangle * 3 + 0] = a;
|
||||
destination[output_triangle * 3 + 1] = b;
|
||||
destination[output_triangle * 3 + 2] = c;
|
||||
output_triangle++;
|
||||
|
||||
// update dead-end stack
|
||||
dead_end[dead_end_top + 0] = a;
|
||||
dead_end[dead_end_top + 1] = b;
|
||||
dead_end[dead_end_top + 2] = c;
|
||||
dead_end_top += 3;
|
||||
|
||||
// update live triangle counts
|
||||
live_triangles[a]--;
|
||||
live_triangles[b]--;
|
||||
live_triangles[c]--;
|
||||
|
||||
// update cache info
|
||||
// if vertex is not in cache, put it in cache
|
||||
if (timestamp - cache_timestamps[a] > cache_size)
|
||||
cache_timestamps[a] = timestamp++;
|
||||
|
||||
if (timestamp - cache_timestamps[b] > cache_size)
|
||||
cache_timestamps[b] = timestamp++;
|
||||
|
||||
if (timestamp - cache_timestamps[c] > cache_size)
|
||||
cache_timestamps[c] = timestamp++;
|
||||
|
||||
// update emitted flags
|
||||
emitted_flags[triangle] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// next candidates are the ones we pushed to dead-end stack just now
|
||||
const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
|
||||
|
||||
// get next vertex
|
||||
current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
|
||||
|
||||
if (current_vertex == ~0u)
|
||||
{
|
||||
current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
|
||||
}
|
||||
}
|
||||
|
||||
assert(output_triangle == face_count);
|
||||
}
|
File diff suppressed because it is too large
@@ -0,0 +1,825 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <math.h>

// The block below auto-detects SIMD ISA that can be used on the target platform
#ifndef MESHOPTIMIZER_NO_SIMD

// The SIMD implementation requires SSE2, which can be enabled unconditionally through compiler settings
#if defined(__SSE2__)
#define SIMD_SSE
#endif

// MSVC supports compiling SSE2 code regardless of compile options; we assume all 32-bit CPUs support SSE2
#if !defined(SIMD_SSE) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
#define SIMD_SSE
#endif

// GCC/clang define these when NEON support is available
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define SIMD_NEON
#endif

// On MSVC, we assume that ARM builds always target NEON-capable devices
#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#define SIMD_NEON
#endif

// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
#if defined(__wasm_simd128__)
#define SIMD_WASM
#endif

#endif // !MESHOPTIMIZER_NO_SIMD

#ifdef SIMD_SSE
#include <emmintrin.h>
#include <stdint.h>
#endif

#ifdef _MSC_VER
#include <intrin.h>
#endif

#ifdef SIMD_NEON
#if defined(_MSC_VER) && defined(_M_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif

#ifdef SIMD_WASM
#include <wasm_simd128.h>
#endif

#ifdef SIMD_WASM
#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6)
#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7)
#endif

namespace meshopt
{

#if !defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_WASM)
template <typename T>
static void decodeFilterOct(T* data, size_t count)
{
	const float max = float((1 << (sizeof(T) * 8 - 1)) - 1);

	for (size_t i = 0; i < count; ++i)
	{
		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		float x = float(data[i * 4 + 0]);
		float y = float(data[i * 4 + 1]);
		float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y);

		// fixup octahedral coordinates for z<0
		float t = (z >= 0.f) ? 0.f : z;

		x += (x >= 0.f) ? t : -t;
		y += (y >= 0.f) ? t : -t;

		// compute normal length & scale
		float l = sqrtf(x * x + y * y + z * z);
		float s = max / l;

		// rounded signed float->int
		int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f));
		int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f));
		int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f));

		data[i * 4 + 0] = T(xf);
		data[i * 4 + 1] = T(yf);
		data[i * 4 + 2] = T(zf);
	}
}

static void decodeFilterQuat(short* data, size_t count)
{
	const float scale = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; ++i)
	{
		// recover scale from the high byte of the component
		int sf = data[i * 4 + 3] | 3;
		float ss = scale / float(sf);

		// convert x/y/z to [-1..1] (scaled...)
		float x = float(data[i * 4 + 0]) * ss;
		float y = float(data[i * 4 + 1]) * ss;
		float z = float(data[i * 4 + 2]) * ss;

		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
		float ww = 1.f - x * x - y * y - z * z;
		float w = sqrtf(ww >= 0.f ? ww : 0.f);

		// rounded signed float->int
		int xf = int(x * 32767.f + (x >= 0.f ? 0.5f : -0.5f));
		int yf = int(y * 32767.f + (y >= 0.f ? 0.5f : -0.5f));
		int zf = int(z * 32767.f + (z >= 0.f ? 0.5f : -0.5f));
		int wf = int(w * 32767.f + 0.5f);

		int qc = data[i * 4 + 3] & 3;

		// output order is dictated by input index
		data[i * 4 + ((qc + 1) & 3)] = short(xf);
		data[i * 4 + ((qc + 2) & 3)] = short(yf);
		data[i * 4 + ((qc + 3) & 3)] = short(zf);
		data[i * 4 + ((qc + 0) & 3)] = short(wf);
	}
}

static void decodeFilterExp(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; ++i)
	{
		unsigned int v = data[i];

		// decode mantissa and exponent
		int m = int(v << 8) >> 8;
		int e = int(v) >> 24;

		union
		{
			float f;
			unsigned int ui;
		} u;

		// optimized version of ldexp(float(m), e)
		u.ui = unsigned(e + 127) << 23;
		u.f = u.f * float(m);

		data[i] = u.ui;
	}
}
#endif
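
// Illustrative reference, not part of the imported library: decodeFilterExp above reads each
// 32-bit word as an 8-bit signed exponent over a 24-bit signed mantissa and computes
// float(m) * 2^e via bit manipulation; for exponents that keep e + 127 inside the normal
// float range this matches the ldexp form below (math.h is already included above).
inline float decodeExpReference(unsigned int v)
{
	int m = int(v << 8) >> 8; // sign-extend the low 24 bits
	int e = int(v) >> 24;     // sign-extend the high 8 bits

	return ldexpf(float(m), e);
}
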
#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
inline uint64_t rotateleft64(uint64_t v, int x)
{
#if defined(_MSC_VER) && !defined(__clang__)
	return _rotl64(v, x);
// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for
// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11)
	return __builtin_rotateleft64(v, x);
#else
	return (v << (x & 63)) | (v >> ((64 - x) & 63));
#endif
}
#endif

#ifdef SIMD_SSE
static void decodeFilterOctSimd(signed char* data, size_t count)
{
	const __m128 sign = _mm_set1_ps(-0.f);

	for (size_t i = 0; i < count; i += 4)
	{
		__m128i n4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4]));

		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 24), 24);
		__m128i yf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 24);

		// unpack z; note that z is unsigned so we technically don't need to sign extend it
		__m128i zf = _mm_srai_epi32(_mm_slli_epi32(n4, 8), 24);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		__m128 x = _mm_cvtepi32_ps(xf);
		__m128 y = _mm_cvtepi32_ps(yf);
		__m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));

		// fixup octahedral coordinates for z<0
		__m128 t = _mm_min_ps(z, _mm_setzero_ps());

		x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
		y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));

		// compute normal length & scale
		__m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
		__m128 s = _mm_mul_ps(_mm_set1_ps(127.f), _mm_rsqrt_ps(ll));

		// rounded signed float->int
		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));

		// combine xr/yr/zr into final value
		__m128i res = _mm_and_si128(n4, _mm_set1_epi32(0xff000000));
		res = _mm_or_si128(res, _mm_and_si128(xr, _mm_set1_epi32(0xff)));
		res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(yr, _mm_set1_epi32(0xff)), 8));
		res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(zr, _mm_set1_epi32(0xff)), 16));

		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res);
	}
}

static void decodeFilterOctSimd(short* data, size_t count)
{
	const __m128 sign = _mm_set1_ps(-0.f);

	for (size_t i = 0; i < count; i += 4)
	{
		__m128 n4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
		__m128 n4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		__m128i n4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(2, 0, 2, 0)));

		// sign-extends each of x,y in [x y] with arithmetic shifts
		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 16);
		__m128i yf = _mm_srai_epi32(n4, 16);

		// unpack z; note that z is unsigned so we don't need to sign extend it
		__m128i z4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(3, 1, 3, 1)));
		__m128i zf = _mm_and_si128(z4, _mm_set1_epi32(0x7fff));

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		__m128 x = _mm_cvtepi32_ps(xf);
		__m128 y = _mm_cvtepi32_ps(yf);
		__m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y)));

		// fixup octahedral coordinates for z<0
		__m128 t = _mm_min_ps(z, _mm_setzero_ps());

		x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign)));
		y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign)));

		// compute normal length & scale
		__m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)));
		__m128 s = _mm_div_ps(_mm_set1_ps(32767.f), _mm_sqrt_ps(ll));

		// rounded signed float->int
		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));

		// mix x/z and y/0 to make 16-bit unpack easier
		__m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
		__m128i y0r = _mm_and_si128(yr, _mm_set1_epi32(0xffff));

		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
		__m128i res_0 = _mm_unpacklo_epi16(xzr, y0r);
		__m128i res_1 = _mm_unpackhi_epi16(xzr, y0r);

		// patch in .w
		res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000)));
		res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000)));

		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1);
	}
}

static void decodeFilterQuatSimd(short* data, size_t count)
{
	const float scale = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; i += 4)
	{
		__m128 q4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4]));
		__m128 q4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		__m128i q4_xy = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(2, 0, 2, 0)));
		__m128i q4_zc = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(3, 1, 3, 1)));

		// sign-extends each of x,y in [x y] with arithmetic shifts
		__m128i xf = _mm_srai_epi32(_mm_slli_epi32(q4_xy, 16), 16);
		__m128i yf = _mm_srai_epi32(q4_xy, 16);
		__m128i zf = _mm_srai_epi32(_mm_slli_epi32(q4_zc, 16), 16);
		__m128i cf = _mm_srai_epi32(q4_zc, 16);

		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
		__m128i sf = _mm_or_si128(cf, _mm_set1_epi32(3));
		__m128 ss = _mm_div_ps(_mm_set1_ps(scale), _mm_cvtepi32_ps(sf));

		// convert x/y/z to [-1..1] (scaled...)
		__m128 x = _mm_mul_ps(_mm_cvtepi32_ps(xf), ss);
		__m128 y = _mm_mul_ps(_mm_cvtepi32_ps(yf), ss);
		__m128 z = _mm_mul_ps(_mm_cvtepi32_ps(zf), ss);

		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
		__m128 ww = _mm_sub_ps(_mm_set1_ps(1.f), _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))));
		__m128 w = _mm_sqrt_ps(_mm_max_ps(ww, _mm_setzero_ps()));

		__m128 s = _mm_set1_ps(32767.f);

		// rounded signed float->int
		__m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s));
		__m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s));
		__m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s));
		__m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, s));

		// mix x/z and w/y to make 16-bit unpack easier
		__m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16));
		__m128i wyr = _mm_or_si128(_mm_and_si128(wr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(yr, 16));

		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
		__m128i res_0 = _mm_unpacklo_epi16(wyr, xzr);
		__m128i res_1 = _mm_unpackhi_epi16(wyr, xzr);

		// store results to stack so that we can rotate using scalar instructions
		uint64_t res[4];
		_mm_storeu_si128(reinterpret_cast<__m128i*>(&res[0]), res_0);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(&res[2]), res_1);

		// rotate and store
		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);

		out[0] = rotateleft64(res[0], data[(i + 0) * 4 + 3] << 4);
		out[1] = rotateleft64(res[1], data[(i + 1) * 4 + 3] << 4);
		out[2] = rotateleft64(res[2], data[(i + 2) * 4 + 3] << 4);
		out[3] = rotateleft64(res[3], data[(i + 3) * 4 + 3] << 4);
	}
}

static void decodeFilterExpSimd(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		__m128i v = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i]));

		// decode exponent into 2^x directly
		__m128i ef = _mm_srai_epi32(v, 24);
		__m128i es = _mm_slli_epi32(_mm_add_epi32(ef, _mm_set1_epi32(127)), 23);

		// decode 24-bit mantissa into floating-point value
		__m128i mf = _mm_srai_epi32(_mm_slli_epi32(v, 8), 8);
		__m128 m = _mm_cvtepi32_ps(mf);

		__m128 r = _mm_mul_ps(_mm_castsi128_ps(es), m);

		_mm_storeu_ps(reinterpret_cast<float*>(&data[i]), r);
	}
}
#endif

#if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64)
inline float32x4_t vsqrtq_f32(float32x4_t x)
{
	float32x4_t r = vrsqrteq_f32(x);
	r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(r, x), r)); // refine rsqrt estimate
	return vmulq_f32(r, x);
}

inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y)
{
	float32x4_t r = vrecpeq_f32(y);
	r = vmulq_f32(r, vrecpsq_f32(y, r)); // refine rcp estimate
	return vmulq_f32(x, r);
}
#endif

#ifdef SIMD_NEON
static void decodeFilterOctSimd(signed char* data, size_t count)
{
	const int32x4_t sign = vdupq_n_s32(0x80000000);

	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t n4 = vld1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]));

		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 24), 24);
		int32x4_t yf = vshrq_n_s32(vshlq_n_s32(n4, 16), 24);

		// unpack z; note that z is unsigned so we technically don't need to sign extend it
		int32x4_t zf = vshrq_n_s32(vshlq_n_s32(n4, 8), 24);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		float32x4_t x = vcvtq_f32_s32(xf);
		float32x4_t y = vcvtq_f32_s32(yf);
		float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));

		// fixup octahedral coordinates for z<0
		float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));

		x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
		y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));

		// compute normal length & scale
		float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
		float32x4_t rl = vrsqrteq_f32(ll);
		float32x4_t s = vmulq_f32(vdupq_n_f32(127.f), rl);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));

		// combine xr/yr/zr into final value
		int32x4_t res = vandq_s32(n4, vdupq_n_s32(0xff000000));
		res = vorrq_s32(res, vandq_s32(xr, vdupq_n_s32(0xff)));
		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(yr, vdupq_n_s32(0xff)), 8));
		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(zr, vdupq_n_s32(0xff)), 16));

		vst1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]), res);
	}
}

static void decodeFilterOctSimd(short* data, size_t count)
{
	const int32x4_t sign = vdupq_n_s32(0x80000000);

	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t n4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
		int32x4_t n4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		int32x4_t n4 = vuzpq_s32(n4_0, n4_1).val[0];

		// sign-extends each of x,y in [x y] with arithmetic shifts
		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 16), 16);
		int32x4_t yf = vshrq_n_s32(n4, 16);

		// unpack z; note that z is unsigned so we don't need to sign extend it
		int32x4_t z4 = vuzpq_s32(n4_0, n4_1).val[1];
		int32x4_t zf = vandq_s32(z4, vdupq_n_s32(0x7fff));

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		float32x4_t x = vcvtq_f32_s32(xf);
		float32x4_t y = vcvtq_f32_s32(yf);
		float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y)));

		// fixup octahedral coordinates for z<0
		float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f));

		x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign))));
		y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign))));

		// compute normal length & scale
		float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)));
		float32x4_t rl = vrsqrteq_f32(ll);
		rl = vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)); // refine rsqrt estimate
		float32x4_t s = vmulq_f32(vdupq_n_f32(32767.f), rl);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));

		// mix x/z and y/0 to make 16-bit unpack easier
		int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
		int32x4_t y0r = vandq_s32(yr, vdupq_n_s32(0xffff));

		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
		int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[0]);
		int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[1]);

		// patch in .w
		res_0 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_0, res_0);
		res_1 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_1, res_1);

		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]), res_0);
		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]), res_1);
	}
}

static void decodeFilterQuatSimd(short* data, size_t count)
{
	const float scale = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t q4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
		int32x4_t q4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));

		// gather both x/y 16-bit pairs in each 32-bit lane
		int32x4_t q4_xy = vuzpq_s32(q4_0, q4_1).val[0];
		int32x4_t q4_zc = vuzpq_s32(q4_0, q4_1).val[1];

		// sign-extends each of x,y in [x y] with arithmetic shifts
		int32x4_t xf = vshrq_n_s32(vshlq_n_s32(q4_xy, 16), 16);
		int32x4_t yf = vshrq_n_s32(q4_xy, 16);
		int32x4_t zf = vshrq_n_s32(vshlq_n_s32(q4_zc, 16), 16);
		int32x4_t cf = vshrq_n_s32(q4_zc, 16);

		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
		int32x4_t sf = vorrq_s32(cf, vdupq_n_s32(3));
		float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), vcvtq_f32_s32(sf));

		// convert x/y/z to [-1..1] (scaled...)
		float32x4_t x = vmulq_f32(vcvtq_f32_s32(xf), ss);
		float32x4_t y = vmulq_f32(vcvtq_f32_s32(yf), ss);
		float32x4_t z = vmulq_f32(vcvtq_f32_s32(zf), ss);

		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
		float32x4_t ww = vsubq_f32(vdupq_n_f32(1.f), vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))));
		float32x4_t w = vsqrtq_f32(vmaxq_f32(ww, vdupq_n_f32(0.f)));

		float32x4_t s = vdupq_n_f32(32767.f);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap));
		int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap));
		int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap));
		int32x4_t wr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(w, s), fsnap));

		// mix x/z and w/y to make 16-bit unpack easier
		int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16));
		int32x4_t wyr = vorrq_s32(vandq_s32(wr, vdupq_n_s32(0xffff)), vshlq_n_s32(yr, 16));

		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
		int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]);
		int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]);

		// rotate and store
		uint64_t* out = (uint64_t*)&data[i * 4];

		out[0] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 0), vgetq_lane_s32(cf, 0) << 4);
		out[1] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 1), vgetq_lane_s32(cf, 1) << 4);
		out[2] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 0), vgetq_lane_s32(cf, 2) << 4);
		out[3] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 1), vgetq_lane_s32(cf, 3) << 4);
	}
}

static void decodeFilterExpSimd(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t v = vld1q_s32(reinterpret_cast<int32_t*>(&data[i]));

		// decode exponent into 2^x directly
		int32x4_t ef = vshrq_n_s32(v, 24);
		int32x4_t es = vshlq_n_s32(vaddq_s32(ef, vdupq_n_s32(127)), 23);

		// decode 24-bit mantissa into floating-point value
		int32x4_t mf = vshrq_n_s32(vshlq_n_s32(v, 8), 8);
		float32x4_t m = vcvtq_f32_s32(mf);

		float32x4_t r = vmulq_f32(vreinterpretq_f32_s32(es), m);

		vst1q_f32(reinterpret_cast<float*>(&data[i]), r);
	}
}
#endif

#ifdef SIMD_WASM
static void decodeFilterOctSimd(signed char* data, size_t count)
{
	const v128_t sign = wasm_f32x4_splat(-0.f);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t n4 = wasm_v128_load(&data[i * 4]);

		// sign-extends each of x,y in [x y ? ?] with arithmetic shifts
		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 24), 24);
		v128_t yf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 24);

		// unpack z; note that z is unsigned so we technically don't need to sign extend it
		v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 8), 24);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		v128_t x = wasm_f32x4_convert_i32x4(xf);
		v128_t y = wasm_f32x4_convert_i32x4(yf);
		v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));

		// fixup octahedral coordinates for z<0
		// note: i32x4_min with 0 is equivalent to f32x4_min
		v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));

		x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
		y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));

		// compute normal length & scale
		v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
		v128_t s = wasm_f32x4_div(wasm_f32x4_splat(127.f), wasm_f32x4_sqrt(ll));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);

		// combine xr/yr/zr into final value
		v128_t res = wasm_v128_and(n4, wasm_i32x4_splat(0xff000000));
		res = wasm_v128_or(res, wasm_v128_and(xr, wasm_i32x4_splat(0xff)));
		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(yr, wasm_i32x4_splat(0xff)), 8));
		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(zr, wasm_i32x4_splat(0xff)), 16));

		wasm_v128_store(&data[i * 4], res);
	}
}

static void decodeFilterOctSimd(short* data, size_t count)
{
	const v128_t sign = wasm_f32x4_splat(-0.f);
	const v128_t zmask = wasm_i32x4_splat(0x7fff);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]);
		v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]);

		// gather both x/y 16-bit pairs in each 32-bit lane
		v128_t n4 = wasmx_unziplo_v32x4(n4_0, n4_1);

		// sign-extends each of x,y in [x y] with arithmetic shifts
		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 16);
		v128_t yf = wasm_i32x4_shr(n4, 16);

		// unpack z; note that z is unsigned so we don't need to sign extend it
		v128_t z4 = wasmx_unziphi_v32x4(n4_0, n4_1);
		v128_t zf = wasm_v128_and(z4, zmask);

		// convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count
		v128_t x = wasm_f32x4_convert_i32x4(xf);
		v128_t y = wasm_f32x4_convert_i32x4(yf);
		v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y)));

		// fixup octahedral coordinates for z<0
		// note: i32x4_min with 0 is equivalent to f32x4_min
		v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0));

		x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign)));
		y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign)));

		// compute normal length & scale
		v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)));
		v128_t s = wasm_f32x4_div(wasm_f32x4_splat(32767.f), wasm_f32x4_sqrt(ll));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);

		// mix x/z and y/0 to make 16-bit unpack easier
		v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
		v128_t y0r = wasm_v128_and(yr, wasm_i32x4_splat(0xffff));

		// pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w
		v128_t res_0 = wasmx_unpacklo_v16x8(xzr, y0r);
		v128_t res_1 = wasmx_unpackhi_v16x8(xzr, y0r);

		// patch in .w
		res_0 = wasm_v128_or(res_0, wasm_v128_and(n4_0, wasm_i64x2_splat(0xffff000000000000)));
		res_1 = wasm_v128_or(res_1, wasm_v128_and(n4_1, wasm_i64x2_splat(0xffff000000000000)));

		wasm_v128_store(&data[(i + 0) * 4], res_0);
		wasm_v128_store(&data[(i + 2) * 4], res_1);
	}
}

static void decodeFilterQuatSimd(short* data, size_t count)
{
	const float scale = 1.f / sqrtf(2.f);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]);
		v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]);

		// gather both x/y 16-bit pairs in each 32-bit lane
		v128_t q4_xy = wasmx_unziplo_v32x4(q4_0, q4_1);
		v128_t q4_zc = wasmx_unziphi_v32x4(q4_0, q4_1);

		// sign-extends each of x,y in [x y] with arithmetic shifts
		v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(q4_xy, 16), 16);
		v128_t yf = wasm_i32x4_shr(q4_xy, 16);
		v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(q4_zc, 16), 16);
		v128_t cf = wasm_i32x4_shr(q4_zc, 16);

		// get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f)
		v128_t sf = wasm_v128_or(cf, wasm_i32x4_splat(3));
		v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), wasm_f32x4_convert_i32x4(sf));

		// convert x/y/z to [-1..1] (scaled...)
		v128_t x = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(xf), ss);
		v128_t y = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(yf), ss);
		v128_t z = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(zf), ss);

		// reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors
		// note: i32x4_max with 0 is equivalent to f32x4_max
		v128_t ww = wasm_f32x4_sub(wasm_f32x4_splat(1.f), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))));
		v128_t w = wasm_f32x4_sqrt(wasm_i32x4_max(ww, wasm_i32x4_splat(0)));

		v128_t s = wasm_f32x4_splat(32767.f);

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap);
		v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap);
		v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap);
		v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, s), fsnap);

		// mix x/z and w/y to make 16-bit unpack easier
		v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16));
		v128_t wyr = wasm_v128_or(wasm_v128_and(wr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(yr, 16));

		// pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0)
		v128_t res_0 = wasmx_unpacklo_v16x8(wyr, xzr);
		v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr);

		// compute component index shifted left by 4 (and moved into i32x4 slot)
		// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449
		volatile v128_t cm = wasm_i32x4_shl(cf, 4);

		// rotate and store
		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);

		out[0] = rotateleft64(wasm_i64x2_extract_lane(res_0, 0), wasm_i32x4_extract_lane(cm, 0));
		out[1] = rotateleft64(wasm_i64x2_extract_lane(res_0, 1), wasm_i32x4_extract_lane(cm, 1));
		out[2] = rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2));
		out[3] = rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3));
	}
}

static void decodeFilterExpSimd(unsigned int* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		v128_t v = wasm_v128_load(&data[i]);

		// decode exponent into 2^x directly
		v128_t ef = wasm_i32x4_shr(v, 24);
		v128_t es = wasm_i32x4_shl(wasm_i32x4_add(ef, wasm_i32x4_splat(127)), 23);

		// decode 24-bit mantissa into floating-point value
		v128_t mf = wasm_i32x4_shr(wasm_i32x4_shl(v, 8), 8);
		v128_t m = wasm_f32x4_convert_i32x4(mf);

		v128_t r = wasm_f32x4_mul(es, m);

		wasm_v128_store(&data[i], r);
	}
}
#endif

} // namespace meshopt

void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size == 4 || vertex_size == 8);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	if (vertex_size == 4)
		decodeFilterOctSimd(static_cast<signed char*>(buffer), vertex_count);
	else
		decodeFilterOctSimd(static_cast<short*>(buffer), vertex_count);
#else
	if (vertex_size == 4)
		decodeFilterOct(static_cast<signed char*>(buffer), vertex_count);
	else
		decodeFilterOct(static_cast<short*>(buffer), vertex_count);
#endif
}

void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size == 8);
	(void)vertex_size;

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	decodeFilterQuatSimd(static_cast<short*>(buffer), vertex_count);
#else
	decodeFilterQuat(static_cast<short*>(buffer), vertex_count);
#endif
}

void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size % 4 == 0);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	decodeFilterExpSimd(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4));
#else
	decodeFilterExp(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4));
#endif
}

#undef SIMD_SSE
#undef SIMD_NEON
#undef SIMD_WASM
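
For orientation, a minimal sketch of driving these decoders (not part of this commit); buffer layout assumptions follow the asserts in meshopt_decodeFilterOct above:

#include "meshoptimizer.h"

// Decodes octahedral-filtered normals stored as four snorm8 components per vertex.
// vertex_count must be a multiple of 4 per the assert above; callers pad to satisfy this.
void decodeOctNormals(signed char* normals, size_t vertex_count)
{
	meshopt_decodeFilterOct(normals, vertex_count, 4);
}
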
@@ -0,0 +1,58 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
{
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	meshopt_VertexFetchStatistics result = {};

	unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count);
	memset(vertex_visited, 0, vertex_count);

	const size_t kCacheLine = 64;
	const size_t kCacheSize = 128 * 1024;

	// simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway
	size_t cache[kCacheSize / kCacheLine] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		vertex_visited[index] = 1;

		size_t start_address = index * vertex_size;
		size_t end_address = start_address + vertex_size;

		size_t start_tag = start_address / kCacheLine;
		size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine;

		assert(start_tag < end_tag);

		for (size_t tag = start_tag; tag < end_tag; ++tag)
		{
			size_t line = tag % (sizeof(cache) / sizeof(cache[0]));

			// we store +1 since cache is filled with 0 by default
			result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine;
			cache[line] = tag + 1;
		}
	}

	size_t unique_vertex_count = 0;

	for (size_t i = 0; i < vertex_count; ++i)
		unique_vertex_count += vertex_visited[i];

	result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size);

	return result;
}
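
A minimal sketch of consuming the fetch statistics (not part of this commit), assuming a 32-byte interleaved vertex:

#include <cstdio>

#include "meshoptimizer.h"

void printFetchStats(const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	meshopt_VertexFetchStatistics stats = meshopt_analyzeVertexFetch(indices, index_count, vertex_count, 32);

	// overfetch: bytes read through the model cache divided by unique vertex bytes (1.0 ideal)
	printf("fetched %u bytes, overfetch %f\n", stats.bytes_fetched, stats.overfetch);
}
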
@@ -0,0 +1,74 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <string.h>

size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	assert(index_count % 3 == 0);

	memset(destination, -1, vertex_count * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		if (destination[index] == ~0u)
		{
			destination[index] = next_vertex++;
		}
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}
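
// Illustrative sketch, not part of the imported library: the remap form above is meant for
// meshes with multiple vertex streams sharing one index buffer. It pairs with
// meshopt_remapIndexBuffer/meshopt_remapVertexBuffer from meshoptimizer.h (also vendored
// by this PR); both support in-place operation.
inline void optimizeVertexFetchTwoStreams(unsigned int* indices, size_t index_count, float* positions, float* uvs, size_t vertex_count, unsigned int* remap)
{
	size_t unique = meshopt_optimizeVertexFetchRemap(remap, indices, index_count, vertex_count);
	(void)unique; // both streams can be shrunk to this vertex count afterwards

	meshopt_remapIndexBuffer(indices, indices, index_count, remap);
	meshopt_remapVertexBuffer(positions, positions, vertex_count, sizeof(float) * 3, remap);
	meshopt_remapVertexBuffer(uvs, uvs, vertex_count, sizeof(float) * 2, remap);
}
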
size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
	assert(index_count % 3 == 0);
	assert(vertex_size > 0 && vertex_size <= 256);

	meshopt_Allocator allocator;

	// support in-place optimization
	if (destination == vertices)
	{
		unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
		memcpy(vertices_copy, vertices, vertex_count * vertex_size);
		vertices = vertices_copy;
	}

	// build vertex remap table
	unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count);
	memset(vertex_remap, -1, vertex_count * sizeof(unsigned int));

	unsigned int next_vertex = 0;

	for (size_t i = 0; i < index_count; ++i)
	{
		unsigned int index = indices[i];
		assert(index < vertex_count);

		unsigned int& remap = vertex_remap[index];

		if (remap == ~0u) // vertex was not added to destination VB
		{
			// add vertex
			memcpy(static_cast<unsigned char*>(destination) + next_vertex * vertex_size, static_cast<const unsigned char*>(vertices) + index * vertex_size, vertex_size);

			remap = next_vertex++;
		}

		// modify indices in place
		indices[i] = remap;
	}

	assert(next_vertex <= vertex_count);

	return next_vertex;
}
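
A minimal single-stream sketch for the function above (not part of this commit), run after cache optimization so the final triangle order drives the reordering:

#include <vector>

#include "meshoptimizer.h"

// Reorders an xyz position buffer in place and rewrites indices to match;
// returns the number of unique vertices actually referenced.
size_t optimizeFetch(std::vector<float>& positions, std::vector<unsigned int>& indices)
{
	size_t vertex_count = positions.size() / 3;

	return meshopt_optimizeVertexFetch(positions.data(), indices.data(), indices.size(), positions.data(), vertex_count, sizeof(float) * 3);
}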