diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h new file mode 100644 index 0000000000..2b0d408342 --- /dev/null +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -0,0 +1,25 @@ + +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_ASSET_C_OBB_GENERATOR_H_INCLUDED_ +#define _NBL_ASSET_C_OBB_GENERATOR_H_INCLUDED_ + +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/builtin/hlsl/shapes/obb.hlsl" + +namespace nbl::asset +{ + class COBBGenerator + { + public: + + using VertexCollection = CPolygonGeometryManipulator::VertexCollection; + + static hlsl::shapes::OBB<> compute(const VertexCollection& vertices); + + }; +} + +#endif diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index cc41bba7e9..7b953b4fbd 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -10,6 +10,7 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" +#include "nbl/builtin/hlsl/shapes/obb.hlsl" namespace nbl::asset { @@ -231,6 +232,26 @@ class NBL_API2 CPolygonGeometryManipulator EEM_COUNT }; + struct VertexCollection + { + using FetchFn = std::function; + FetchFn fetch; + size_t size; + + static auto fromSpan(std::span vertices) -> VertexCollection + { + return VertexCollection{ + .fetch = [data = vertices.data()](size_t vertexIndex)-> hlsl::float32_t3 + { + return data[vertexIndex]; + }, + .size = vertices.size() + }; + } + + hlsl::float32_t3 operator[](size_t index) const { return fetch(index); } + }; + static hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(const VertexCollection& vertexCollection); static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); using SSNGVertexData = CSmoothNormalGenerator::VertexData; diff --git a/include/nbl/builtin/hlsl/shapes/aabb.hlsl b/include/nbl/builtin/hlsl/shapes/aabb.hlsl index 5b4b1be39d..07219c6687 100644 --- a/include/nbl/builtin/hlsl/shapes/aabb.hlsl +++ b/include/nbl/builtin/hlsl/shapes/aabb.hlsl @@ -60,6 +60,7 @@ struct AABB point_t maxVx; }; + namespace util { namespace impl diff --git a/include/nbl/builtin/hlsl/shapes/obb.hlsl b/include/nbl/builtin/hlsl/shapes/obb.hlsl new file mode 100644 index 0000000000..45873cbc7b --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/obb.hlsl @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
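// --- Illustrative usage sketch (not part of the patch) ---------------------------------------
// The headers above expose the new OBB path: caller-owned positions are wrapped in a
// CPolygonGeometryManipulator::VertexCollection and handed to COBBGenerator::compute (or the
// CPolygonGeometryManipulator::calculateOBB forwarder). `positions` below is a hypothetical
// std::vector of hlsl::float32_t3 owned by the caller:
//
//   const auto collection = CPolygonGeometryManipulator::VertexCollection::fromSpan(positions);
//   const hlsl::shapes::OBB<> obb = CPolygonGeometryManipulator::calculateOBB(collection);
//   // obb.mid is the box center, obb.axes its orthonormal frame, obb.ext its half-extents;
//   // the getTransformFromOBB() helper declared further down in CDrawAABB.h can turn it into a
//   // debug-draw transform.
// ----------------------------------------------------------------------------------------------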
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_SHAPES_OBB_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_OBB_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +template +struct OBB +{ + using scalar_t = Scalar; + using point_t = vector; + + static OBB createAxisAligned(point_t mid, point_t len) + { + OBB ret; + ret.mid = mid; + ret.ext = len * 0.5f; + for (auto dim_i = 0; dim_i < D; dim_i++) + { + ret.axes[dim_i] = point_t(0); + ret.axes[dim_i][dim_i] = 1; + } + return ret; + } + + point_t mid; + std::array axes; + point_t ext; +}; + +} +} +} + +#endif diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 126731f425..6263378024 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -208,6 +208,8 @@ namespace nbl::ext::debug_draw return transform; } + static hlsl::float32_t3x4 getTransformFromOBB(const hlsl::shapes::OBB<3, float>& aabb); + protected: struct ConstructorParams { diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 76e046848c..13e5b44728 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -186,6 +186,7 @@ set(NBL_ASSET_SOURCES # Meshes asset/utils/CForsythVertexCacheOptimizer.cpp asset/utils/CSmoothNormalGenerator.cpp + asset/utils/COBBGenerator.cpp asset/utils/CGeometryCreator.cpp asset/utils/CPolygonGeometryManipulator.cpp asset/utils/COverdrawPolygonGeometryOptimizer.cpp diff --git a/src/nbl/asset/utils/COBBGenerator.cpp b/src/nbl/asset/utils/COBBGenerator.cpp new file mode 100644 index 0000000000..d869a89cc6 --- /dev/null +++ b/src/nbl/asset/utils/COBBGenerator.cpp @@ -0,0 +1,478 @@ +#include "nbl/asset/utils/COBBGenerator.h" + +namespace nbl::asset +{ + +namespace +{ + +template +struct Extremals +{ + std::array values; + + T* minPtr() + { + return values.data(); + } + + const T* minPtr() const + { + return values.data(); + } + + T* maxPtr() + { + return values.data() + CountV; + } + + const T* maxPtr() const + { + return values.data() + CountV; + } + +}; +} + +hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) +{ + constexpr size_t SAMPLE_DIR_COUNT = 7; // Number of sample directions + constexpr size_t SAMPLE_COUNT = SAMPLE_DIR_COUNT * 2; + + if (vertices.size <= 0) + { + return hlsl::shapes::OBB<>::createAxisAligned({}, {}); + } + + static auto getQualityValue = [](hlsl::float32_t3 len) -> hlsl::float32_t + { + return len.x * len.y + len.x * len.z + len.y * len.z; //half box area + }; + + using ExtremalVertices = Extremals; + using ExtremalProjections = Extremals; + using Axes = std::array; + using Edges = std::array; + + struct ExtremalSamples + { + ExtremalVertices vertices; + ExtremalProjections projections; + }; + + struct LargeBaseTriangle + { + hlsl::float32_t3 normal = {}; + Axes vertices = {}; + Edges edges = {}; + enum Flag + { + NORMAL, + SECOND_POINT_CLOSE, + THIRD_POINT_CLOSE + } flag; + }; + + static auto findExtremals_7FixedDirs = [](const VertexCollection& vertices)-> ExtremalSamples + { + ExtremalSamples result; + hlsl::float32_t proj; + + const auto firstVertex = vertices.fetch(0); + + auto* minProjections = result.projections.minPtr(); + auto* maxProjections = result.projections.maxPtr(); + + auto* minVertices = result.vertices.minPtr(); + auto* maxVertices = result.vertices.maxPtr(); + + // Slab 0: dir {1, 0, 0} + proj = firstVertex.x; + minProjections[0] = minProjections[0] = proj; + minVertices[0] = firstVertex; maxVertices[0] = 
firstVertex; + // Slab 1: dir {0, 1, 0} + proj = firstVertex.y; + minProjections[1] = maxProjections[1] = proj; + minVertices[1] = firstVertex; maxVertices[1] = firstVertex; + // Slab 2: dir {0, 0, 1} + proj = firstVertex.z; + minProjections[2] = maxProjections[2] = proj; + minVertices[2] = firstVertex; maxVertices[2] = firstVertex; + // Slab 3: dir {1, 1, 1} + proj = firstVertex.x + firstVertex.y + firstVertex.z; + minProjections[3] = maxProjections[3] = proj; + minVertices[3] = firstVertex; maxVertices[3] = firstVertex; + // Slab 4: dir {1, 1, -1} + proj = firstVertex.x + firstVertex.y - firstVertex.z; + minProjections[4] = maxProjections[4] = proj; + minVertices[4] = firstVertex; maxVertices[4] = firstVertex; + // Slab 5: dir {1, -1, 1} + proj = firstVertex.x - firstVertex.y + firstVertex.z; + minProjections[5] = maxProjections[5] = proj; + minVertices[5] = firstVertex; maxVertices[5] = firstVertex; + // Slab 6: dir {1, -1, -1} + proj = firstVertex.x - firstVertex.y - firstVertex.z; + minProjections[6] = maxProjections[6] = proj; + minVertices[6] = firstVertex; maxVertices[6] = firstVertex; + + for (size_t vertex_i = 1; vertex_i < vertices.size; vertex_i++) + { + const auto vertex = vertices.fetch(vertex_i); + // Slab 0: dir {1, 0, 0} + proj = vertices.fetch(vertex_i).x; + if (proj < minProjections[0]) { minProjections[0] = proj; minVertices[0] = vertices.fetch(vertex_i); } + if (proj > maxProjections[0]) { maxProjections[0] = proj; maxVertices[0] = vertices.fetch(vertex_i); } + // Slab 1: dir {0, 1, 0} + proj = vertices.fetch(vertex_i).y; + if (proj < minProjections[1]) { minProjections[1] = proj; minVertices[1] = vertices.fetch(vertex_i); } + if (proj > maxProjections[1]) { maxProjections[1] = proj; maxVertices[1] = vertices.fetch(vertex_i); } + // Slab 2: dir {0, 0, 1} + proj = vertices.fetch(vertex_i).z; + if (proj < minProjections[2]) { minProjections[2] = proj; minVertices[2] = vertices.fetch(vertex_i); } + if (proj > maxProjections[2]) { maxProjections[2] = proj; maxVertices[2] = vertices.fetch(vertex_i); } + // Slab 3: dir {1, 1, 1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[3]) { minProjections[3] = proj; minVertices[3] = vertices.fetch(vertex_i); } + if (proj > maxProjections[3]) { maxProjections[3] = proj; maxVertices[3] = vertices.fetch(vertex_i); } + // Slab 4: dir {1, 1, -1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < minProjections[4]) { minProjections[4] = proj; minVertices[4] = vertices.fetch(vertex_i); } + if (proj > maxProjections[4]) { maxProjections[4] = proj; maxVertices[4] = vertices.fetch(vertex_i); } + // Slab 5: dir {1, -1, 1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[5]) { minProjections[5] = proj; minVertices[5] = vertices.fetch(vertex_i); } + if (proj > maxProjections[5]) { maxProjections[5] = proj; maxVertices[5] = vertices.fetch(vertex_i); } + // Slab 6: dir {1, -1, -1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < minProjections[6]) { minProjections[6] = proj; minVertices[6] = vertices.fetch(vertex_i); } + if (proj > maxProjections[6]) { maxProjections[6] = proj; maxVertices[6] = vertices.fetch(vertex_i); } + } + + return result; + }; + + + static auto findFurthestPointPair = [](const ExtremalVertices& extremalVertices) -> std::pair + { + int indexFurthestPair = 0; + auto 
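            // (descriptive note) The seven fixed slab directions sampled above - {1,0,0}, {0,1,0},
            // {0,0,1}, {1,1,1}, {1,1,-1}, {1,-1,1}, {1,-1,-1} - need no dot products: every
            // projection is just a sum or difference of vertex components. Their per-direction
            // minima and maxima are the SAMPLE_COUNT (14) extremal vertices that the later axis
            // searches reuse as a reduced point set, in the style of fixed-direction (DiTO-like)
            // OBB fitters.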
maxSqDist = hlsl::dot(extremalVertices.maxPtr()[0], extremalVertices.minPtr()[0]); + for (int k = 1; k < SAMPLE_DIR_COUNT; k++) + { + const auto sqDist = hlsl::dot(extremalVertices.maxPtr()[k], extremalVertices.minPtr()[k]); + if (sqDist > maxSqDist) { maxSqDist = sqDist; indexFurthestPair = k; } + } + return { + extremalVertices.minPtr()[indexFurthestPair], + extremalVertices.maxPtr()[indexFurthestPair] + }; + }; + + static auto sqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, const hlsl::float32_t3& v) -> hlsl::float32_t + { + const auto u0 = q - p0; + const auto t = dot(v, u0); + const auto sqLen_v = hlsl::dot(v, v); + return hlsl::dot(u0, u0) - (t * t) / sqLen_v; + }; + + static auto findFurthestPointFromInfiniteEdge = [](const hlsl::float32_t3& p0, const hlsl::float32_t3& e0, const VertexCollection& vertices) + { + auto maxSqDist = sqDistPointInfiniteEdge(vertices[0], p0, e0); + int maxIndex = 0; + for (size_t i = 1; i < vertices.size; i++) + { + const auto sqDist = sqDistPointInfiniteEdge(vertices[i], p0, e0); + if (sqDist > maxSqDist) + { maxSqDist = sqDist; + maxIndex = i; + } + } + + struct Result + { + hlsl::float32_t3 point; + hlsl::float32_t sqDist; + }; + return Result{ + vertices[maxIndex], + maxSqDist + }; + }; + + static auto findExtremalProjs_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = hlsl::dot(vertices[0], normal); + auto tMinProj = firstProj, tMaxProj = firstProj; + + for (int i = 1; i < vertices.size; i++) + { + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) { tMinProj = proj; } + if (proj > tMaxProj) { tMaxProj = proj; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + }; + return Result{ tMinProj, tMaxProj }; + }; + + static auto findExtremalPoints_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = dot(vertices[0], normal); + + auto tMinProj = firstProj, tMaxProj = firstProj; + auto tMinVert = vertices[0], tMaxVert = vertices[0]; + + for (int i = 1; i < vertices.size; i++) + { + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) { tMinProj = proj; tMinVert = vertices[i]; } + if (proj > tMaxProj) { tMaxProj = proj; tMaxVert = vertices[i]; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + hlsl::float32_t3 minVert; + hlsl::float32_t3 maxVert; + }; + return Result{ tMinProj, tMaxProj, tMinVert, tMaxVert }; + }; + + static auto findUpperLowerTetraPoints = []( + const hlsl::float32_t3& n, + const VertexCollection& vertices, + const hlsl::float32_t3& p0) + { + const auto eps = 0.000001f; + const auto extremalPoints = findExtremalPoints_OneDir(n, vertices); + const auto triProj = hlsl::dot(p0, n); + + const auto maxVert = extremalPoints.maxProj - eps > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; + const auto minVert = extremalPoints.minProj + eps < triProj ? 
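        // (descriptive note) sqDistPointInfiniteEdge above is the standard point-to-infinite-line
        // distance: with u0 = q - p0 and t = dot(v, u0), the closest point on the line is
        // p0 + (t / |v|^2) * v, so the squared distance is |u0|^2 - t^2 / |v|^2, exactly the value
        // returned; v does not need to be normalized.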
std::optional(extremalPoints.minVert) : std::nullopt; + + struct Result + { + std::optional minVert; + std::optional maxVert; + }; + return Result{ + minVert, + maxVert + }; + }; + + static auto findBestObbAxesFromTriangleNormalAndEdgeVectors = []( + const VertexCollection& vertices, + const hlsl::float32_t3 normal, + const std::array& edges, + Axes& bestAxes, + hlsl::float32_t& bestVal) + { + // The operands are assumed to be orthogonal and unit normals + const auto yExtremeProjs = findExtremalProjs_OneDir(normal, vertices); + const auto yLen = yExtremeProjs.maxProj - yExtremeProjs.minProj; + + for (const auto& edge : edges) + { + const auto binormal = hlsl::cross(edge, normal); + + const auto xExtremeProjs = findExtremalProjs_OneDir(edge, vertices); + const auto xLen = xExtremeProjs.maxProj - xExtremeProjs.minProj; + + const auto zExtremeProjs = findExtremalProjs_OneDir(binormal, vertices); + const auto zLen = zExtremeProjs.maxProj - zExtremeProjs.minProj; + + const auto quality = getQualityValue({xLen, yLen, zLen}); + if (quality < bestVal) + { + bestVal = quality; + bestAxes = { + edge, + normal, + binormal + }; + } + } + + }; + + + static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle + { + constexpr hlsl::float32_t eps = 0.000001f; + + std::array baseTriangleVertices; + Edges edges; + + // Find the furthest point pair among the selected min and max point pairs + std::tie(baseTriangleVertices[0], baseTriangleVertices[1]) = findFurthestPointPair(extremalVertices); + + // Degenerate case 1: + // If the found furthest points are located very close, return OBB aligned with the initial AABB + if (hlsl::dot(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) + { + return { + .vertices = baseTriangleVertices, + .flag = LargeBaseTriangle::SECOND_POINT_CLOSE + }; + } + + // Compute edge vector of the line segment p0, p1 + edges[0] = hlsl::normalize(baseTriangleVertices[0] - baseTriangleVertices[1]); + + // Find a third point furthest away from line given by p0, e0 to define the large base triangle + const auto furthestPointRes = findFurthestPointFromInfiniteEdge(vertices[0], edges[0], vertices); + + // Degenerate case 2: + // If the third point is located very close to the line, return an OBB aligned with the line + if (furthestPointRes.sqDist < eps) + { + return { + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::THIRD_POINT_CLOSE + }; + } + + // Compute the two remaining edge vectors and the normal vector of the base triangle + edges[1] = hlsl::normalize(baseTriangleVertices[1] - baseTriangleVertices[2]); + edges[2] = hlsl::normalize(baseTriangleVertices[2] - baseTriangleVertices[0]); + const auto normal = hlsl::normalize(hlsl::cross(edges[1], edges[0])); + + return { + .normal = normal, + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::NORMAL + }; + }; + + auto findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle = [](const VertexCollection& vertices, + const LargeBaseTriangle& baseTriangle, + Axes& bestAxes, hlsl::float32_t& bestVal) + { + + // Find furthest points above and below the plane of the base triangle for tetra constructions + // For each found valid point, search for the best OBB axes based on the 3 arising triangles + const auto upperLowerTetraVertices = findUpperLowerTetraPoints(baseTriangle.normal, vertices, baseTriangle.vertices[0]); + if (upperLowerTetraVertices.minVert) + { + const auto minVert = 
*upperLowerTetraVertices.minVert; + const auto f0 = normalize(minVert - baseTriangle.vertices[0]); + const auto f1 = normalize(minVert - baseTriangle.vertices[1]); + const auto f2 = normalize(minVert - baseTriangle.vertices[2]); + const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); + const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); + const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); + } + if (upperLowerTetraVertices.maxVert) + { + const auto maxVert = *upperLowerTetraVertices.maxVert; + const auto f0 = normalize(maxVert - baseTriangle.vertices[0]); + const auto f1 = normalize(maxVert - baseTriangle.vertices[1]); + const auto f2 = normalize(maxVert - baseTriangle.vertices[2]); + const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); + const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); + const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); + } + }; + + static auto buildObbFromAxesAndLocalMinMax = []( + const Axes& axes, + const hlsl::float32_t3& localMin, + const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> + { + const auto localMid = 0.5f * (localMin + localMax); + return { + .mid = axes[0] * localMid.x + axes[1] * localMid.y + axes[2] * localMid.z, + .axes = axes, + .ext = 0.5f * (localMax - localMin) + }; + }; + + static auto computeObb = [](const Axes& axes, const VertexCollection& vertices) + { + const auto extremalX = findExtremalProjs_OneDir(axes[0], vertices); + const auto extremalY = findExtremalProjs_OneDir(axes[1], vertices); + const auto extremalZ = findExtremalProjs_OneDir(axes[2], vertices); + const auto localMin = hlsl::float32_t3{ extremalX.minProj, extremalY.minProj, extremalZ.minProj }; + const auto localMax = hlsl::float32_t3{ extremalX.maxProj, extremalY.maxProj, extremalZ.maxProj }; + return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); + }; + + static auto computeLineAlignedObb = [](const hlsl::float32_t3& u, const VertexCollection& vertices) + { + // Given u, build any orthonormal base u, v, w + + // Make sure r is not equal to u + auto r = u; + if (fabs(u.x) > fabs(u.y) && fabs(u.x) > fabs(u.z)) { r.x = 0; } + else if (fabs(u.y) > fabs(u.z)) { r.y = 0; } + else { r.z = 0; } + + const auto sqLen = hlsl::dot(r, r); + if (sqLen < FLT_EPSILON) { r.x = r.y = r.z = 1; } + + const auto v = normalize(cross(u, r)); + const auto w = normalize(cross(u, v)); + return computeObb({ u, v, w }, vertices); + }; + + const auto extremals = findExtremals_7FixedDirs(vertices); + + const auto* minProj = extremals.projections.minPtr(); + const auto* maxProj = extremals.projections.maxPtr(); + + // Determine which points to use in the iterations below + const auto selectedVertices = [&] + { + if (vertices.size < SAMPLE_COUNT) { return vertices; } + return 
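    // (descriptive note) When the mesh has at least SAMPLE_COUNT (14) vertices, the axis searches
    // below iterate only over the 14 extremal vertices gathered by findExtremals_7FixedDirs, while
    // the final computeObb call still projects the full vertex set onto the winning axes, so the
    // returned box always encloses every input vertex.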
VertexCollection::fromSpan(extremals.vertices.values); + }(); + + // Compute size of AABB (max and min projections of vertices are already computed as slabs 0-2) + auto alMid = hlsl::float32_t3((minProj[0] + maxProj[0]) * 0.5f, (minProj[1] + maxProj[1]) * 0.5f, (minProj[2] + maxProj[2]) * 0.5f); + auto alLen = hlsl::float32_t3(maxProj[0] - minProj[0], maxProj[1] - minProj[1], maxProj[2] - minProj[2]); + auto alVal = getQualityValue(alLen); + + + const auto baseTriangle = findBaseTriangle(extremals.vertices, vertices); + + if (baseTriangle.flag == LargeBaseTriangle::SECOND_POINT_CLOSE) + return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); + if (baseTriangle.flag == LargeBaseTriangle::THIRD_POINT_CLOSE) + return computeLineAlignedObb(baseTriangle.edges[0], vertices); + + + Axes bestAxes = { + hlsl::float32_t3{1.f, 0.f, 0.f}, + {0.f, 1.f, 0.f}, + {0.f, 0.f, 1.f}, + }; + auto bestVal = alVal; + // Find best OBB axes based on the base triangle + findBestObbAxesFromTriangleNormalAndEdgeVectors(selectedVertices, baseTriangle.normal, baseTriangle.edges, bestAxes, bestVal); + + // Find improved OBB axes based on constructed di-tetrahedral shape raised from base triangle + findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle(selectedVertices, baseTriangle, bestAxes, bestVal); + + const auto obb = computeObb(bestAxes, vertices); + + // Check if the OBB extent is still smaller than the intial AABB + if (getQualityValue(2.f * obb.ext) < alVal) + return obb; + return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); +} + +} diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 3227ea9958..74169eb0d8 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -11,11 +11,19 @@ #include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/asset/utils/CVertexWelder.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" +#include "nbl/asset/utils/CForsythVertexCacheOptimizer.h" +#include "nbl/asset/utils/COverdrawPolygonGeometryOptimizer.h" +#include "nbl/asset/utils/COBBGenerator.h" namespace nbl::asset { +hlsl::shapes::OBB<> CPolygonGeometryManipulator::calculateOBB(const VertexCollection& vertices) +{ + return COBBGenerator::compute(vertices); +} + core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) { const auto* indexing = inGeo->getIndexingCallback(); @@ -161,33 +169,139 @@ core::smart_refctd_ptr CPolygonGeometryManipulator::createS } #if 0 +//! Flips the direction of surfaces. Changes backfacing triangles to frontfacing +//! triangles and vice versa. +//! \param mesh: Mesh on which the operation is performed. 
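//! (descriptive note) Winding is flipped per topology: triangle lists swap the 2nd and 3rd index of
//! every triangle, triangle fans swap each index pair after the leading index, odd-length strips
//! are reversed in place, and even-length strips get their first index duplicated into a new index
//! buffer that is one entry longer, which shifts the parity (and thus the facing) of every triangle.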
+void IMeshManipulator::flipSurfaces(ICPUMeshBuffer* inbuffer) +{ + if (!inbuffer) + return; + auto* pipeline = inbuffer->getPipeline(); + const E_PRIMITIVE_TOPOLOGY primType = pipeline->getCachedCreationParams().primitiveAssembly.primitiveType; + + const uint32_t idxcnt = inbuffer->getIndexCount(); + if (!inbuffer->getIndices()) + return; + + if (inbuffer->getIndexType() == EIT_16BIT) + { + uint16_t* idx = reinterpret_cast(inbuffer->getIndices()); + switch (primType) + { + case EPT_TRIANGLE_FAN: + for (uint32_t i = 1; i < idxcnt; i += 2) + { + const uint16_t tmp = idx[i]; + idx[i] = idx[i + 1]; + idx[i + 1] = tmp; + } + break; + case EPT_TRIANGLE_STRIP: + if (idxcnt % 2) //odd + { + for (uint32_t i = 0; i < (idxcnt >> 1); i++) + { + const uint16_t tmp = idx[i]; + idx[i] = idx[idxcnt - 1 - i]; + idx[idxcnt - 1 - i] = tmp; + } + } + else //even + { + auto newIndexBuffer = ICPUBuffer::create({ (idxcnt + 1u) * sizeof(uint16_t) }); + auto* destPtr = reinterpret_cast(newIndexBuffer->getPointer()); + destPtr[0] = idx[0]; + memcpy(destPtr + 1u, idx, sizeof(uint16_t) * idxcnt); + inbuffer->setIndexCount(idxcnt + 1u); + SBufferBinding ixBufBinding{ 0u, std::move(newIndexBuffer) }; + inbuffer->setIndexBufferBinding(std::move(ixBufBinding)); + } + break; + case EPT_TRIANGLE_LIST: + for (uint32_t i = 0; i < idxcnt; i += 3) + { + const uint16_t tmp = idx[i + 1]; + idx[i + 1] = idx[i + 2]; + idx[i + 2] = tmp; + } + break; + default: break; + } + } + else if (inbuffer->getIndexType() == EIT_32BIT) + { + uint32_t* idx = reinterpret_cast(inbuffer->getIndices()); + switch (primType) + { + case EPT_TRIANGLE_FAN: + for (uint32_t i = 1; i < idxcnt; i += 2) + { + const uint32_t tmp = idx[i]; + idx[i] = idx[i + 1]; + idx[i + 1] = tmp; + } + break; + case EPT_TRIANGLE_STRIP: + if (idxcnt % 2) //odd + { + for (uint32_t i = 0; i < (idxcnt >> 1); i++) + { + const uint32_t tmp = idx[i]; + idx[i] = idx[idxcnt - 1 - i]; + idx[idxcnt - 1 - i] = tmp; + } + } + else //even + { + auto newIndexBuffer = ICPUBuffer::create({ (idxcnt + 1u) * sizeof(uint32_t) }); + auto* destPtr = reinterpret_cast(newIndexBuffer->getPointer()); + destPtr[0] = idx[0]; + memcpy(destPtr + 1u, idx, sizeof(uint32_t) * idxcnt); + inbuffer->setIndexCount(idxcnt + 1); + SBufferBinding ixBufBinding{ 0u, std::move(newIndexBuffer) }; + inbuffer->setIndexBufferBinding(std::move(ixBufBinding)); + } + break; + case EPT_TRIANGLE_LIST: + for (uint32_t i = 0; i < idxcnt; i += 3) + { + const uint32_t tmp = idx[i + 1]; + idx[i + 1] = idx[i + 2]; + idx[i + 2] = tmp; + } + break; + default: break; + } + } +} + core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOptimized(const ICPUMeshBuffer* _inbuffer) { if (!_inbuffer) return nullptr; - const auto* pipeline = _inbuffer->getPipeline(); - const void* ind = _inbuffer->getIndices(); + const auto* pipeline = _inbuffer->getPipeline(); + const void* ind = _inbuffer->getIndices(); if (!pipeline || !ind) return nullptr; auto outbuffer = core::move_and_static_cast(_inbuffer->clone(1u)); - outbuffer->setAttachedDescriptorSet(core::smart_refctd_ptr(const_cast(_inbuffer->getAttachedDescriptorSet()))); - outbuffer->setSkin( - SBufferBinding(reinterpret_cast&>(_inbuffer->getInverseBindPoseBufferBinding())), - SBufferBinding(reinterpret_cast&>(_inbuffer->getJointAABBBufferBinding())), - _inbuffer->getJointCount(),_inbuffer->getMaxJointsPerVertex() - ); + outbuffer->setAttachedDescriptorSet(core::smart_refctd_ptr(const_cast(_inbuffer->getAttachedDescriptorSet()))); + outbuffer->setSkin( + 
SBufferBinding(reinterpret_cast&>(_inbuffer->getInverseBindPoseBufferBinding())), + SBufferBinding(reinterpret_cast&>(_inbuffer->getJointAABBBufferBinding())), + _inbuffer->getJointCount(),_inbuffer->getMaxJointsPerVertex() + ); - constexpr uint32_t MAX_ATTRIBS = asset::ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; + constexpr uint32_t MAX_ATTRIBS = asset::ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; // Find vertex count size_t vertexCount = IMeshManipulator::upperBoundVertexID(_inbuffer); core::unordered_set buffers; for (size_t i = 0; i < MAX_ATTRIBS; ++i) - if (auto* buf = _inbuffer->getAttribBoundBuffer(i).buffer.get()) - buffers.insert(buf); + if (auto* buf = _inbuffer->getAttribBoundBuffer(i).buffer.get()) + buffers.insert(buf); size_t offsets[MAX_ATTRIBS]; memset(offsets, -1, sizeof(offsets)); @@ -202,36 +316,36 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp { types[i] = _inbuffer->getAttribFormat(i); - const uint32_t typeSz = getTexelOrBlockBytesize(types[i]); - const size_t alignment = (typeSz/getFormatChannelCount(types[i]) == 8u) ? 8ull : 4ull; // if format 64bit per channel, then align to 8 + const uint32_t typeSz = getTexelOrBlockBytesize(types[i]); + const size_t alignment = (typeSz/getFormatChannelCount(types[i]) == 8u) ? 8ull : 4ull; // if format 64bit per channel, then align to 8 offsets[i] = lastOffset + lastSize; const size_t mod = offsets[i] % alignment; offsets[i] += mod; lastOffset = offsets[i]; - lastSize = typeSz; + lastSize = typeSz; } } const size_t vertexSize = lastOffset + lastSize; - constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; - auto& vtxParams = outbuffer->getPipeline()->getCachedCreationParams().vertexInput; - vtxParams = SVertexInputParams(); - vtxParams.enabledAttribFlags = _inbuffer->getPipeline()->getCachedCreationParams().vertexInput.enabledAttribFlags; - vtxParams.enabledBindingFlags = 1u << NEW_VTX_BUF_BINDING; - vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = vertexSize; - vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; + constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; + auto& vtxParams = outbuffer->getPipeline()->getCachedCreationParams().vertexInput; + vtxParams = SVertexInputParams(); + vtxParams.enabledAttribFlags = _inbuffer->getPipeline()->getCachedCreationParams().vertexInput.enabledAttribFlags; + vtxParams.enabledBindingFlags = 1u << NEW_VTX_BUF_BINDING; + vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = vertexSize; + vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; auto newVertBuffer = ICPUBuffer::create({ vertexCount*vertexSize }); - outbuffer->setVertexBufferBinding({ 0u, core::smart_refctd_ptr(newVertBuffer) }, NEW_VTX_BUF_BINDING); + outbuffer->setVertexBufferBinding({ 0u, core::smart_refctd_ptr(newVertBuffer) }, NEW_VTX_BUF_BINDING); for (size_t i = 0; i < MAX_ATTRIBS; ++i) { if (offsets[i] < 0xffffffff) { - vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; - vtxParams.attributes[i].format = types[i]; - vtxParams.attributes[i].relativeOffset = offsets[i]; + vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; + vtxParams.attributes[i].format = types[i]; + vtxParams.attributes[i].relativeOffset = offsets[i]; } } } @@ -261,7 +375,7 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp { E_FORMAT type = types[activeAttribs[j]]; - if (!isNormalizedFormat(type) && (isIntegerFormat(type) || isScaledFormat(type))) + if (!isNormalizedFormat(type) && (isIntegerFormat(type) || isScaledFormat(type))) { uint32_t dst[4]; 
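            // (descriptive note) attributes whose format is integer or scaled, but not normalized,
            // are fetched through the raw 32-bit getAttribute overload below so their exact bit
            // patterns survive the copy into the interleaved vertex buffer.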
_inbuffer->getAttribute(dst, activeAttribs[j], index); @@ -284,12 +398,1480 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp ((uint16_t*)indices)[i] = remap; } - _NBL_DELETE_ARRAY(remapBuffer,vertexCount); + _NBL_DELETE_ARRAY(remapBuffer,vertexCount); _NBL_DEBUG_BREAK_IF(nextVert > vertexCount) return outbuffer; } + +//! Creates a copy of the mesh, which will only consist of unique primitives +core::smart_refctd_ptr IMeshManipulator::createMeshBufferUniquePrimitives(ICPUMeshBuffer* inbuffer, bool _makeIndexBuf) +{ + if (!inbuffer) + return nullptr; + const ICPURenderpassIndependentPipeline* oldPipeline = inbuffer->getPipeline(); + if (!oldPipeline) + return nullptr; + + const uint32_t idxCnt = inbuffer->getIndexCount(); + if (idxCnt<2u || !inbuffer->getIndices()) + return core::smart_refctd_ptr(inbuffer); // yes we want an extra grab + + const auto& oldVtxParams = oldPipeline->getCachedCreationParams().vertexInput; + + auto clone = core::move_and_static_cast(inbuffer->clone(0u)); + + constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; + + auto pipeline = core::smart_refctd_ptr_static_cast(oldPipeline->clone(0u)); + auto& vtxParams = pipeline->getCachedCreationParams().vertexInput; + vtxParams = SVertexInputParams(); + + vtxParams.enabledBindingFlags = (1u<getAttribBoundBuffer(i); + if (inbuffer->isAttributeEnabled(i) && vbuf.buffer) + { + offset[i] = stride; + newAttribSizes[i] = getTexelOrBlockBytesize(inbuffer->getAttribFormat(i)); + stride += newAttribSizes[i]; + if (stride>=0xdeadbeefu) + return nullptr; + + sourceBuffers[i] = reinterpret_cast(vbuf.buffer->getPointer()); + sourceBuffers[i] += inbuffer->getAttribCombinedOffset(i); + sourceBufferStrides[i] = inbuffer->getAttribStride(i); + } + else + offset[i] = -1; + } + + vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; + vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = stride; + + auto vertexBuffer = ICPUBuffer::create({ stride*idxCnt }); + clone->setVertexBufferBinding({0u, vertexBuffer}, 0u); + for (size_t i=0; i= 0) + { + vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; + vtxParams.attributes[i].format = inbuffer->getAttribFormat(i); + vtxParams.attributes[i].relativeOffset = offset[i]; + } + } + + uint8_t* destPointer = reinterpret_cast(vertexBuffer->getPointer()); + if (inbuffer->getIndexType()==EIT_16BIT) + { + uint16_t* idx = reinterpret_cast(inbuffer->getIndices()); + for (uint64_t i=0; igetBaseVertex())*sourceBufferStrides[j],newAttribSizes[j]); + destPointer += newAttribSizes[j]; + } + } + else if (inbuffer->getIndexType()==EIT_32BIT) + { + uint32_t* idx = reinterpret_cast(inbuffer->getIndices()); + for (uint64_t i=0; igetBaseVertex())*sourceBufferStrides[j],newAttribSizes[j]); + destPointer += newAttribSizes[j]; + } + } + + clone->setPipeline(std::move(pipeline)); + + if (_makeIndexBuf) + { + auto idxbuf = ICPUBuffer::create({ idxCnt*(idxCnt<0x10000 ? 
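        // (descriptive note) every primitive now owns its own vertices, so the optional index buffer
        // below is just the identity sequence 0,1,2,...; 16-bit indices are used while the count
        // fits below 0x10000, 32-bit otherwise.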
2u : 4u) }); + if (idxCnt<0x10000u) + { + for (uint32_t i = 0u; i < idxCnt; ++i) + reinterpret_cast(idxbuf->getPointer())[i] = i; + clone->setIndexType(EIT_16BIT); + } + else + { + for (uint32_t i = 0u; i < idxCnt; ++i) + reinterpret_cast(idxbuf->getPointer())[i] = i; + clone->setIndexType(EIT_32BIT); + } + clone->setIndexBufferBinding({ 0u, std::move(idxbuf) }); + } + else + { + clone->setIndexType(EIT_UNKNOWN); + } + } + + return clone; +} + +// +core::smart_refctd_ptr IMeshManipulator::calculateSmoothNormals(ICPUMeshBuffer* inbuffer, bool makeNewMesh, float epsilon, uint32_t normalAttrID, VxCmpFunction vxcmp) +{ + if (inbuffer == nullptr) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } + + //Mesh has to have unique primitives + if (inbuffer->getIndexType() != E_INDEX_TYPE::EIT_UNKNOWN) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } + + core::smart_refctd_ptr outbuffer; + if (makeNewMesh) + { + outbuffer = core::move_and_static_cast(inbuffer->clone(0u)); + + const auto normalAttr = inbuffer->getNormalAttributeIx(); + auto normalBinding = inbuffer->getBindingNumForAttribute(normalAttr); + const auto oldPipeline = inbuffer->getPipeline(); + auto vertexParams = oldPipeline->getCachedCreationParams().vertexInput; + bool notUniqueBinding = false; + for (uint16_t attr=0u; attr0 && firstBindingNotUsed(firstBindingNotUsed); + + vertexParams.attributes[normalAttr].binding = normalBinding; + vertexParams.enabledBindingFlags |= 0x1u<getAttribFormat(normalAttr)); + auto normalBuf = ICPUBuffer::create({ normalFormatBytesize*IMeshManipulator::upperBoundVertexID(inbuffer) }); + outbuffer->setVertexBufferBinding({0ull,std::move(normalBuf)},normalBinding); + + auto pipeline = core::move_and_static_cast(oldPipeline->clone(0u)); + vertexParams.bindings[normalBinding].stride = normalFormatBytesize; + vertexParams.attributes[normalAttr].relativeOffset = 0u; + pipeline->getCachedCreationParams().vertexInput = vertexParams; + outbuffer->setPipeline(std::move(pipeline)); + } + else + outbuffer = core::smart_refctd_ptr(inbuffer); + CSmoothNormalGenerator::calculateNormals(outbuffer.get(), epsilon, normalAttrID, vxcmp); + + return outbuffer; +} + +// Used by createMeshBufferWelded only +static bool cmpVertices(ICPUMeshBuffer* _inbuf, const void* _va, const void* _vb, size_t _vsize, const IMeshManipulator::SErrorMetric* _errMetrics) +{ + auto cmpInteger = [](uint32_t* _a, uint32_t* _b, size_t _n) -> bool { + return !memcmp(_a, _b, _n*4); + }; + + constexpr uint32_t MAX_ATTRIBS = ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; + + const uint8_t* va = reinterpret_cast(_va), *vb = reinterpret_cast(_vb); + for (size_t i = 0u; i < MAX_ATTRIBS; ++i) + { + if (!_inbuf->isAttributeEnabled(i)) + continue; + + const auto atype = _inbuf->getAttribFormat(i); + const auto cpa = getFormatChannelCount(atype); + + if (isIntegerFormat(atype) || isScaledFormat(atype)) + { + uint32_t attr[8]; + ICPUMeshBuffer::getAttribute(attr, va, atype); + ICPUMeshBuffer::getAttribute(attr+4, vb, atype); + if (!cmpInteger(attr, attr+4, cpa)) + return false; + } + else + { + core::vectorSIMDf attr[2]; + ICPUMeshBuffer::getAttribute(attr[0], va, atype); + ICPUMeshBuffer::getAttribute(attr[1], vb, atype); + if (!IMeshManipulator::compareFloatingPointAttribute(attr[0], attr[1], cpa, _errMetrics[i])) + return false; + } + + const uint32_t sz = getTexelOrBlockBytesize(atype); + va += sz; + vb += sz; + } + + return true; +} + +//! 
Creates a copy of a mesh, which will have identical vertices welded together +core::smart_refctd_ptr IMeshManipulator::createMeshBufferWelded(ICPUMeshBuffer *inbuffer, const SErrorMetric* _errMetrics, const bool& optimIndexType, const bool& makeNewMesh) +{ + if (!inbuffer || !inbuffer->getPipeline()) + return nullptr; + + constexpr uint32_t MAX_ATTRIBS = ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; + + bool bufferPresent[MAX_ATTRIBS]; + + size_t vertexAttrSize[MAX_ATTRIBS]; + size_t vertexSize = 0; + for (size_t i=0; igetAttribBoundBuffer(i).buffer; + bufferPresent[i] = inbuffer->isAttributeEnabled(i); + if (bufferPresent[i] && buf) + { + const E_FORMAT componentType = inbuffer->getAttribFormat(i); + vertexAttrSize[i] = getTexelOrBlockBytesize(componentType); + vertexSize += vertexAttrSize[i]; + } + } + + auto cmpfunc = [&, inbuffer, vertexSize, _errMetrics](const void* _va, const void* _vb) { + return cmpVertices(inbuffer, _va, _vb, vertexSize, _errMetrics); + }; + + const uint32_t vertexCount = IMeshManipulator::upperBoundVertexID(inbuffer); + const E_INDEX_TYPE oldIndexType = inbuffer->getIndexType(); + + if (!vertexCount) + return nullptr; + + // reset redirect list + uint32_t* redirects = new uint32_t[vertexCount]; + + uint32_t maxRedirect = 0; + + uint8_t* epicData = reinterpret_cast(_NBL_ALIGNED_MALLOC(vertexSize*vertexCount,_NBL_SIMD_ALIGNMENT)); + for (auto i=0u; igetAttribStride(k); + uint8_t* sourcePtr = inbuffer->getAttribPointer(k) + i*stride; + memcpy(currentVertexPtr,sourcePtr,vertexAttrSize[k]); + currentVertexPtr += vertexAttrSize[k]; + } + } + + for (auto i=0u; imaxRedirect) + maxRedirect = redir; + } + _NBL_ALIGNED_FREE(epicData); + + void* oldIndices = inbuffer->getIndices(); + core::smart_refctd_ptr clone; + if (makeNewMesh) + clone = core::smart_refctd_ptr_static_cast(inbuffer->clone(0u)); + else + { + if (!oldIndices) + { + inbuffer->setIndexBufferBinding({ 0u, ICPUBuffer::create({ (maxRedirect >= 0x10000u ? sizeof(uint32_t) : sizeof(uint16_t)) * inbuffer->getIndexCount() }) }); + inbuffer->setIndexType(maxRedirect>=0x10000u ? EIT_32BIT:EIT_16BIT); + } + } + + // TODO: reduce the code duplication via the use of a generic lambda (with a `auto*`) + if (oldIndexType==EIT_16BIT) + { + uint16_t* indicesIn = reinterpret_cast(oldIndices); + if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_32BIT) + { + uint32_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); + for (size_t i=0; igetIndexCount(); i++) + indicesOut[i] = redirects[indicesIn[i]]; + } + else if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_16BIT) + { + uint16_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); + for (size_t i=0; igetIndexCount(); i++) + indicesOut[i] = redirects[indicesIn[i]]; + } + } + else if (oldIndexType==EIT_32BIT) + { + uint32_t* indicesIn = reinterpret_cast(oldIndices); + if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_32BIT) + { + uint32_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); + for (size_t i=0; igetIndexCount(); i++) + indicesOut[i] = redirects[indicesIn[i]]; + } + else if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_16BIT) + { + uint16_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); + for (size_t i=0; igetIndexCount(); i++) + indicesOut[i] = redirects[indicesIn[i]]; + } + } + else if ((makeNewMesh ? 
clone.get():inbuffer)->getIndexType()==EIT_32BIT) + { + uint32_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); + for (size_t i=0; igetIndexCount(); i++) + indicesOut[i] = redirects[i]; + } + else if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_16BIT) + { + uint16_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); + for (size_t i=0; igetIndexCount(); i++) + indicesOut[i] = redirects[i]; + } + delete [] redirects; + + if (makeNewMesh) + return clone; + else + return core::smart_refctd_ptr(inbuffer); +} + +core::smart_refctd_ptr IMeshManipulator::createOptimizedMeshBuffer(const ICPUMeshBuffer* _inbuffer, const SErrorMetric* _errMetric) +{ + if (!_inbuffer) + return nullptr; + const auto oldPipeline = _inbuffer->getPipeline(); + auto outbuffer = core::move_and_static_cast(_inbuffer->clone(oldPipeline ? 1u:0u)); + if (!oldPipeline) + return outbuffer; + + // restore shared skeleton and descriptor set + outbuffer->setAttachedDescriptorSet(core::smart_refctd_ptr(const_cast(_inbuffer->getAttachedDescriptorSet()))); + outbuffer->setSkin( + SBufferBinding(reinterpret_cast&>(_inbuffer->getInverseBindPoseBufferBinding())), + SBufferBinding(reinterpret_cast&>(_inbuffer->getJointAABBBufferBinding())), + _inbuffer->getJointCount(),_inbuffer->getMaxJointsPerVertex() + ); + + // make index buffer 0,1,2,3,4,... if nothing's mapped + // make 32bit index buffer if 16bit one is present + // convert index buffer for triangle primitives + constexpr auto canonicalMeshBufferIndexType = EIT_32BIT; + IMeshManipulator::homogenizePrimitiveTypeAndIndices(&outbuffer.get(),&outbuffer.get()+1,EPT_TRIANGLE_LIST,canonicalMeshBufferIndexType); + if (outbuffer->getPipeline()->getCachedCreationParams().primitiveAssembly.primitiveType != EPT_TRIANGLE_LIST) + return nullptr; + + // STEP: weld + createMeshBufferWelded(outbuffer.get(), _errMetric, false, false); + + // STEP: filter invalid triangles + if (!_inbuffer->isSkinned()) + filterInvalidTriangles(outbuffer.get()); + + // STEP: overdraw optimization + COverdrawMeshOptimizer::createOptimized(outbuffer.get(),outbuffer.get()); + + // STEP: Forsyth + { + uint32_t* indices = reinterpret_cast(outbuffer->getIndices()); + CForsythVertexCacheOptimizer forsyth; + const uint32_t vertexCount = IMeshManipulator::upperBoundVertexID(_inbuffer); + forsyth.optimizeTriangleOrdering(vertexCount, outbuffer->getIndexCount(), indices, indices); + } + + // STEP: prefetch optimization + outbuffer = CMeshManipulator::createMeshBufferFetchOptimized(outbuffer.get()); // here we also get interleaved attributes (single vertex buffer) + + // STEP: requantization + requantizeMeshBuffer(outbuffer.get(), _errMetric); + + // STEP: reduce index buffer to 16bit or completely get rid of it + { + const void* const indices = outbuffer->getIndices(); + uint32_t* indicesCopy = (uint32_t*)_NBL_ALIGNED_MALLOC(outbuffer->getIndexCount()*4,_NBL_SIMD_ALIGNMENT); + memcpy(indicesCopy, indices, outbuffer->getIndexCount()*4); + std::sort(indicesCopy, indicesCopy + outbuffer->getIndexCount()); + + bool continuous = true; // indices are i.e. 0,1,2,3,4,5,... 
(also implies indices being unique) + bool unique = true; // indices are unique (but not necessarily continuos) + + for (size_t i = 0; i < outbuffer->getIndexCount(); ++i) + { + uint32_t idx = indicesCopy[i], prevIdx = 0xffffffffu; + if (i) + { + prevIdx = indicesCopy[i-1]; + + if (idx == prevIdx) + { + unique = false; + continuous = false; + break; + } + if (idx != prevIdx + 1) + continuous = false; + } + } + + const uint32_t minIdx = indicesCopy[0]; + const uint32_t maxIdx = indicesCopy[outbuffer->getIndexCount() - 1]; + + _NBL_ALIGNED_FREE(indicesCopy); + + core::smart_refctd_ptr newIdxBuffer; + bool verticesMustBeReordered = false; + E_INDEX_TYPE newIdxType = EIT_UNKNOWN; + + if (!continuous) + { + if (unique) + { + // no index buffer + // vertices have to be reordered + verticesMustBeReordered = true; + } + else + { + if (maxIdx - minIdx <= USHRT_MAX) + newIdxType = EIT_16BIT; + else + newIdxType = EIT_32BIT; + + outbuffer->setBaseVertex(outbuffer->getBaseVertex() + minIdx); + + if (newIdxType == EIT_16BIT) + { + newIdxBuffer = ICPUBuffer::create({ sizeof(uint16_t)*outbuffer->getIndexCount() }); + // no need to change index buffer offset because it's always 0 (after duplicating original mesh) + for (size_t i = 0; i < outbuffer->getIndexCount(); ++i) + reinterpret_cast(newIdxBuffer->getPointer())[i] = reinterpret_cast(indices)[i] - minIdx; + } + } + } + else + { + outbuffer->setBaseVertex(outbuffer->getBaseVertex()+minIdx); + } + + outbuffer->setIndexType(newIdxType); + outbuffer->setIndexBufferBinding({ 0u, std::move(newIdxBuffer) }); + + if (verticesMustBeReordered) + { + auto* pipeline = outbuffer->getPipeline(); + + // reorder vertices according to index buffer +#define _ACCESS_IDX(n) ((newIdxType == EIT_32BIT) ? *(reinterpret_cast(indices)+(n)) : *(reinterpret_cast(indices)+(n))) + + const uint32_t posId = outbuffer->getPositionAttributeIx(); + const size_t bufsz = outbuffer->getAttribBoundBuffer(posId).buffer->getSize(); + + const size_t vertexSize = pipeline->getCachedCreationParams().vertexInput.bindings[0].stride; + uint8_t* const v = reinterpret_cast(outbuffer->getAttribBoundBuffer(posId).buffer->getPointer()); // after prefetch optim. 
we have guarantee of single vertex buffer so we can do like this + uint8_t* const vCopy = reinterpret_cast(_NBL_ALIGNED_MALLOC(bufsz, _NBL_SIMD_ALIGNMENT)); + memcpy(vCopy, v, bufsz); + + size_t baseVtx = outbuffer->getBaseVertex(); + for (size_t i = 0; i < outbuffer->getIndexCount(); ++i) + { + const uint32_t idx = _ACCESS_IDX(i+baseVtx); + if (idx != i+baseVtx) + memcpy(v + (vertexSize*(i + baseVtx)), vCopy + (vertexSize*idx), vertexSize); + } +#undef _ACCESS_IDX + _NBL_ALIGNED_FREE(vCopy); + } + } + + return outbuffer; +} + +void IMeshManipulator::requantizeMeshBuffer(ICPUMeshBuffer* _meshbuffer, const SErrorMetric* _errMetric) +{ + constexpr uint32_t MAX_ATTRIBS = ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; + + CMeshManipulator::SAttrib newAttribs[MAX_ATTRIBS]; + for (size_t i = 0u; i < MAX_ATTRIBS; ++i) + newAttribs[i].vaid = i; + + CQuantNormalCache quantizationCache; + + core::unordered_map> attribsI; + core::unordered_map> attribsF; + for (size_t vaid = 0u; vaid < MAX_ATTRIBS; ++vaid) + { + const E_FORMAT type = _meshbuffer->getAttribFormat(vaid); + + const auto& vbuf = _meshbuffer->getAttribBoundBuffer(vaid).buffer; + if (_meshbuffer->isAttributeEnabled(vaid) && vbuf) + { + if (!isNormalizedFormat(type) && isIntegerFormat(type)) + attribsI[vaid] = CMeshManipulator::findBetterFormatI(&newAttribs[vaid].type, &newAttribs[vaid].size, &newAttribs[vaid].prevType, _meshbuffer, vaid, _errMetric[vaid]); + else + attribsF[vaid] = CMeshManipulator::findBetterFormatF(&newAttribs[vaid].type, &newAttribs[vaid].size, &newAttribs[vaid].prevType, _meshbuffer, vaid, _errMetric[vaid], quantizationCache); + } + } + + const size_t activeAttributeCount = attribsI.size() + attribsF.size(); + +#ifdef _NBL_DEBUG + { + core::unordered_set sizesSet; + for (core::unordered_map>::iterator it = attribsI.begin(); it != attribsI.end(); ++it) + sizesSet.insert(it->second.size()); + for (core::unordered_map>::iterator it = attribsF.begin(); it != attribsF.end(); ++it) + sizesSet.insert(it->second.size()); + _NBL_DEBUG_BREAK_IF(sizesSet.size() != 1); + } #endif -} // end namespace nbl::asset + const size_t vertexCnt = (!attribsI.empty() ? attribsI.begin()->second.size() : (!attribsF.empty() ? attribsF.begin()->second.size() : 0)); + + std::sort(newAttribs, newAttribs + MAX_ATTRIBS, std::greater()); // sort decreasing by size + + for (size_t i = 0u; i < activeAttributeCount; ++i) + { + const uint32_t typeSz = getTexelOrBlockBytesize(newAttribs[i].type); + const size_t alignment = (typeSz / getFormatChannelCount(newAttribs[i].type) == 8u) ? 8ull : 4ull; // if format 64bit per channel, than align to 8 + + newAttribs[i].offset = (i ? 
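        // (descriptive note) the attributes were sorted above in decreasing size order; each one is
        // placed right after the previous attribute and then adjusted by the per-format alignment
        // rule computed above (8 bytes for formats with 64 bits per channel, 4 bytes otherwise).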
newAttribs[i - 1].offset + newAttribs[i - 1].size : 0u); + const size_t mod = newAttribs[i].offset % alignment; + newAttribs[i].offset += mod; + } + + const size_t vertexSize = newAttribs[activeAttributeCount - 1].offset + newAttribs[activeAttributeCount - 1].size; + + auto newVertexBuffer = ICPUBuffer::create({ vertexCnt * vertexSize }); + + constexpr uint32_t VTX_BUF_BINDING = 0u; + assert(_meshbuffer->getVertexBufferBindings()[0].buffer); + assert(_meshbuffer->isVertexAttribBufferBindingEnabled(VTX_BUF_BINDING)); + _meshbuffer->setVertexBufferBinding({ 0u, core::smart_refctd_ptr(newVertexBuffer) }, VTX_BUF_BINDING); + + auto* pipeline = _meshbuffer->getPipeline(); + auto& vtxParams = pipeline->getCachedCreationParams().vertexInput; + + vtxParams.bindings[VTX_BUF_BINDING].stride = vertexSize; + vtxParams.bindings[VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; + for (size_t i = 0u; i < activeAttributeCount; ++i) + { + const uint32_t vaid = newAttribs[i].vaid; + vtxParams.attributes[vaid].binding = VTX_BUF_BINDING; + vtxParams.attributes[vaid].format = newAttribs[i].type; + vtxParams.attributes[vaid].relativeOffset = newAttribs[i].offset; + core::unordered_map>::iterator iti = attribsI.find(newAttribs[i].vaid); + if (iti != attribsI.end()) + { + const core::vector& attrVec = iti->second; + for (size_t ai = 0u; ai < attrVec.size(); ++ai) + { + const bool check = _meshbuffer->setAttribute(attrVec[ai].pointer, newAttribs[i].vaid, ai); + _NBL_DEBUG_BREAK_IF(!check) + } + continue; + } + + core::unordered_map>::iterator itf = attribsF.find(newAttribs[i].vaid); + if (itf != attribsF.end()) + { + const core::vector& attrVec = itf->second; + for (size_t ai = 0u; ai < attrVec.size(); ++ai) + { + const bool check = _meshbuffer->setAttribute(attrVec[ai], newAttribs[i].vaid, ai); + _NBL_DEBUG_BREAK_IF(!check) + } + } + } +} + + + +void IMeshManipulator::filterInvalidTriangles(ICPUMeshBuffer* _input) +{ + if (!_input || !_input->getPipeline() || !_input->getIndices()) + return; + + switch (_input->getIndexType()) + { + case EIT_16BIT: + return CMeshManipulator::_filterInvalidTriangles(_input); + case EIT_32BIT: + return CMeshManipulator::_filterInvalidTriangles(_input); + default: return; + } +} + +template +void CMeshManipulator::_filterInvalidTriangles(ICPUMeshBuffer* _input) +{ + const size_t size = _input->getIndexCount() * sizeof(IdxT); + void* const copy = _NBL_ALIGNED_MALLOC(size,_NBL_SIMD_ALIGNMENT); + memcpy(copy, _input->getIndices(), size); + + struct Triangle + { + IdxT i[3]; + } *const begin = (Triangle*)copy, *const end = (Triangle*)(reinterpret_cast(copy) + size); + + Triangle* const newEnd = std::remove_if(begin, end, + [&_input](const Triangle& _t) { + core::vectorSIMDf p0, p1, p2; + const uint32_t pvaid = _input->getPositionAttributeIx(); + _input->getAttribute(p0, pvaid, _t.i[0]); + _input->getAttribute(p1, pvaid, _t.i[1]); + _input->getAttribute(p2, pvaid, _t.i[2]); + return core::length(core::cross(p1 - p0, p2 - p0)).x<=1.0e-19F; + }); + const size_t newSize = std::distance(begin, newEnd) * sizeof(Triangle); + + auto newBuf = ICPUBuffer::create({ newSize }); + memcpy(newBuf->getPointer(), copy, newSize); + _NBL_ALIGNED_FREE(copy); + + SBufferBinding idxBufBinding; + idxBufBinding.offset = 0ull; + idxBufBinding.buffer = std::move(newBuf); + _input->setIndexBufferBinding(std::move(idxBufBinding)); + _input->setIndexCount(newSize/sizeof(IdxT)); +} +template void CMeshManipulator::_filterInvalidTriangles(ICPUMeshBuffer* _input); +template void 
CMeshManipulator::_filterInvalidTriangles(ICPUMeshBuffer* _input); + +core::vector CMeshManipulator::findBetterFormatF(E_FORMAT* _outType, size_t* _outSize, E_FORMAT* _outPrevType, const ICPUMeshBuffer* _meshbuffer, uint32_t _attrId, const SErrorMetric& _errMetric, CQuantNormalCache& _cache) +{ + if (!_meshbuffer->getPipeline()) + return {}; + + const E_FORMAT thisType = _meshbuffer->getAttribFormat(_attrId); + + if (!isFloatingPointFormat(thisType) && !isNormalizedFormat(thisType) && !isScaledFormat(thisType)) + return {}; + + core::vector attribs; + + + const uint32_t cpa = getFormatChannelCount(thisType); + + float min[4]{ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + float max[4]{ -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX }; + + core::vectorSIMDf attr; + const uint32_t cnt = IMeshManipulator::upperBoundVertexID(_meshbuffer); + for (uint32_t idx = 0u; idx < cnt; ++idx) + { + _meshbuffer->getAttribute(attr, _attrId, idx); + attribs.push_back(attr); + for (uint32_t i = 0; i < cpa ; ++i) + { + if (attr.pointer[i] < min[i]) + min[i] = attr.pointer[i]; + if (attr.pointer[i] > max[i]) + max[i] = attr.pointer[i]; + } + } + + core::vector possibleTypes = findTypesOfProperRangeF(thisType, getTexelOrBlockBytesize(thisType), min, max, _errMetric); + std::sort(possibleTypes.begin(), possibleTypes.end(), [](const SAttribTypeChoice& t1, const SAttribTypeChoice& t2) { return getTexelOrBlockBytesize(t1.type) < getTexelOrBlockBytesize(t2.type); }); + + *_outPrevType = thisType; + *_outType = thisType; + *_outSize = getTexelOrBlockBytesize(*_outType); + + for (const SAttribTypeChoice& t : possibleTypes) + { + if (calcMaxQuantizationError({ thisType }, t, attribs, _errMetric, _cache)) + { + if (getTexelOrBlockBytesize(t.type) < getTexelOrBlockBytesize(thisType)) + { + *_outType = t.type; + *_outSize = getTexelOrBlockBytesize(*_outType); + } + + return attribs; + } + } + + return attribs; +} + +core::vector CMeshManipulator::findBetterFormatI(E_FORMAT* _outType, size_t* _outSize, E_FORMAT* _outPrevType, const ICPUMeshBuffer* _meshbuffer, uint32_t _attrId, const SErrorMetric& _errMetric) +{ + if (!_meshbuffer->getPipeline()) + return {}; + + const E_FORMAT thisType = _meshbuffer->getAttribFormat(_attrId); + + if (!isIntegerFormat(thisType)) + return {}; + + if (isBGRALayoutFormat(thisType)) + return {}; // BGRA is supported only by a few normalized types (this is function for integer types) + + core::vector attribs; + + + const uint32_t cpa = getFormatChannelCount(thisType); + + uint32_t min[4]; + uint32_t max[4]; + if (!isSignedFormat(thisType)) + for (size_t i = 0; i < 4; ++i) + min[i] = UINT_MAX; + else + for (size_t i = 0; i < 4; ++i) + min[i] = INT_MAX; + if (!isSignedFormat(thisType)) + for (size_t i = 0; i < 4; ++i) + max[i] = 0; + else + for (size_t i = 0; i < 4; ++i) + max[i] = INT_MIN; + + + SIntegerAttr attr; + const uint32_t cnt = IMeshManipulator::upperBoundVertexID(_meshbuffer); + for (uint32_t idx = 0u; idx < cnt; ++idx) + { + _meshbuffer->getAttribute(attr.pointer, _attrId, idx); + attribs.push_back(attr); + for (uint32_t i = 0; i < cpa; ++i) + { + if (!isSignedFormat(thisType)) + { + if (attr.pointer[i] < min[i]) + min[i] = attr.pointer[i]; + if (attr.pointer[i] > max[i]) + max[i] = attr.pointer[i]; + } + else + { + if (((int32_t*)attr.pointer + i)[0] < ((int32_t*)min + i)[0]) + min[i] = attr.pointer[i]; + if (((int32_t*)attr.pointer + i)[0] > ((int32_t*)max + i)[0]) + max[i] = attr.pointer[i]; + } + } + } + + *_outPrevType = *_outType = thisType; + *_outSize = getTexelOrBlockBytesize(thisType); + 
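    // (descriptive note) the defaults above keep the attribute in its original format; the type is
    // narrowed only when getBestTypeI finds a strictly smaller integer/scaled format whose range
    // covers the recorded per-channel min/max, and attributes measured with the EEM_ANGLES metric
    // (integer normals) are returned unchanged.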
*_outPrevType = thisType; + + if (_errMetric.method == EEM_ANGLES) // native integers normals does not change + return attribs; + + *_outType = getBestTypeI(thisType, _outSize, min, max); + if (getTexelOrBlockBytesize(*_outType) >= getTexelOrBlockBytesize(thisType)) + { + *_outType = thisType; + *_outSize = getTexelOrBlockBytesize(thisType); + } + return attribs; +} + +E_FORMAT CMeshManipulator::getBestTypeI(E_FORMAT _originalType, size_t* _outSize, const uint32_t* _min, const uint32_t* _max) +{ + using namespace video; + + const bool isNativeInteger = isIntegerFormat(_originalType); + const bool isUnsigned = !isSignedFormat(_originalType); + + const uint32_t originalCpa = getFormatChannelCount(_originalType); + + core::vector nativeInts{ + EF_R8G8_UINT, + EF_R8G8_SINT, + EF_R8G8B8_UINT, + EF_R8G8B8_SINT, + EF_R8G8B8A8_UINT, + EF_R8G8B8A8_SINT, + EF_A2B10G10R10_UINT_PACK32, + EF_A2B10G10R10_SINT_PACK32, + EF_R16_UINT, + EF_R16_SINT, + EF_R16G16_UINT, + EF_R16G16_SINT, + EF_R16G16B16_UINT, + EF_R16G16B16_SINT, + EF_R16G16B16A16_UINT, + EF_R16G16B16A16_SINT, + EF_R32_UINT, + EF_R32_SINT, + EF_R32G32_UINT, + EF_R32G32_SINT, + EF_R32G32B32_UINT, + EF_R32G32B32_SINT, + EF_R32G32B32A32_UINT, + EF_R32G32B32A32_SINT + }; + core::vector scaledInts{ + EF_R8G8_USCALED, + EF_R8G8_SSCALED, + EF_R8G8B8_USCALED, + EF_R8G8B8_SSCALED, + EF_R8G8B8A8_USCALED, + EF_R8G8B8A8_SSCALED, + EF_A2B10G10R10_USCALED_PACK32, + EF_A2B10G10R10_SSCALED_PACK32, + EF_R16_USCALED, + EF_R16_SSCALED, + EF_R16G16_USCALED, + EF_R16G16_SSCALED, + EF_R16G16B16_USCALED, + EF_R16G16B16_SSCALED, + EF_R16G16B16A16_USCALED, + EF_R16G16B16A16_SSCALED + }; + + core::vector& all = isNativeInteger ? nativeInts : scaledInts; + if (originalCpa > 1u) + { + all.erase( + std::remove_if(all.begin(), all.end(), + [originalCpa](E_FORMAT fmt) { return getFormatChannelCount(fmt) < originalCpa; } + ), + all.end() + ); + } + + auto minValueOfTypeINT = [](E_FORMAT _fmt, uint32_t _cmpntNum) -> int32_t { + if (!isSignedFormat(_fmt)) + return 0; + + switch (_fmt) + { + case EF_A2R10G10B10_SSCALED_PACK32: + case EF_A2R10G10B10_SINT_PACK32: + case EF_A2B10G10R10_SSCALED_PACK32: + case EF_A2B10G10R10_SINT_PACK32: + if (_cmpntNum < 3u) + return -512; + else return -2; + break; + default: + { + const uint32_t bitsPerCh = getTexelOrBlockBytesize(_fmt)*8u/getFormatChannelCount(_fmt); + return int32_t(-uint64_t(1ull<<(bitsPerCh-1u))); + } + } + }; + auto maxValueOfTypeINT = [](E_FORMAT _fmt, uint32_t _cmpntNum) -> uint32_t { + switch (_fmt) + { + case EF_A2R10G10B10_USCALED_PACK32: + case EF_A2R10G10B10_UINT_PACK32: + case EF_A2B10G10R10_USCALED_PACK32: + case EF_A2B10G10R10_UINT_PACK32: + if (_cmpntNum < 3u) + return 1023u; + else return 3u; + break; + case EF_A2R10G10B10_SSCALED_PACK32: + case EF_A2R10G10B10_SINT_PACK32: + case EF_A2B10G10R10_SSCALED_PACK32: + case EF_A2B10G10R10_SINT_PACK32: + if (_cmpntNum < 3u) + return 511u; + else return 1u; + break; + default: + { + const uint32_t bitsPerCh = getTexelOrBlockBytesize(_fmt)*8u/getFormatChannelCount(_fmt); + const uint64_t r = (1ull<>1); + } + } + }; + + E_FORMAT bestType = _originalType; + for (auto it = all.begin(); it != all.end(); ++it) + { + bool ok = true; + for (uint32_t cmpntNum = 0; cmpntNum < originalCpa; ++cmpntNum) // check only `_cpa` components because even if (chosenCpa > _cpa), we don't care about extra components + { + if (isUnsigned) + { + if (!(_min[cmpntNum] >= minValueOfTypeINT(*it, cmpntNum) && _max[cmpntNum] <= maxValueOfTypeINT(*it, cmpntNum))) //! TODO: FIX signed vs. 
unsigned comparison + { + ok = false; + break; + } + } + else + { + if (!(((int32_t*)(_min + cmpntNum))[0] >= minValueOfTypeINT(*it, cmpntNum) && ((int32_t*)(_max + cmpntNum))[0] <= maxValueOfTypeINT(*it, cmpntNum))) //! TODO: FIX signed vs. unsigned comparison + { + ok = false; + break; + } + } + } + if (ok && getTexelOrBlockBytesize(*it) < getTexelOrBlockBytesize(bestType)) // vertexAttrSize array defined in IMeshBuffer.h + { + bestType = *it; + *_outSize = getTexelOrBlockBytesize(bestType); + } + } + + return bestType; +} + +core::vector CMeshManipulator::findTypesOfProperRangeF(E_FORMAT _type, size_t _sizeThreshold, const float * _min, const float * _max, const SErrorMetric& _errMetric) +{ + using namespace video; + + core::vector all{ + EF_B10G11R11_UFLOAT_PACK32, + EF_R16_SFLOAT, + EF_R16G16_SFLOAT, + EF_R16G16B16_SFLOAT, + EF_R16G16B16A16_SFLOAT, + EF_R32_SFLOAT, + EF_R32G32_SFLOAT, + EF_R32G32B32_SFLOAT, + EF_R32G32B32A32_SFLOAT, + EF_R8G8_UNORM, + EF_R8G8_SNORM, + EF_R8G8B8_UNORM, + EF_R8G8B8_SNORM, + EF_B8G8R8A8_UNORM, //bgra + EF_R8G8B8A8_UNORM, + EF_R8G8B8A8_SNORM, + EF_A2B10G10R10_UNORM_PACK32, + EF_A2B10G10R10_SNORM_PACK32, + EF_A2R10G10B10_UNORM_PACK32, //bgra + EF_A2R10G10B10_SNORM_PACK32, //bgra + EF_R16_UNORM, + EF_R16_SNORM, + EF_R16G16_UNORM, + EF_R16G16_SNORM, + EF_R16G16B16_UNORM, + EF_R16G16B16_SNORM, + EF_R16G16B16A16_UNORM, + EF_R16G16B16A16_SNORM + }; + core::vector normalized{ + EF_B8G8R8A8_UNORM, //bgra + EF_R8G8B8A8_UNORM, + EF_R8G8B8A8_SNORM, + EF_A2B10G10R10_UNORM_PACK32, + EF_A2B10G10R10_SNORM_PACK32, + EF_A2R10G10B10_UNORM_PACK32, //bgra + EF_A2R10G10B10_SNORM_PACK32, //bgra + EF_R16_UNORM, + EF_R16_SNORM, + EF_R16G16_UNORM, + EF_R16G16_SNORM, + EF_R16G16B16_UNORM, + EF_R16G16B16_SNORM, + EF_R16G16B16A16_UNORM, + EF_R16G16B16A16_SNORM + }; + core::vector bgra{ + EF_B8G8R8A8_UNORM, //bgra + EF_A2R10G10B10_UNORM_PACK32, //bgra + EF_A2R10G10B10_SNORM_PACK32, //bgra + }; + core::vector normals{ + EF_R8_SNORM, + EF_R8G8_SNORM, + EF_R8G8B8_SNORM, + EF_R8G8B8A8_SNORM, + EF_R16_SNORM, + EF_R16G16_SNORM, + EF_R16G16B16_SNORM, + EF_R16G16B16A16_SNORM, + EF_A2B10G10R10_SNORM_PACK32, + EF_A2R10G10B10_SNORM_PACK32, //bgra + EF_R16_SFLOAT, + EF_R16G16_SFLOAT, + EF_R16G16B16_SFLOAT, + EF_R16G16B16A16_SFLOAT + }; + + auto minValueOfTypeFP = [](E_FORMAT _fmt, uint32_t _cmpntNum) -> float { + if (isNormalizedFormat(_fmt)) + { + return isSignedFormat(_fmt) ? 
-1.f : 0.f; + } + switch (_fmt) + { + case EF_R16_SFLOAT: + case EF_R16G16_SFLOAT: + case EF_R16G16B16_SFLOAT: + case EF_R16G16B16A16_SFLOAT: + return -65504.f; + case EF_R32_SFLOAT: + case EF_R32G32_SFLOAT: + case EF_R32G32B32_SFLOAT: + case EF_R32G32B32A32_SFLOAT: + return -FLT_MAX; + case EF_B10G11R11_UFLOAT_PACK32: + return 0.f; + default: + return 1.f; + } + }; + auto maxValueOfTypeFP = [](E_FORMAT _fmt, uint32_t _cmpntNum) -> float { + if (isNormalizedFormat(_fmt)) + { + return 1.f; + } + switch (_fmt) + { + case EF_R16_SFLOAT: + case EF_R16G16_SFLOAT: + case EF_R16G16B16_SFLOAT: + case EF_R16G16B16A16_SFLOAT: + return 65504.f; + case EF_R32_SFLOAT: + case EF_R32G32_SFLOAT: + case EF_R32G32B32_SFLOAT: + case EF_R32G32B32A32_SFLOAT: + return FLT_MAX; + case EF_B10G11R11_UFLOAT_PACK32: + if (_cmpntNum < 2u) + return 65024.f; + else return 64512.f; + default: + return 0.f; + } + }; + + if (isNormalizedFormat(_type) || _errMetric.method == EEM_ANGLES) + { + if (_errMetric.method == EEM_ANGLES) + { + if (isBGRALayoutFormat(_type)) + { + all = core::vector(1u, EF_A2R10G10B10_SNORM_PACK32); + } + else all = std::move(normals); + } + else if (isBGRALayoutFormat(_type)) + all = std::move(bgra); + else + all = std::move(normalized); + } + + if (isNormalizedFormat(_type) && !isSignedFormat(_type)) + all.erase(std::remove_if(all.begin(), all.end(), [](E_FORMAT _t) { return isSignedFormat(_t); }), all.end()); + else if (isNormalizedFormat(_type) && isSignedFormat(_type)) + all.erase(std::remove_if(all.begin(), all.end(), [](E_FORMAT _t) { return !isSignedFormat(_t); }), all.end()); + + const uint32_t originalCpa = getFormatChannelCount(_type); + all.erase( + std::remove_if(all.begin(), all.end(), + [originalCpa](E_FORMAT fmt) { return getFormatChannelCount(fmt) < originalCpa; } + ), + all.end() + ); + + core::vector possibleTypes; + core::vectorSIMDf min(_min), max(_max); + + for (auto it = all.begin(); it != all.end(); ++it) + { + bool ok = true; + for (uint32_t cmpntNum = 0; cmpntNum < originalCpa; ++cmpntNum) // check only `_cpa` components because even if (chosenCpa > _cpa), we don't care about extra components + { + if (!(min.pointer[cmpntNum] >= minValueOfTypeFP(*it, cmpntNum) && max.pointer[cmpntNum] <= maxValueOfTypeFP(*it, cmpntNum))) + { + ok = false; + break; // break loop comparing (*it)'s range component by component + } + } + if (ok && getTexelOrBlockBytesize(*it) <= _sizeThreshold) + possibleTypes.push_back({*it}); + } + return possibleTypes; +} + +bool CMeshManipulator::calcMaxQuantizationError(const SAttribTypeChoice& _srcType, const SAttribTypeChoice& _dstType, const core::vector& _srcData, const SErrorMetric& _errMetric, CQuantNormalCache& _cache) +{ + using namespace video; + + using QuantF_t = core::vectorSIMDf(*)(const core::vectorSIMDf&, E_FORMAT, E_FORMAT, CQuantNormalCache & _cache); + + QuantF_t quantFunc = nullptr; + + if (_errMetric.method == EEM_ANGLES) + { + switch (_dstType.type) + { + case EF_R8_SNORM: + case EF_R8G8_SNORM: + case EF_R8G8B8_SNORM: + case EF_R8G8B8A8_SNORM: + quantFunc = [](const core::vectorSIMDf& _in, E_FORMAT, E_FORMAT, CQuantNormalCache& _cache) -> core::vectorSIMDf { + uint8_t buf[32]; + ((CQuantNormalCache::value_type_t*)buf)[0] = _cache.quantize(_in); + + core::vectorSIMDf retval; + ICPUMeshBuffer::getAttribute(retval, buf, EF_R8G8B8A8_SNORM); + retval.w = 1.f; + return retval; + }; + break; + case EF_A2R10G10B10_SNORM_PACK32: + case EF_A2B10G10R10_SNORM_PACK32: // bgra + quantFunc = [](const core::vectorSIMDf& _in, E_FORMAT, E_FORMAT, 
CQuantNormalCache& _cache) -> core::vectorSIMDf { + uint8_t buf[32]; + ((CQuantNormalCache::value_type_t<EF_A2B10G10R10_SNORM_PACK32>*)buf)[0] = _cache.quantize<EF_A2B10G10R10_SNORM_PACK32>(_in); + + core::vectorSIMDf retval; + ICPUMeshBuffer::getAttribute(retval, buf, EF_A2R10G10B10_SNORM_PACK32); + retval.w = 1.f; + return retval; + }; + break; + case EF_R16_SNORM: + case EF_R16G16_SNORM: + case EF_R16G16B16_SNORM: + case EF_R16G16B16A16_SNORM: + quantFunc = [](const core::vectorSIMDf& _in, E_FORMAT, E_FORMAT, CQuantNormalCache& _cache) -> core::vectorSIMDf { + uint8_t buf[32]; + ((CQuantNormalCache::value_type_t<EF_R16G16B16A16_SNORM>*)buf)[0] = _cache.quantize<EF_R16G16B16A16_SNORM>(_in); + + core::vectorSIMDf retval; + ICPUMeshBuffer::getAttribute(retval, buf, EF_R16G16B16A16_SNORM); + retval.w = 1.f; + return retval; + }; + break; + default: + quantFunc = nullptr; + break; + } + } + else + { + quantFunc = [](const core::vectorSIMDf& _in, E_FORMAT _inType, E_FORMAT _outType, CQuantNormalCache& _cache) -> core::vectorSIMDf { + uint8_t buf[32]; + ICPUMeshBuffer::setAttribute(_in, buf, _outType); + core::vectorSIMDf out(0.f, 0.f, 0.f, 1.f); + ICPUMeshBuffer::getAttribute(out, buf, _outType); + return out; + }; + } + + _NBL_DEBUG_BREAK_IF(!quantFunc) + if (!quantFunc) + return false; + + for (const core::vectorSIMDf& d : _srcData) + { + const core::vectorSIMDf quantized = quantFunc(d, _srcType.type, _dstType.type, _cache); + if (!compareFloatingPointAttribute(d, quantized, getFormatChannelCount(_srcType.type), _errMetric)) + return false; + } + + return true; +} + +core::smart_refctd_ptr<ICPUBuffer> IMeshManipulator::idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) +{ + if (_inIndexType == EIT_16BIT) + { + if (_outIndexType == EIT_16BIT) + return CMeshManipulator::lineStripsToLines<uint16_t, uint16_t>(_input, _idxCount); + else + return CMeshManipulator::lineStripsToLines<uint16_t, uint32_t>(_input, _idxCount); + } + else if (_inIndexType == EIT_32BIT) + { + if (_outIndexType == EIT_16BIT) + return CMeshManipulator::lineStripsToLines<uint32_t, uint16_t>(_input, _idxCount); + else + return CMeshManipulator::lineStripsToLines<uint32_t, uint32_t>(_input, _idxCount); + } + return nullptr; +} + +core::smart_refctd_ptr<ICPUBuffer> IMeshManipulator::idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) +{ + if (_inIndexType == EIT_16BIT) + { + if (_outIndexType == EIT_16BIT) + return CMeshManipulator::triangleStripsToTriangles<uint16_t, uint16_t>(_input, _idxCount); + else + return CMeshManipulator::triangleStripsToTriangles<uint16_t, uint32_t>(_input, _idxCount); + } + else if (_inIndexType == EIT_32BIT) + { + if (_outIndexType == EIT_16BIT) + return CMeshManipulator::triangleStripsToTriangles<uint32_t, uint16_t>(_input, _idxCount); + else + return CMeshManipulator::triangleStripsToTriangles<uint32_t, uint32_t>(_input, _idxCount); + } + return nullptr; +} + +core::smart_refctd_ptr<ICPUBuffer> IMeshManipulator::idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) +{ + if (_inIndexType == EIT_16BIT) + { + if (_outIndexType == EIT_16BIT) + return CMeshManipulator::trianglesFanToTriangles<uint16_t, uint16_t>(_input, _idxCount); + else + return CMeshManipulator::trianglesFanToTriangles<uint16_t, uint32_t>(_input, _idxCount); + } + else if (_inIndexType == EIT_32BIT) + { + if (_outIndexType == EIT_16BIT) + return CMeshManipulator::trianglesFanToTriangles<uint32_t, uint16_t>(_input, _idxCount); + else + return CMeshManipulator::trianglesFanToTriangles<uint32_t, uint32_t>(_input, _idxCount); + } + return nullptr; +} + +float IMeshManipulator::DistanceToLine(core::vectorSIMDf P0, core::vectorSIMDf P1, core::vectorSIMDf InPoint) +{ + 
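// Descriptive note: despite the name, this returns |cross(P0 - P1, InPoint - P0)|^2, i.e. the squared point-to-line distance scaled by |P1 - P0|^2. That is sufficient for the caller below (calculateOBB), which only ranks candidate points against the same segment P0-P1; a true distance would additionally divide by dot(P1 - P0, P1 - P0) and take a square root.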
core::vectorSIMDf PointToStart = InPoint - P0; + core::vectorSIMDf Diff = core::cross(P0 - P1, PointToStart); + + return core::dot(Diff, Diff).x; +} + +float IMeshManipulator::DistanceToPlane(core::vectorSIMDf InPoint, core::vectorSIMDf PlanePoint, core::vectorSIMDf PlaneNormal) +{ + core::vectorSIMDf PointToPlane = InPoint - PlanePoint; + + return (core::dot(PointToPlane, PlaneNormal).x >= 0) ? core::abs(core::dot(PointToPlane, PlaneNormal).x) : 0; +} + +core::matrix3x4SIMD IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer) +{ + auto FindMinMaxProj = [&](const core::vectorSIMDf& Dir, const core::vectorSIMDf Extrema[]) -> core::vectorSIMDf + { + float MinPoint, MaxPoint; + MinPoint = MaxPoint = core::dot(Dir, Extrema[0]).x; + + for (int i = 1; i < 12; i++) { + float Proj = core::dot(Dir, Extrema[i]).x; + if (MinPoint > Proj) MinPoint = Proj; + if (MaxPoint < Proj) MaxPoint = Proj; + } + + return core::vectorSIMDf(MaxPoint, MinPoint, 0); + }; + + auto ComputeAxis = [&](const core::vectorSIMDf& P0, const core::vectorSIMDf& P1, const core::vectorSIMDf& P2, core::vectorSIMDf* AxesEdge, float& PrevQuality, const core::vectorSIMDf Extrema[]) -> void + { + core::vectorSIMDf e0 = P1 - P0; + core::vectorSIMDf Edges[3]; + Edges[0] = e0 / core::length(e0); + Edges[1] = core::cross(P2 - P1, P1 - P0); + Edges[1] = Edges[1] / core::length(Edges[1]); + Edges[2] = core::cross(Edges[0], Edges[1]); + + core::vectorSIMDf Edge10Proj = FindMinMaxProj(Edges[0], Extrema); + core::vectorSIMDf Edge20Proj = FindMinMaxProj(Edges[1], Extrema); + core::vectorSIMDf Edge30Proj = FindMinMaxProj(Edges[2], Extrema); + core::vectorSIMDf Max2 = core::vectorSIMDf(Edge10Proj.x, Edge20Proj.x, Edge30Proj.x); + core::vectorSIMDf Min2 = core::vectorSIMDf(Edge10Proj.y, Edge20Proj.y, Edge30Proj.y); + core::vectorSIMDf Diff = Max2 - Min2; + float Quality = Diff.x * Diff.y + Diff.x * Diff.z + Diff.y * Diff.z; + + if (Quality < PrevQuality) { + PrevQuality = Quality; + for (int i = 0; i < 3; i++) { + AxesEdge[i] = Edges[i]; + } + } + }; + + core::vectorSIMDf Extrema[12]; + float A = (core::sqrt(5.0f) - 1.0f) / 2.0f; + core::vectorSIMDf N[6]; + N[0] = core::vectorSIMDf(0, 1, A); + N[1] = core::vectorSIMDf(0, 1, -A); + N[2] = core::vectorSIMDf(1, A, 0); + N[3] = core::vectorSIMDf(1, -A, 0); + N[4] = core::vectorSIMDf(A, 0, 1); + N[5] = core::vectorSIMDf(A, 0, -1); + float Bs[12]; + float B; + int indexcount = meshbuffer->getIndexCount(); + core::vectorSIMDf CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(0)); + core::vectorSIMDf AABBMax = CachedVertex; + core::vectorSIMDf AABBMin = CachedVertex; + for (int k = 0; k < 12; k += 2) { + B = core::dot(N[k / 2], CachedVertex).x; + Extrema[k] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k] = B; + Extrema[k + 1] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k + 1] = B; + } + for (uint32_t j = 1u; j < indexcount; j += 1u) { + CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(j)); + for (int k = 0; k < 12; k += 2) { + B = core::dot(N[k / 2], CachedVertex).x; + if (B > Bs[k] || j == 0) { Extrema[k] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k] = B; } + if (B < Bs[k + 1] || j == 0) { Extrema[k + 1] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k + 1] = B; } + } + AABBMax = core::max(AABBMax, CachedVertex); + AABBMin = core::min(AABBMin, CachedVertex); + } + + int LBTE1 = -1; + float MaxDiff = 0; + for (int i = 0; i < 12; i += 2) { + 
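// Descriptive note: for each of the 6 sampled directions, Extrema[i]/Extrema[i+1] hold the maximum/minimum-projection vertices; this loop keeps the pair with the largest squared separation, which becomes the base edge P0-P1 from which the candidate OBB axes are derived below.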
core::vectorSIMDf C = (Extrema[i]) - (Extrema[i + 1]); float TempDiff = core::dot(C, C).x; if (TempDiff > MaxDiff) { MaxDiff = TempDiff; LBTE1 = i; } + } + assert(LBTE1 != -1); + + core::vectorSIMDf P0 = Extrema[LBTE1]; + core::vectorSIMDf P1 = Extrema[LBTE1 + 1]; + + int LBTE3 = 0; + float MaxDist = 0; + int RemoveAt = 0; + + for (int i = 0; i < 10; i++) { + int index = i; + if (index >= LBTE1) index += 2; + float TempDist = DistanceToLine(P0, P1, core::vectorSIMDf(Extrema[index].x, Extrema[index].y, Extrema[index].z)); + if (TempDist > MaxDist || i == 0) { + MaxDist = TempDist; + LBTE3 = index; + RemoveAt = i; + } + } + + core::vectorSIMDf P2 = Extrema[LBTE3]; + core::vectorSIMDf ExtremaRemainingTemp[9]; + for (int i = 0; i < 9; i++) { + int index = i; + if (index >= RemoveAt) index += 1; + if (index >= LBTE1) index += 2; + ExtremaRemainingTemp[i] = core::vectorSIMDf(Extrema[index].x, Extrema[index].y, Extrema[index].z, index); + } + + float MaxDistPlane = -9999999.0f; + float MinDistPlane = -9999999.0f; + float TempDistPlane = 0; + core::vectorSIMDf Q0 = core::vectorSIMDf(0, 0, 0); + core::vectorSIMDf Q1 = core::vectorSIMDf(0, 0, 0); + core::vectorSIMDf Norm = core::cross(P2 - P1, P2 - P0); + Norm /= core::length(Norm); + for (int i = 0; i < 9; i++) { + TempDistPlane = DistanceToPlane(core::vectorSIMDf(ExtremaRemainingTemp[i].x, ExtremaRemainingTemp[i].y, ExtremaRemainingTemp[i].z), P0, Norm); + if (TempDistPlane > MaxDistPlane || i == 0) { + MaxDistPlane = TempDistPlane; + Q0 = Extrema[(int)ExtremaRemainingTemp[i].w]; + } + TempDistPlane = DistanceToPlane(core::vectorSIMDf(ExtremaRemainingTemp[i].x, ExtremaRemainingTemp[i].y, ExtremaRemainingTemp[i].z), P0, -Norm); + if (TempDistPlane > MinDistPlane || i == 0) { + MinDistPlane = TempDistPlane; + Q1 = Extrema[(int)ExtremaRemainingTemp[i].w]; + } + } + + float BestQuality = 99999999999999.0f; + core::vectorSIMDf BestAxis[3]; + ComputeAxis(P0, P1, P2, BestAxis, BestQuality, Extrema); + ComputeAxis(P2, P0, P1, BestAxis, BestQuality, Extrema); + ComputeAxis(P1, P2, P0, BestAxis, BestQuality, Extrema); + + ComputeAxis(P1, Q0, P0, BestAxis, BestQuality, Extrema); + ComputeAxis(P0, P1, Q0, BestAxis, BestQuality, Extrema); + ComputeAxis(Q0, P0, P1, BestAxis, BestQuality, Extrema); + + ComputeAxis(P2, Q0, P0, BestAxis, BestQuality, Extrema); + ComputeAxis(P0, P2, Q0, BestAxis, BestQuality, Extrema); + ComputeAxis(Q0, P0, P2, BestAxis, BestQuality, Extrema); + + ComputeAxis(P1, Q0, P2, BestAxis, BestQuality, Extrema); + ComputeAxis(P2, P1, Q0, BestAxis, BestQuality, Extrema); + ComputeAxis(Q0, P2, P1, BestAxis, BestQuality, Extrema); + + ComputeAxis(P1, Q1, P0, BestAxis, BestQuality, Extrema); + ComputeAxis(P0, P1, Q1, BestAxis, BestQuality, Extrema); + ComputeAxis(Q1, P0, P1, BestAxis, BestQuality, Extrema); + + ComputeAxis(P2, Q1, P0, BestAxis, BestQuality, Extrema); + ComputeAxis(P0, P2, Q1, BestAxis, BestQuality, Extrema); + ComputeAxis(Q1, P0, P2, BestAxis, BestQuality, Extrema); + + ComputeAxis(P1, Q1, P2, BestAxis, BestQuality, Extrema); + ComputeAxis(P2, P1, Q1, BestAxis, BestQuality, Extrema); + ComputeAxis(Q1, P2, P1, BestAxis, BestQuality, Extrema); + + core::matrix3x4SIMD TransMat = core::matrix3x4SIMD( + BestAxis[0].x, BestAxis[1].x, BestAxis[2].x, 0, + BestAxis[0].y, BestAxis[1].y, BestAxis[2].y, 0, + BestAxis[0].z, BestAxis[1].z, BestAxis[2].z, 0); + + core::vectorSIMDf MinPoint; + core::vectorSIMDf MaxPoint; + CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(0)); + MinPoint = 
core::vectorSIMDf(core::dot(BestAxis[0], CachedVertex).x, core::dot(BestAxis[1], CachedVertex).x, core::dot(BestAxis[2], CachedVertex).x); + MaxPoint = MinPoint; + for (uint32_t j = 1u; j < indexcount; j += 1u) + { + CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(j)); + core::vectorSIMDf Proj = core::vectorSIMDf(core::dot(BestAxis[0], CachedVertex).x, core::dot(BestAxis[1], CachedVertex).x, core::dot(BestAxis[2], CachedVertex).x); + MinPoint = core::min(MinPoint, Proj); + MaxPoint = core::max(MaxPoint, Proj); + } + + core::vectorSIMDf OBBDiff = MaxPoint - MinPoint; + float OBBQuality = OBBDiff.x * OBBDiff.y + OBBDiff.y * OBBDiff.z + OBBDiff.z * OBBDiff.x; + + core::vectorSIMDf ABBDiff = AABBMax - AABBMin; + float ABBQuality = ABBDiff.x * ABBDiff.y + ABBDiff.y * ABBDiff.z + ABBDiff.z * ABBDiff.x; + core::matrix3x4SIMD scaleMat; + core::matrix3x4SIMD translationMat; + translationMat.setTranslation(-(MinPoint) / OBBDiff); + scaleMat.setScale(OBBDiff); + TransMat = core::concatenateBFollowedByA(TransMat, scaleMat); + TransMat = core::concatenateBFollowedByA(TransMat, translationMat); + if (ABBQuality < OBBQuality) { + translationMat.setTranslation(-(AABBMin) / ABBDiff); + scaleMat.setScale(ABBDiff); + TransMat = core::matrix3x4SIMD( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0); + TransMat = core::concatenateBFollowedByA(TransMat, scaleMat); + TransMat = core::concatenateBFollowedByA(TransMat, translationMat); + } + + return TransMat; +} +#endif +} // end namespace nbl::asset diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 085ed3c923..d55362ef39 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -257,6 +257,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/obb.hlsl") #sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/basic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index ca82da688a..a6a1cb1bce 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -367,4 +367,17 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA return true; } +hlsl::float32_t3x4 DrawAABB::getTransformFromOBB(const hlsl::shapes::OBB<3, float>& obb) +{ + const auto obbScale = obb.ext * 2.0f; + const auto axesScaleX = obb.axes[0] * obbScale.x; + const auto axesScaleY = obb.axes[1] * obbScale.y; + const auto axesScaleZ = obb.axes[2] * obbScale.z; + return float32_t3x4{ + axesScaleX.x, axesScaleY.x, axesScaleZ.x, obb.mid.x - (0.5 * (axesScaleX.x + axesScaleY.x + axesScaleZ.x)), + axesScaleX.y, axesScaleY.y, axesScaleZ.y, obb.mid.y - (0.5 * (axesScaleX.y + axesScaleY.y + axesScaleZ.y)), + axesScaleX.z, axesScaleY.z, axesScaleZ.z, obb.mid.z - (0.5 * (axesScaleX.z + axesScaleY.z + axesScaleZ.z)), + }; +} + } diff --git a/src/nbl/ext/ImGui/ImGui.cpp b/src/nbl/ext/ImGui/ImGui.cpp index f477e96cdf..4c7c96953e 100644 --- a/src/nbl/ext/ImGui/ImGui.cpp +++ b/src/nbl/ext/ImGui/ImGui.cpp @@ -332,6 +332,7 @@ core::smart_refctd_ptr UI::createPipeline(SCreation rasterizationParams.faceCullingMode = EFCM_NONE; 
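// Annotation (assumption, not from the patch): with depth writes already disabled, forcing depthCompareOp to ECO_ALWAYS below presumably guarantees UI fragments are never rejected by whatever depth attachment the caller's framebuffer binds, so the overlay composites on top of the scene regardless of its depth contents.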
rasterizationParams.depthWriteEnable = false; rasterizationParams.depthBoundsTestEnable = false; + rasterizationParams.depthCompareOp = ECO_ALWAYS; rasterizationParams.viewportCount = creationParams.viewportCount; }
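// Illustrative usage (not part of the patch): a minimal sketch of how an OBB could be handed to the debug
// drawer touched above. The OBB fields (mid / axes / ext) are exactly the ones getTransformFromOBB() reads;
// the helper function name, include paths and the drawer wiring are assumptions, not confirmed Nabla API.
#include "nbl/builtin/hlsl/shapes/obb.hlsl"
#include "nbl/ext/DebugDraw/CDrawAABB.h"

static nbl::hlsl::float32_t3x4 makeDebugOBBTransform()
{
	using namespace nbl;
	hlsl::shapes::OBB<3, float> obb;
	obb.mid = hlsl::float32_t3(1.f, 2.f, 3.f);                     // box centre
	obb.axes[0] = hlsl::float32_t3(1.f, 0.f, 0.f);                 // orthonormal box basis
	obb.axes[1] = hlsl::float32_t3(0.f, 0.7071068f, 0.7071068f);
	obb.axes[2] = hlsl::float32_t3(0.f, -0.7071068f, 0.7071068f);
	obb.ext = hlsl::float32_t3(0.5f, 1.f, 2.f);                    // half-extents along each axis
	// Columns are the axes scaled by the full extents and the translation is the minimum corner,
	// so the returned matrix maps the unit cube [0,1]^3 onto the oriented box; it can then be fed
	// to whichever DrawAABB path consumes a per-instance transform.
	return ext::debug_draw::DrawAABB::getTransformFromOBB(obb);
}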