feat: godot-engine-source-4.3-stable

parent c59a7dcade
commit 7125d019b5

11149 changed files with 5070401 additions and 0 deletions
engine/thirdparty/embree/kernels/builders/bvh_builder_hair.h (new vendored file, 411 lines)
@@ -0,0 +1,411 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../bvh/bvh.h"
|
||||
#include "../geometry/primitive.h"
|
||||
#include "../builders/bvh_builder_sah.h"
|
||||
#include "../builders/heuristic_binning_array_aligned.h"
|
||||
#include "../builders/heuristic_binning_array_unaligned.h"
|
||||
#include "../builders/heuristic_strand_array.h"
|
||||
|
||||
#define NUM_HAIR_OBJECT_BINS 32
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct BVHBuilderHair
|
||||
{
|
||||
/*! settings for builder */
|
||||
struct Settings
|
||||
{
|
||||
/*! default settings */
|
||||
Settings ()
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}
|
||||
|
||||
public:
|
||||
size_t branchingFactor; //!< branching factor of BVH to build
|
||||
size_t maxDepth; //!< maximum depth of BVH to build
|
||||
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
|
||||
size_t minLeafSize; //!< minimum size of a leaf
|
||||
size_t maxLeafSize; //!< maximum size of a leaf
|
||||
size_t finished_range_threshold; //!< finished range threshold
|
||||
};
|
||||
|
||||
template<typename NodeRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateAABBNodeFunc,
|
||||
typename SetAABBNodeFunc,
|
||||
typename CreateOBBNodeFunc,
|
||||
typename SetOBBNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor,
|
||||
typename ReportFinishedRangeFunc>
|
||||
|
||||
class BuilderT
|
||||
{
|
||||
ALIGNED_CLASS_(16);
|
||||
friend struct BVHBuilderHair;
|
||||
|
||||
typedef FastAllocator::CachedAllocator Allocator;
|
||||
typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
|
||||
typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
|
||||
typedef HeuristicStrandSplit HeuristicStrandSplitSAH;
|
||||
|
||||
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
|
||||
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
|
||||
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
|
||||
|
||||
static const size_t travCostAligned = 1;
|
||||
static const size_t travCostUnaligned = 5;
|
||||
static const size_t intCost = 6;
|
||||
|
||||
BuilderT (Scene* scene,
|
||||
PrimRef* prims,
|
||||
const CreateAllocFunc& createAlloc,
|
||||
const CreateAABBNodeFunc& createAABBNode,
|
||||
const SetAABBNodeFunc& setAABBNode,
|
||||
const CreateOBBNodeFunc& createOBBNode,
|
||||
const SetOBBNodeFunc& setOBBNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const ReportFinishedRangeFunc& reportFinishedRange,
|
||||
const Settings settings)
|
||||
|
||||
: cfg(settings),
|
||||
prims(prims),
|
||||
createAlloc(createAlloc),
|
||||
createAABBNode(createAABBNode),
|
||||
setAABBNode(setAABBNode),
|
||||
createOBBNode(createOBBNode),
|
||||
setOBBNode(setOBBNode),
|
||||
createLeaf(createLeaf),
|
||||
progressMonitor(progressMonitor),
|
||||
reportFinishedRange(reportFinishedRange),
|
||||
alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {}
|
||||
|
||||
/*! checks if all primitives are from the same geometry */
|
||||
__forceinline bool sameGeometry(const PrimInfoRange& range)
|
||||
{
|
||||
if (range.size() == 0) return true;
|
||||
unsigned int firstGeomID = prims[range.begin()].geomID();
|
||||
for (size_t i=range.begin()+1; i<range.end(); i++) {
|
||||
if (prims[i].geomID() != firstGeomID){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! creates a large leaf that could be larger than supported by the BVH */
|
||||
NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
|
||||
{
|
||||
/* this should never occur, but if it does it is a fatal error */
|
||||
if (depth > cfg.maxDepth)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
|
||||
|
||||
/* create leaf for few primitives */
|
||||
if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
|
||||
return createLeaf(prims,pinfo,alloc);
|
||||
|
||||
/* fill all children by always splitting the largest one */
|
||||
PrimInfoRange children[MAX_BRANCHING_FACTOR];
|
||||
unsigned numChildren = 1;
|
||||
children[0] = pinfo;
|
||||
|
||||
do {
|
||||
|
||||
/* find best child with largest number of primitives */
|
||||
int bestChild = -1;
|
||||
size_t bestSize = 0;
|
||||
for (unsigned i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))
|
||||
continue;
|
||||
|
||||
/* remember child with largest size */
|
||||
if (children[i].size() > bestSize) {
|
||||
bestSize = children[i].size();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
__aligned(64) PrimInfoRange left, right;
|
||||
if (!sameGeometry(children[bestChild])) {
|
||||
alignedHeuristic.splitByGeometry(children[bestChild],left,right);
|
||||
} else {
|
||||
alignedHeuristic.splitFallback(children[bestChild],left,right);
|
||||
}
|
||||
|
||||
/* add new children left and right */
|
||||
children[bestChild] = children[numChildren-1];
|
||||
children[numChildren-1] = left;
|
||||
children[numChildren+0] = right;
|
||||
numChildren++;
|
||||
|
||||
} while (numChildren < cfg.branchingFactor);
|
||||
|
||||
/* create node */
|
||||
auto node = createAABBNode(alloc);
|
||||
|
||||
for (size_t i=0; i<numChildren; i++) {
|
||||
const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);
|
||||
setAABBNode(node,i,child,children[i].geomBounds);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/*! performs split */
|
||||
__noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
|
||||
{
|
||||
/* variable to track the SAH of the best splitting approach */
|
||||
float bestSAH = inf;
|
||||
const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
|
||||
const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);
|
||||
|
||||
/* try standard binning in aligned space */
|
||||
float alignedObjectSAH = inf;
|
||||
HeuristicBinningSAH::Split alignedObjectSplit;
|
||||
if (aligned) {
|
||||
alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
|
||||
alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
|
||||
bestSAH = min(alignedObjectSAH,bestSAH);
|
||||
}
|
||||
|
||||
/* try standard binning in unaligned space */
|
||||
UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
|
||||
LinearSpace3fa uspace;
|
||||
float unalignedObjectSAH = inf;
|
||||
if (bestSAH > 0.7f*leafSAH) {
|
||||
uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
|
||||
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
|
||||
unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
|
||||
unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
|
||||
bestSAH = min(unalignedObjectSAH,bestSAH);
|
||||
}
|
||||
|
||||
/* try splitting into two strands */
|
||||
HeuristicStrandSplitSAH::Split strandSplit;
|
||||
float strandSAH = inf;
|
||||
if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
|
||||
strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
|
||||
strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
|
||||
bestSAH = min(strandSAH,bestSAH);
|
||||
}
|
||||
|
||||
/* fallback if SAH heuristics failed */
|
||||
if (unlikely(!std::isfinite(bestSAH)))
|
||||
{
|
||||
alignedHeuristic.deterministic_order(pinfo);
|
||||
alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
|
||||
}
|
||||
|
||||
/* perform aligned split if this is best */
|
||||
else if (bestSAH == alignedObjectSAH) {
|
||||
alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
|
||||
}
|
||||
|
||||
/* perform unaligned split if this is best */
|
||||
else if (bestSAH == unalignedObjectSAH) {
|
||||
unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
|
||||
aligned = false;
|
||||
}
|
||||
|
||||
/* perform strand split if this is best */
|
||||
else if (bestSAH == strandSAH) {
|
||||
strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
|
||||
aligned = false;
|
||||
}
|
||||
|
||||
/* can never happen */
|
||||
else
|
||||
assert(false);
|
||||
}
|
||||
|
||||
/*! recursive build */
|
||||
NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
|
||||
{
|
||||
/* get thread local allocator */
|
||||
if (!alloc)
|
||||
alloc = createAlloc();
|
||||
|
||||
/* call memory monitor function to signal progress */
|
||||
if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
|
||||
progressMonitor(pinfo.size());
|
||||
|
||||
PrimInfoRange children[MAX_BRANCHING_FACTOR];
|
||||
|
||||
/* create leaf node */
|
||||
if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
|
||||
alignedHeuristic.deterministic_order(pinfo);
|
||||
return createLargeLeaf(depth,pinfo,alloc);
|
||||
}
|
||||
|
||||
/* fill all children by always splitting the one with the largest surface area */
|
||||
size_t numChildren = 1;
|
||||
children[0] = pinfo;
|
||||
bool aligned = true;
|
||||
|
||||
do {
|
||||
|
||||
/* find best child with largest bounding box area */
|
||||
ssize_t bestChild = -1;
|
||||
float bestArea = neg_inf;
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].size() <= cfg.minLeafSize)
|
||||
continue;
|
||||
|
||||
/* remember child with largest area */
|
||||
if (area(children[i].geomBounds) > bestArea) {
|
||||
bestArea = area(children[i].geomBounds);
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
PrimInfoRange left, right;
|
||||
split(children[bestChild],left,right,aligned);
|
||||
|
||||
/* add new children left and right */
|
||||
children[bestChild] = children[numChildren-1];
|
||||
children[numChildren-1] = left;
|
||||
children[numChildren+0] = right;
|
||||
numChildren++;
|
||||
|
||||
} while (numChildren < cfg.branchingFactor);
|
||||
|
||||
NodeRef node;
|
||||
|
||||
/* create aligned node */
|
||||
if (aligned)
|
||||
{
|
||||
node = createAABBNode(alloc);
|
||||
|
||||
/* spawn tasks or ... */
|
||||
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
|
||||
{
|
||||
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
|
||||
setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
}
|
||||
/* ... continue sequentially */
|
||||
else {
|
||||
for (size_t i=0; i<numChildren; i++) {
|
||||
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
|
||||
setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* create unaligned node */
|
||||
else
|
||||
{
|
||||
node = createOBBNode(alloc);
|
||||
|
||||
/* spawn tasks or ... */
|
||||
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
|
||||
{
|
||||
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
|
||||
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
|
||||
const OBBox3fa obounds(space,sinfo.geomBounds);
|
||||
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
|
||||
setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
}
|
||||
/* ... continue sequentially */
|
||||
else
|
||||
{
|
||||
for (size_t i=0; i<numChildren; i++) {
|
||||
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
|
||||
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
|
||||
const OBBox3fa obounds(space,sinfo.geomBounds);
|
||||
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
|
||||
setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* reports a finished range of primrefs */
|
||||
if (unlikely(alloc_barrier))
|
||||
reportFinishedRange(pinfo);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
private:
|
||||
Settings cfg;
|
||||
PrimRef* prims;
|
||||
const CreateAllocFunc& createAlloc;
|
||||
const CreateAABBNodeFunc& createAABBNode;
|
||||
const SetAABBNodeFunc& setAABBNode;
|
||||
const CreateOBBNodeFunc& createOBBNode;
|
||||
const SetOBBNodeFunc& setOBBNode;
|
||||
const CreateLeafFunc& createLeaf;
|
||||
const ProgressMonitor& progressMonitor;
|
||||
const ReportFinishedRangeFunc& reportFinishedRange;
|
||||
|
||||
private:
|
||||
HeuristicBinningSAH alignedHeuristic;
|
||||
UnalignedHeuristicBinningSAH unalignedHeuristic;
|
||||
HeuristicStrandSplitSAH strandHeuristic;
|
||||
};
|
||||
|
||||
template<typename NodeRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateAABBNodeFunc,
|
||||
typename SetAABBNodeFunc,
|
||||
typename CreateOBBNodeFunc,
|
||||
typename SetOBBNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor,
|
||||
typename ReportFinishedRangeFunc>
|
||||
|
||||
static NodeRef build (const CreateAllocFunc& createAlloc,
|
||||
const CreateAABBNodeFunc& createAABBNode,
|
||||
const SetAABBNodeFunc& setAABBNode,
|
||||
const CreateOBBNodeFunc& createOBBNode,
|
||||
const SetOBBNodeFunc& setOBBNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const ReportFinishedRangeFunc& reportFinishedRange,
|
||||
Scene* scene,
|
||||
PrimRef* prims,
|
||||
const PrimInfo& pinfo,
|
||||
const Settings settings)
|
||||
{
|
||||
typedef BuilderT<NodeRef,
|
||||
CreateAllocFunc,
|
||||
CreateAABBNodeFunc,SetAABBNodeFunc,
|
||||
CreateOBBNodeFunc,SetOBBNodeFunc,
|
||||
CreateLeafFunc,ProgressMonitor,
|
||||
ReportFinishedRangeFunc> Builder;
|
||||
|
||||
Builder builder(scene,prims,createAlloc,
|
||||
createAABBNode,setAABBNode,
|
||||
createOBBNode,setOBBNode,
|
||||
createLeaf,progressMonitor,reportFinishedRange,settings);
|
||||
|
||||
NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return root;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
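
The split() routine above arbitrates between aligned binning, unaligned (OBB) binning, and strand splitting purely by comparing SAH costs built from the constants travCostAligned=1, travCostUnaligned=5 and intCost=6. The standalone sketch below is not Embree code: Box, halfArea() and the candidate split SAH values are hypothetical stand-ins for Embree's own types and heuristic results, chosen only to replay the comparison.

// Illustrative sketch only: mirrors the cost comparison in BVHBuilderHair::BuilderT::split().
#include <algorithm>
#include <cstdio>

struct Box { float dx, dy, dz; };                        // hypothetical axis-aligned extents
static float halfArea(const Box& b) { return b.dx*b.dy + b.dy*b.dz + b.dz*b.dx; }

int main()
{
  const float travCostAligned = 1.0f, travCostUnaligned = 5.0f, intCost = 6.0f;
  const Box   bounds            = {4.0f, 1.0f, 1.0f};    // hypothetical node bounds (halfArea = 9)
  const float numBlocks         = 4.0f;                  // pinfo.size() rounded up to SAH blocks
  const float alignedSplitSAH   = 30.0f;                 // hypothetical heuristic outputs
  const float unalignedSplitSAH = 18.0f;

  const float leafSAH      = intCost*numBlocks*halfArea(bounds);
  const float alignedSAH   = travCostAligned  *halfArea(bounds) + intCost*alignedSplitSAH;
  const float unalignedSAH = travCostUnaligned*halfArea(bounds) + intCost*unalignedSplitSAH;

  // In the real builder the unaligned and strand heuristics are only evaluated while the
  // best SAH found so far is still above 0.7*leafSAH (which holds for these numbers).
  const float bestSAH = std::min(alignedSAH, unalignedSAH);
  std::printf("leaf=%g aligned=%g unaligned=%g best=%g\n", leafSAH, alignedSAH, unalignedSAH, bestSAH);
  return 0;
}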

engine/thirdparty/embree/kernels/builders/bvh_builder_morton.h (new vendored file, 502 lines)
@@ -0,0 +1,502 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/builder.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_sort.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct BVHBuilderMorton
|
||||
{
|
||||
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
|
||||
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
|
||||
|
||||
/*! settings for morton builder */
|
||||
struct Settings
|
||||
{
|
||||
/*! default settings */
|
||||
Settings ()
|
||||
: branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024) {}
|
||||
|
||||
/*! initialize settings from API settings */
|
||||
Settings (const RTCBuildArguments& settings)
|
||||
: branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024)
|
||||
{
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
|
||||
|
||||
minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
}
|
||||
|
||||
Settings (size_t branchingFactor, size_t maxDepth, size_t minLeafSize, size_t maxLeafSize, size_t singleThreadThreshold)
|
||||
: branchingFactor(branchingFactor), maxDepth(maxDepth), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), singleThreadThreshold(singleThreadThreshold)
|
||||
{
|
||||
minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
}
|
||||
|
||||
public:
|
||||
size_t branchingFactor; //!< branching factor of BVH to build
|
||||
size_t maxDepth; //!< maximum depth of BVH to build
|
||||
size_t minLeafSize; //!< minimum size of a leaf
|
||||
size_t maxLeafSize; //!< maximum size of a leaf
|
||||
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
|
||||
};
|
||||
|
||||
/*! Build primitive consisting of morton code and primitive ID. */
|
||||
struct __aligned(8) BuildPrim
|
||||
{
|
||||
union {
|
||||
struct {
|
||||
unsigned int code; //!< morton code
|
||||
unsigned int index; //!< i'th primitive
|
||||
};
|
||||
uint64_t t;
|
||||
};
|
||||
|
||||
/*! interface for radix sort */
|
||||
__forceinline operator unsigned() const { return code; }
|
||||
|
||||
/*! interface for standard sort */
|
||||
__forceinline bool operator<(const BuildPrim &m) const { return code < m.code; }
|
||||
};
|
||||
|
||||
/*! maps bounding box to morton code */
|
||||
struct MortonCodeMapping
|
||||
{
|
||||
static const size_t LATTICE_BITS_PER_DIM = 10;
|
||||
static const size_t LATTICE_SIZE_PER_DIM = size_t(1) << LATTICE_BITS_PER_DIM;
|
||||
|
||||
vfloat4 base;
|
||||
vfloat4 scale;
|
||||
|
||||
__forceinline MortonCodeMapping(const BBox3fa& bounds)
|
||||
{
|
||||
base = (vfloat4)bounds.lower;
|
||||
const vfloat4 diag = (vfloat4)bounds.upper - (vfloat4)bounds.lower;
|
||||
scale = select(diag > vfloat4(1E-19f), rcp(diag) * vfloat4(LATTICE_SIZE_PER_DIM * 0.99f),vfloat4(0.0f));
|
||||
}
|
||||
|
||||
__forceinline const vint4 bin (const BBox3fa& box) const
|
||||
{
|
||||
const vfloat4 lower = (vfloat4)box.lower;
|
||||
const vfloat4 upper = (vfloat4)box.upper;
|
||||
const vfloat4 centroid = lower+upper;
|
||||
return vint4((centroid-base)*scale);
|
||||
}
|
||||
|
||||
__forceinline unsigned int code (const BBox3fa& box) const
|
||||
{
|
||||
const vint4 binID = bin(box);
|
||||
const unsigned int x = extract<0>(binID);
|
||||
const unsigned int y = extract<1>(binID);
|
||||
const unsigned int z = extract<2>(binID);
|
||||
const unsigned int xyz = bitInterleave(x,y,z);
|
||||
return xyz;
|
||||
}
|
||||
};
|
||||
|
||||
#if defined (__AVX2__) || defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
/*! for AVX2 there is a fast scalar bitInterleave */
|
||||
struct MortonCodeGenerator
|
||||
{
|
||||
__forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
|
||||
: mapping(mapping), dest(dest) {}
|
||||
|
||||
__forceinline void operator() (const BBox3fa& b, const unsigned index)
|
||||
{
|
||||
dest->index = index;
|
||||
dest->code = mapping.code(b);
|
||||
dest++;
|
||||
}
|
||||
|
||||
public:
|
||||
const MortonCodeMapping mapping;
|
||||
BuildPrim* dest;
|
||||
size_t currentID;
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/*! before AVX2 it is better to use the SSE version of bitInterleave */
|
||||
struct MortonCodeGenerator
|
||||
{
|
||||
__forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
|
||||
: mapping(mapping), dest(dest), currentID(0), slots(0), ax(0), ay(0), az(0), ai(0) {}
|
||||
|
||||
__forceinline ~MortonCodeGenerator()
|
||||
{
|
||||
if (slots != 0)
|
||||
{
|
||||
const vint4 code = bitInterleave(ax,ay,az);
|
||||
for (size_t i=0; i<slots; i++) {
|
||||
dest[currentID-slots+i].index = ai[i];
|
||||
dest[currentID-slots+i].code = code[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void operator() (const BBox3fa& b, const unsigned index)
|
||||
{
|
||||
const vint4 binID = mapping.bin(b);
|
||||
ax[slots] = extract<0>(binID);
|
||||
ay[slots] = extract<1>(binID);
|
||||
az[slots] = extract<2>(binID);
|
||||
ai[slots] = index;
|
||||
slots++;
|
||||
currentID++;
|
||||
|
||||
if (slots == 4)
|
||||
{
|
||||
const vint4 code = bitInterleave(ax,ay,az);
|
||||
vint4::storeu(&dest[currentID-4],unpacklo(code,ai));
|
||||
vint4::storeu(&dest[currentID-2],unpackhi(code,ai));
|
||||
slots = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
const MortonCodeMapping mapping;
|
||||
BuildPrim* dest;
|
||||
size_t currentID;
|
||||
size_t slots;
|
||||
vint4 ax, ay, az, ai;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename Allocator,
|
||||
typename CreateAllocator,
|
||||
typename CreateNodeFunc,
|
||||
typename SetNodeBoundsFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CalculateBounds,
|
||||
typename ProgressMonitor>
|
||||
|
||||
class BuilderT : private Settings
|
||||
{
|
||||
ALIGNED_CLASS_(16);
|
||||
|
||||
public:
|
||||
|
||||
BuilderT (CreateAllocator& createAllocator,
|
||||
CreateNodeFunc& createNode,
|
||||
SetNodeBoundsFunc& setBounds,
|
||||
CreateLeafFunc& createLeaf,
|
||||
CalculateBounds& calculateBounds,
|
||||
ProgressMonitor& progressMonitor,
|
||||
const Settings& settings)
|
||||
|
||||
: Settings(settings),
|
||||
createAllocator(createAllocator),
|
||||
createNode(createNode),
|
||||
setBounds(setBounds),
|
||||
createLeaf(createLeaf),
|
||||
calculateBounds(calculateBounds),
|
||||
progressMonitor(progressMonitor),
|
||||
morton(nullptr) {}
|
||||
|
||||
ReductionTy createLargeLeaf(size_t depth, const range<unsigned>& current, Allocator alloc)
|
||||
{
|
||||
/* this should never occur, but if it does it is a fatal error */
|
||||
if (depth > maxDepth)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
|
||||
|
||||
/* create leaf for few primitives */
|
||||
if (current.size() <= maxLeafSize)
|
||||
return createLeaf(current,alloc);
|
||||
|
||||
/* fill all children by always splitting the largest one */
|
||||
range<unsigned> children[MAX_BRANCHING_FACTOR];
|
||||
size_t numChildren = 1;
|
||||
children[0] = current;
|
||||
|
||||
do {
|
||||
|
||||
/* find best child with largest number of primitives */
|
||||
size_t bestChild = -1;
|
||||
size_t bestSize = 0;
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].size() <= maxLeafSize)
|
||||
continue;
|
||||
|
||||
/* remember child with largest size */
|
||||
if (children[i].size() > bestSize) {
|
||||
bestSize = children[i].size();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == size_t(-1)) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
auto split = children[bestChild].split();
|
||||
|
||||
/* add new children left and right */
|
||||
children[bestChild] = children[numChildren-1];
|
||||
children[numChildren-1] = split.first;
|
||||
children[numChildren+0] = split.second;
|
||||
numChildren++;
|
||||
|
||||
} while (numChildren < branchingFactor);
|
||||
|
||||
/* create node */
|
||||
auto node = createNode(alloc,numChildren);
|
||||
|
||||
/* recurse into each child */
|
||||
ReductionTy bounds[MAX_BRANCHING_FACTOR];
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
bounds[i] = createLargeLeaf(depth+1,children[i],alloc);
|
||||
|
||||
return setBounds(node,bounds,numChildren);
|
||||
}
|
||||
|
||||
/*! recreates morton codes when reaching a region where all codes are identical */
|
||||
__noinline void recreateMortonCodes(const range<unsigned>& current) const
|
||||
{
|
||||
/* fast path for small ranges */
|
||||
if (likely(current.size() < 1024))
|
||||
{
|
||||
/*! recalculate centroid bounds */
|
||||
BBox3fa centBounds(empty);
|
||||
for (size_t i=current.begin(); i<current.end(); i++)
|
||||
centBounds.extend(center2(calculateBounds(morton[i])));
|
||||
|
||||
/* recalculate morton codes */
|
||||
MortonCodeMapping mapping(centBounds);
|
||||
for (size_t i=current.begin(); i<current.end(); i++)
|
||||
morton[i].code = mapping.code(calculateBounds(morton[i]));
|
||||
|
||||
/* sort morton codes */
|
||||
std::sort(morton+current.begin(),morton+current.end());
|
||||
}
|
||||
else
|
||||
{
|
||||
/*! recalculate centroid bounds */
|
||||
auto calculateCentBounds = [&] ( const range<unsigned>& r ) {
|
||||
BBox3fa centBounds = empty;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
centBounds.extend(center2(calculateBounds(morton[i])));
|
||||
return centBounds;
|
||||
};
|
||||
const BBox3fa centBounds = parallel_reduce(current.begin(), current.end(), unsigned(1024),
|
||||
BBox3fa(empty), calculateCentBounds, BBox3fa::merge);
|
||||
|
||||
/* recalculate morton codes */
|
||||
MortonCodeMapping mapping(centBounds);
|
||||
parallel_for(current.begin(), current.end(), unsigned(1024), [&] ( const range<unsigned>& r ) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
morton[i].code = mapping.code(calculateBounds(morton[i]));
|
||||
}
|
||||
});
|
||||
|
||||
/*! sort morton codes */
|
||||
#if defined(TASKING_TBB)
|
||||
tbb::parallel_sort(morton+current.begin(),morton+current.end());
|
||||
#else
|
||||
radixsort32(morton+current.begin(),current.size());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void split(const range<unsigned>& current, range<unsigned>& left, range<unsigned>& right) const
|
||||
{
|
||||
const unsigned int code_start = morton[current.begin()].code;
|
||||
const unsigned int code_end = morton[current.end()-1].code;
|
||||
unsigned int bitpos = lzcnt(code_start^code_end);
|
||||
|
||||
/* if all items mapped to same morton code, then re-create new morton codes for the items */
|
||||
if (unlikely(bitpos == 32))
|
||||
{
|
||||
recreateMortonCodes(current);
|
||||
const unsigned int code_start = morton[current.begin()].code;
|
||||
const unsigned int code_end = morton[current.end()-1].code;
|
||||
bitpos = lzcnt(code_start^code_end);
|
||||
|
||||
/* if the morton code is still the same, use the fallback split */
|
||||
if (unlikely(bitpos == 32)) {
|
||||
current.split(left,right);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* split the items at the topmost different morton code bit */
|
||||
const unsigned int bitpos_diff = 31-bitpos;
|
||||
const unsigned int bitmask = 1 << bitpos_diff;
|
||||
|
||||
/* find location where bit differs using binary search */
|
||||
unsigned begin = current.begin();
|
||||
unsigned end = current.end();
|
||||
while (begin + 1 != end) {
|
||||
const unsigned mid = (begin+end)/2;
|
||||
const unsigned bit = morton[mid].code & bitmask;
|
||||
if (bit == 0) begin = mid; else end = mid;
|
||||
}
|
||||
unsigned center = end;
|
||||
#if defined(DEBUG)
|
||||
for (unsigned int i=begin; i<center; i++) assert((morton[i].code & bitmask) == 0);
|
||||
for (unsigned int i=center; i<end; i++) assert((morton[i].code & bitmask) == bitmask);
|
||||
#endif
|
||||
|
||||
left = make_range(current.begin(),center);
|
||||
right = make_range(center,current.end());
|
||||
}
|
||||
|
||||
ReductionTy recurse(size_t depth, const range<unsigned>& current, Allocator alloc, bool toplevel)
|
||||
{
|
||||
/* get thread local allocator */
|
||||
if (!alloc)
|
||||
alloc = createAllocator();
|
||||
|
||||
/* call memory monitor function to signal progress */
|
||||
if (toplevel && current.size() <= singleThreadThreshold)
|
||||
progressMonitor(current.size());
|
||||
|
||||
/* create leaf node */
|
||||
if (unlikely(depth+MIN_LARGE_LEAF_LEVELS >= maxDepth || current.size() <= minLeafSize))
|
||||
return createLargeLeaf(depth,current,alloc);
|
||||
|
||||
/* fill all children by always splitting the one with the largest number of primitives */
|
||||
range<unsigned> children[MAX_BRANCHING_FACTOR];
|
||||
split(current,children[0],children[1]);
|
||||
size_t numChildren = 2;
|
||||
|
||||
while (numChildren < branchingFactor)
|
||||
{
|
||||
/* find best child with largest number of primitives */
|
||||
int bestChild = -1;
|
||||
unsigned bestItems = 0;
|
||||
for (unsigned int i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].size() <= minLeafSize)
|
||||
continue;
|
||||
|
||||
/* remember child with largest size */
|
||||
if (children[i].size() > bestItems) {
|
||||
bestItems = children[i].size();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
range<unsigned> left, right;
|
||||
split(children[bestChild],left,right);
|
||||
|
||||
/* add new children left and right */
|
||||
children[bestChild] = children[numChildren-1];
|
||||
children[numChildren-1] = left;
|
||||
children[numChildren+0] = right;
|
||||
numChildren++;
|
||||
}
|
||||
|
||||
/* create leaf node if no split is possible */
|
||||
if (unlikely(numChildren == 1))
|
||||
return createLeaf(current,alloc);
|
||||
|
||||
/* allocate node */
|
||||
auto node = createNode(alloc,numChildren);
|
||||
|
||||
/* process top parts of tree parallel */
|
||||
ReductionTy bounds[MAX_BRANCHING_FACTOR];
|
||||
if (current.size() > singleThreadThreshold)
|
||||
{
|
||||
/*! parallel_for is faster than spawning sub-tasks */
|
||||
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
bounds[i] = recurse(depth+1,children[i],nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/* finish tree sequentially */
|
||||
else
|
||||
{
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
bounds[i] = recurse(depth+1,children[i],alloc,false);
|
||||
}
|
||||
|
||||
return setBounds(node,bounds,numChildren);
|
||||
}
|
||||
|
||||
/* build function */
|
||||
ReductionTy build(BuildPrim* src, BuildPrim* tmp, size_t numPrimitives)
|
||||
{
|
||||
/* sort morton codes */
|
||||
morton = src;
|
||||
radix_sort_u32(src,tmp,numPrimitives,singleThreadThreshold);
|
||||
|
||||
/* build BVH */
|
||||
const ReductionTy root = recurse(1, range<unsigned>(0,(unsigned)numPrimitives), nullptr, true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return root;
|
||||
}
|
||||
|
||||
public:
|
||||
CreateAllocator& createAllocator;
|
||||
CreateNodeFunc& createNode;
|
||||
SetNodeBoundsFunc& setBounds;
|
||||
CreateLeafFunc& createLeaf;
|
||||
CalculateBounds& calculateBounds;
|
||||
ProgressMonitor& progressMonitor;
|
||||
|
||||
public:
|
||||
BuildPrim* morton;
|
||||
};
|
||||
|
||||
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename SetBoundsFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CalculateBoundsFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAllocator,
|
||||
CreateNodeFunc createNode,
|
||||
SetBoundsFunc setBounds,
|
||||
CreateLeafFunc createLeaf,
|
||||
CalculateBoundsFunc calculateBounds,
|
||||
ProgressMonitor progressMonitor,
|
||||
BuildPrim* src,
|
||||
BuildPrim* tmp,
|
||||
size_t numPrimitives,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef BuilderT<
|
||||
ReductionTy,
|
||||
decltype(createAllocator()),
|
||||
CreateAllocFunc,
|
||||
CreateNodeFunc,
|
||||
SetBoundsFunc,
|
||||
CreateLeafFunc,
|
||||
CalculateBoundsFunc,
|
||||
ProgressMonitor> Builder;
|
||||
|
||||
Builder builder(createAllocator,
|
||||
createNode,
|
||||
setBounds,
|
||||
createLeaf,
|
||||
calculateBounds,
|
||||
progressMonitor,
|
||||
settings);
|
||||
|
||||
return builder.build(src,tmp,numPrimitives);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
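
MortonCodeMapping above quantizes primitive centroids to a 10-bit lattice per axis, and bitInterleave() weaves the three 10-bit values into one 30-bit key, so that sorting the keys clusters spatially nearby primitives. The sketch below is not Embree code: expandBits() is the standard magic-number bit spread, the axis order in morton3D() is just a convention and may differ from Embree's bitInterleave(), and the bounds and centroid in main() are made up. Embree feeds center2 = lower+upper into the mapping; using the true center with matching centroid bounds yields the same bin IDs.

// Illustrative sketch only: a scalar 30-bit Morton encoding of the kind MortonCodeMapping produces.
#include <algorithm>
#include <cstdint>
#include <cstdio>

static uint32_t expandBits(uint32_t v)          // spread the low 10 bits of v over 30 bits
{
  v = (v * 0x00010001u) & 0xFF0000FFu;
  v = (v * 0x00000101u) & 0x0F00F00Fu;
  v = (v * 0x00000011u) & 0xC30C30C3u;
  v = (v * 0x00000005u) & 0x49249249u;
  return v;
}

static uint32_t morton3D(uint32_t x, uint32_t y, uint32_t z) {
  return (expandBits(x) << 2) | (expandBits(y) << 1) | expandBits(z);
}

int main()
{
  const float lower[3] = {0.f, 0.f, 0.f}, upper[3] = {8.f, 8.f, 8.f}; // hypothetical centroid bounds
  const float c[3]     = {1.f, 5.f, 7.f};                             // one primitive centroid
  uint32_t q[3];
  for (int i = 0; i < 3; i++) {
    const float scale = 1024.f * 0.99f / (upper[i] - lower[i]);       // LATTICE_SIZE_PER_DIM * 0.99
    q[i] = std::min<uint32_t>(1023u, (uint32_t)((c[i] - lower[i]) * scale));
  }
  std::printf("code = 0x%08x\n", morton3D(q[0], q[1], q[2]));
  return 0;
}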

engine/thirdparty/embree/kernels/builders/bvh_builder_msmblur.h (new vendored file, 693 lines)
@@ -0,0 +1,693 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#define MBLUR_NUM_TEMPORAL_BINS 2
|
||||
#define MBLUR_NUM_OBJECT_BINS 32
|
||||
|
||||
#include "../bvh/bvh.h"
|
||||
#include "../builders/primref_mb.h"
|
||||
#include "heuristic_binning_array_aligned.h"
|
||||
#include "heuristic_timesplit_array.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<typename T>
|
||||
struct SharedVector
|
||||
{
|
||||
__forceinline SharedVector() {}
|
||||
|
||||
__forceinline SharedVector(T* ptr, size_t refCount = 1)
|
||||
: prims(ptr), refCount(refCount) {}
|
||||
|
||||
__forceinline void incRef() {
|
||||
refCount++;
|
||||
}
|
||||
|
||||
__forceinline void decRef()
|
||||
{
|
||||
if (--refCount == 0)
|
||||
delete prims;
|
||||
}
|
||||
|
||||
T* prims;
|
||||
size_t refCount;
|
||||
};
|
||||
|
||||
template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
|
||||
struct LocalChildListT
|
||||
{
|
||||
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
|
||||
|
||||
__forceinline LocalChildListT (const BuildRecord& record)
|
||||
: numChildren(1), numSharedPrimVecs(1)
|
||||
{
|
||||
/* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
|
||||
children[0] = record;
|
||||
primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
|
||||
}
|
||||
|
||||
__forceinline ~LocalChildListT()
|
||||
{
|
||||
for (size_t i = 0; i < numChildren; i++)
|
||||
primvecs[i]->decRef();
|
||||
}
|
||||
|
||||
__forceinline BuildRecord& operator[] ( const size_t i ) {
|
||||
return children[i];
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return numChildren;
|
||||
}
|
||||
|
||||
__forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
|
||||
{
|
||||
SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
|
||||
if (lrecord.prims.prims == bsharedPrimVec->prims) {
|
||||
primvecs[bestChild] = bsharedPrimVec;
|
||||
bsharedPrimVec->incRef();
|
||||
}
|
||||
else {
|
||||
primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
|
||||
}
|
||||
|
||||
if (rrecord.prims.prims == bsharedPrimVec->prims) {
|
||||
primvecs[numChildren] = bsharedPrimVec;
|
||||
bsharedPrimVec->incRef();
|
||||
}
|
||||
else {
|
||||
primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
|
||||
}
|
||||
bsharedPrimVec->decRef();
|
||||
new_vector.release();
|
||||
|
||||
children[bestChild] = lrecord;
|
||||
children[numChildren] = rrecord;
|
||||
numChildren++;
|
||||
}
|
||||
|
||||
public:
|
||||
array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
|
||||
array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
|
||||
size_t numChildren;
|
||||
|
||||
array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
|
||||
size_t numSharedPrimVecs;
|
||||
};
|
||||
|
||||
template<typename Mesh>
|
||||
struct RecalculatePrimRef
|
||||
{
|
||||
Scene* scene;
|
||||
|
||||
__forceinline RecalculatePrimRef (Scene* scene)
|
||||
: scene(scene) {}
|
||||
|
||||
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
|
||||
{
|
||||
const unsigned geomID = prim.geomID();
|
||||
const unsigned primID = prim.primID();
|
||||
const Mesh* mesh = scene->get<Mesh>(geomID);
|
||||
const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
|
||||
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
||||
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
||||
}
|
||||
|
||||
// __noinline is a workaround for an ICC16 bug under MacOSX
|
||||
__noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
|
||||
{
|
||||
const unsigned geomID = prim.geomID();
|
||||
const unsigned primID = prim.primID();
|
||||
const Mesh* mesh = scene->get<Mesh>(geomID);
|
||||
const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
|
||||
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
||||
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
||||
}
|
||||
|
||||
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
|
||||
return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
|
||||
}
|
||||
|
||||
// __noinline is a workaround for an ICC16 bug under MacOSX
|
||||
__noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
|
||||
return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
|
||||
}
|
||||
};
|
||||
|
||||
struct VirtualRecalculatePrimRef
|
||||
{
|
||||
Scene* scene;
|
||||
const SubGridBuildData * const sgrids;
|
||||
|
||||
__forceinline VirtualRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids = nullptr)
|
||||
: scene(scene), sgrids(sgrids) {}
|
||||
|
||||
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
|
||||
{
|
||||
const unsigned geomID = prim.geomID();
|
||||
const unsigned primID = prim.primID();
|
||||
const Geometry* mesh = scene->get(geomID);
|
||||
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range, sgrids);
|
||||
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
||||
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
||||
}
|
||||
|
||||
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
|
||||
{
|
||||
const unsigned geomID = prim.geomID();
|
||||
const unsigned primID = prim.primID();
|
||||
const Geometry* mesh = scene->get(geomID);
|
||||
const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
|
||||
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
||||
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
||||
}
|
||||
|
||||
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
|
||||
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range, sgrids);
|
||||
}
|
||||
|
||||
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
|
||||
return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
|
||||
}
|
||||
};
|
||||
|
||||
struct BVHBuilderMSMBlur
|
||||
{
|
||||
/*! settings for msmblur builder */
|
||||
struct Settings
|
||||
{
|
||||
/*! default settings */
|
||||
Settings ()
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
|
||||
travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
|
||||
singleThreadThreshold(1024) {}
|
||||
|
||||
|
||||
Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
|
||||
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
|
||||
{
|
||||
minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
}
|
||||
|
||||
public:
|
||||
size_t branchingFactor; //!< branching factor of BVH to build
|
||||
size_t maxDepth; //!< maximum depth of BVH to build
|
||||
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
|
||||
size_t minLeafSize; //!< minimum size of a leaf
|
||||
size_t maxLeafSize; //!< maximum size of a leaf
|
||||
float travCost; //!< estimated cost of one traversal step
|
||||
float intCost; //!< estimated cost of one primitive intersection
|
||||
bool singleLeafTimeSegment; //!< split time to single time range
|
||||
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
|
||||
};
|
||||
|
||||
struct BuildRecord
|
||||
{
|
||||
public:
|
||||
__forceinline BuildRecord () {}
|
||||
|
||||
__forceinline BuildRecord (size_t depth)
|
||||
: depth(depth) {}
|
||||
|
||||
__forceinline BuildRecord (const SetMB& prims, size_t depth)
|
||||
: depth(depth), prims(prims) {}
|
||||
|
||||
__forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
|
||||
return a.prims.size() < b.prims.size();
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return prims.size();
|
||||
}
|
||||
|
||||
public:
|
||||
size_t depth; //!< Depth of the root of this subtree.
|
||||
SetMB prims; //!< The list of primitives.
|
||||
};
|
||||
|
||||
struct BuildRecordSplit : public BuildRecord
|
||||
{
|
||||
__forceinline BuildRecordSplit () {}
|
||||
|
||||
__forceinline BuildRecordSplit (size_t depth)
|
||||
: BuildRecord(depth) {}
|
||||
|
||||
__forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
|
||||
: BuildRecord(record), split(split) {}
|
||||
|
||||
BinSplit<MBLUR_NUM_OBJECT_BINS> split;
|
||||
};
|
||||
|
||||
template<
|
||||
typename NodeRef,
|
||||
typename RecalculatePrimRef,
|
||||
typename Allocator,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename SetNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
class BuilderT
|
||||
{
|
||||
ALIGNED_CLASS_(16);
|
||||
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
|
||||
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
|
||||
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
|
||||
typedef mvector<PrimRefMB>* PrimRefVector;
|
||||
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
|
||||
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
|
||||
typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;
|
||||
|
||||
public:
|
||||
|
||||
BuilderT (MemoryMonitorInterface* device,
|
||||
const RecalculatePrimRef recalculatePrimRef,
|
||||
const CreateAllocFunc createAlloc,
|
||||
const CreateNodeFunc createNode,
|
||||
const SetNodeFunc setNode,
|
||||
const CreateLeafFunc createLeaf,
|
||||
const ProgressMonitor progressMonitor,
|
||||
const Settings& settings)
|
||||
: cfg(settings),
|
||||
heuristicObjectSplit(),
|
||||
heuristicTemporalSplit(device, recalculatePrimRef),
|
||||
recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
|
||||
progressMonitor(progressMonitor)
|
||||
{
|
||||
if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const SetMB& set)
|
||||
{
|
||||
/* first try standard object split */
|
||||
const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
|
||||
const float object_split_sah = object_split.splitSAH();
|
||||
|
||||
/* test temporal splits only when object split was bad */
|
||||
const float leaf_sah = set.leafSAH(cfg.logBlockSize);
|
||||
if (object_split_sah < 0.50f*leaf_sah)
|
||||
return object_split;
|
||||
|
||||
/* do temporal splits only if the time range is big enough */
|
||||
if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
|
||||
{
|
||||
const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
|
||||
const float temporal_split_sah = temporal_split.splitSAH();
|
||||
|
||||
/* take temporal split if it improved SAH */
|
||||
if (temporal_split_sah < object_split_sah)
|
||||
return temporal_split;
|
||||
}
|
||||
|
||||
return object_split;
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
/* perform object split */
|
||||
if (likely(split.data == Split::SPLIT_OBJECT)) {
|
||||
heuristicObjectSplit.split(split,set,lset,rset);
|
||||
}
|
||||
/* perform temporal split */
|
||||
else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
|
||||
return heuristicTemporalSplit.split(split,set,lset,rset);
|
||||
}
|
||||
/* perform fallback split */
|
||||
else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
|
||||
set.deterministic_order();
|
||||
splitFallback(set,lset,rset);
|
||||
}
|
||||
/* split by geometry */
|
||||
else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
|
||||
set.deterministic_order();
|
||||
splitByGeometry(set,lset,rset);
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
|
||||
return std::unique_ptr<mvector<PrimRefMB>>();
|
||||
}
|
||||
|
||||
/*! finds the best fallback split */
|
||||
__noinline Split findFallback(const SetMB& set)
|
||||
{
|
||||
/* split if primitives are not from same geometry */
|
||||
if (!sameGeometry(set))
|
||||
return Split(0.0f,Split::SPLIT_GEOMID);
|
||||
|
||||
/* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
|
||||
if (cfg.singleLeafTimeSegment)
|
||||
{
|
||||
/* test if one primitive has more than one time segment in time range, if so split time */
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const PrimRefMB& prim = (*set.prims)[i];
|
||||
const range<int> itime_range = prim.timeSegmentRange(set.time_range);
|
||||
const int localTimeSegments = itime_range.size();
|
||||
assert(localTimeSegments > 0);
|
||||
if (localTimeSegments > 1) {
|
||||
const int icenter = (itime_range.begin() + itime_range.end())/2;
|
||||
const float splitTime = prim.timeStep(icenter);
|
||||
return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* otherwise return fallback split */
|
||||
return Split(0.0f,Split::SPLIT_FALLBACK);
|
||||
}
|
||||
|
||||
/*! performs fallback split */
|
||||
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
mvector<PrimRefMB>& prims = *set.prims;
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end + 1) / 2;
|
||||
|
||||
PrimInfoMB linfo = empty;
|
||||
for (size_t i=begin; i<center; i++)
|
||||
linfo.add_primref(prims[i]);
|
||||
|
||||
PrimInfoMB rinfo = empty;
|
||||
for (size_t i=center; i<end; i++)
|
||||
rinfo.add_primref(prims[i]);
|
||||
|
||||
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
|
||||
/*! checks if all primitives are from the same geometry */
|
||||
__forceinline bool sameGeometry(const SetMB& set)
|
||||
{
|
||||
if (set.size() == 0) return true;
|
||||
mvector<PrimRefMB>& prims = *set.prims;
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
unsigned int firstGeomID = prims[begin].geomID();
|
||||
for (size_t i=begin+1; i<end; i++) {
|
||||
if (prims[i].geomID() != firstGeomID){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* split by geometry ID */
|
||||
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
assert(set.size() > 1);
|
||||
|
||||
mvector<PrimRefMB>& prims = *set.prims;
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
|
||||
PrimInfoMB left(empty);
|
||||
PrimInfoMB right(empty);
|
||||
unsigned int geomID = prims[begin].geomID();
|
||||
size_t center = serial_partitioning(prims.data(),begin,end,left,right,
|
||||
[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
|
||||
[ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });
|
||||
|
||||
new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
|
||||
const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
|
||||
{
|
||||
/* this should never occur, but if it does it is a fatal error */
|
||||
if (in.depth > cfg.maxDepth)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
|
||||
|
||||
/* replace already found split by fallback split */
|
||||
const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));
|
||||
|
||||
/* special case when directly creating leaf without any splits that could shrink time_range */
|
||||
bool force_split = false;
|
||||
if (current.depth == 1 && current.size() > 0)
|
||||
{
|
||||
BBox1f c = empty;
|
||||
BBox1f p = current.prims.time_range;
|
||||
for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
|
||||
mvector<PrimRefMB>& prims = *current.prims.prims;
|
||||
c.extend(prims[i].time_range);
|
||||
}
|
||||
|
||||
force_split = c.lower > p.lower || c.upper < p.upper;
|
||||
}
|
||||
|
||||
/* create leaf for few primitives */
|
||||
if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
|
||||
return createLeaf(current,alloc);
|
||||
|
||||
/* fill all children by always splitting the largest one */
|
||||
bool hasTimeSplits = false;
|
||||
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
|
||||
LocalChildListSplit children(current);
|
||||
|
||||
do {
|
||||
/* find best child with largest number of primitives */
|
||||
size_t bestChild = -1;
|
||||
size_t bestSize = 0;
|
||||
for (size_t i=0; i<children.size(); i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
|
||||
continue;
|
||||
|
||||
force_split = false;
|
||||
|
||||
/* remember child with largest size */
|
||||
if (children[i].size() > bestSize) {
|
||||
bestSize = children[i].size();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/* perform best found split */
|
||||
BuildRecordSplit& brecord = children[bestChild];
|
||||
BuildRecordSplit lrecord(current.depth+1);
|
||||
BuildRecordSplit rrecord(current.depth+1);
|
||||
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
|
||||
hasTimeSplits |= new_vector != nullptr;
|
||||
|
||||
/* find new splits */
|
||||
lrecord.split = findFallback(lrecord.prims);
|
||||
rrecord.split = findFallback(rrecord.prims);
|
||||
children.split(bestChild,lrecord,rrecord,std::move(new_vector));
|
||||
|
||||
} while (children.size() < cfg.branchingFactor);
|
||||
|
||||
/* detect time_ranges that have shrunken */
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
const BBox1f c = children[i].prims.time_range;
|
||||
const BBox1f p = in.prims.time_range;
|
||||
hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
|
||||
}
|
||||
|
||||
/* create node */
|
||||
auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);
|
||||
|
||||
/* recurse into each child and perform reduction */
|
||||
LBBox3fa gbounds = empty;
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
values[i] = createLargeLeaf(children[i],alloc);
|
||||
gbounds.extend(values[i].lbounds);
|
||||
}
|
||||
|
||||
setNode(current,children.children.data(),node,values,children.numChildren);
|
||||
|
||||
/* calculate geometry bounds of this node */
|
||||
if (hasTimeSplits)
|
||||
return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
|
||||
else
|
||||
return NodeRecordMB4D(node,gbounds,current.prims.time_range);
|
||||
}
|
||||
|
||||
const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
|
||||
{
|
||||
/* get thread local allocator */
|
||||
if (!alloc)
|
||||
alloc = createAlloc();
|
||||
|
||||
/* call memory monitor function to signal progress */
|
||||
if (toplevel && current.size() <= cfg.singleThreadThreshold)
|
||||
progressMonitor(current.size());
|
||||
|
||||
/*! find best split */
|
||||
const Split csplit = find(current.prims);
|
||||
|
||||
/*! compute leaf and split cost */
|
||||
const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
|
||||
const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
|
||||
assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
|
||||
|
||||
/*! create a leaf node when threshold reached or SAH tells us to stop */
|
||||
if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
|
||||
current.prims.deterministic_order();
|
||||
return createLargeLeaf(current,alloc);
|
||||
}
|
||||
|
||||
/*! perform initial split */
|
||||
SetMB lprims,rprims;
|
||||
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
|
||||
bool hasTimeSplits = new_vector != nullptr;
|
||||
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
|
||||
LocalChildList children(current);
|
||||
{
|
||||
BuildRecord lrecord(lprims,current.depth+1);
|
||||
BuildRecord rrecord(rprims,current.depth+1);
|
||||
children.split(0,lrecord,rrecord,std::move(new_vector));
|
||||
}
|
||||
|
||||
/*! split until node is full or SAH tells us to stop */
|
||||
while (children.size() < cfg.branchingFactor)
|
||||
{
|
||||
/*! find best child to split */
|
||||
float bestArea = neg_inf;
|
||||
ssize_t bestChild = -1;
|
||||
for (size_t i=0; i<children.size(); i++)
|
||||
{
|
||||
if (children[i].size() <= cfg.minLeafSize) continue;
|
||||
if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
|
||||
bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/* perform split */
|
||||
BuildRecord& brecord = children[bestChild];
|
||||
BuildRecord lrecord(current.depth+1);
|
||||
BuildRecord rrecord(current.depth+1);
|
||||
Split csplit = find(brecord.prims);
|
||||
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
|
||||
hasTimeSplits |= new_vector != nullptr;
|
||||
children.split(bestChild,lrecord,rrecord,std::move(new_vector));
|
||||
}
|
||||
|
||||
/* detect time_ranges that have shrunken */
|
||||
for (size_t i=0; i<children.size(); i++) {
|
||||
const BBox1f c = children[i].prims.time_range;
|
||||
const BBox1f p = current.prims.time_range;
|
||||
hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
|
||||
}
|
||||
|
||||
/* sort buildrecords for simpler shadow ray traversal */
|
||||
//std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (needs to be verified) !!
|
||||
|
||||
          /*! create an inner node */
          auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
          LBBox3fa gbounds = empty;

          /* spawn tasks */
          if (unlikely(current.size() > cfg.singleThreadThreshold))
          {
            /*! parallel_for is faster than spawning sub-tasks */
            parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
              for (size_t i=r.begin(); i<r.end(); i++) {
                values[i] = recurse(children[i],nullptr,true);
                _mm_mfence(); // to allow non-temporal stores during build
              }
            });

            /*! merge bounding boxes */
            for (size_t i=0; i<children.size(); i++)
              gbounds.extend(values[i].lbounds);
          }
          /* recurse into each child */
          else
          {
            //for (size_t i=0; i<children.size(); i++)
            for (ssize_t i=children.size()-1; i>=0; i--) {
              values[i] = recurse(children[i],alloc,false);
              gbounds.extend(values[i].lbounds);
            }
          }

          setNode(current,children.children.data(),node,values,children.numChildren);

          /* calculate geometry bounds of this node */
          if (unlikely(hasTimeSplits))
            return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
          else
            return NodeRecordMB4D(node,gbounds,current.prims.time_range);
        }

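The leaf test in recurse() above compares the blocked leaf SAH against the estimated cost of one more traversal step plus the best split's SAH. A minimal standalone sketch of that comparison with made-up costs and areas (plain C++, not Embree's types):

// Standalone sketch of the SAH leaf-vs-split decision used in recurse().
// All numbers are hypothetical; the builder computes them from the actual bins.
#include <cstdio>

int main()
{
  const float travCost = 1.0f;      // cost of one traversal step (cfg.travCost)
  const float intCost  = 1.0f;      // cost of one primitive intersection (cfg.intCost)

  const float nodeHalfArea = 10.0f; // half surface area of the current node
  const float leafSAH      = 12.0f; // leaf cost estimate returned by leafSAH()
  const float splitSAH     = 7.5f;  // best binned split cost returned by find()

  const float leafCost  = intCost * leafSAH;
  const float splitCost = travCost * nodeHalfArea + intCost * splitSAH;

  // Mirror of the builder's test: make a leaf only when it is not more
  // expensive than continuing to split (size thresholds omitted here).
  if (leafCost <= splitCost)
    std::printf("create leaf (%.1f <= %.1f)\n", leafCost, splitCost);
  else
    std::printf("keep splitting (%.1f > %.1f)\n", leafCost, splitCost);
  return 0;
}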
        /*! builder entry function */
        __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
        {
          const SetMB set(pinfo,&prims);
          auto ret = recurse(BuildRecord(set,1),nullptr,true);
          _mm_mfence(); // to allow non-temporal stores during build
          return ret;
        }

      private:
        Settings cfg;
        HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
        HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
        const RecalculatePrimRef recalculatePrimRef;
        const CreateAllocFunc createAlloc;
        const CreateNodeFunc createNode;
        const SetNodeFunc setNode;
        const CreateLeafFunc createLeaf;
        const ProgressMonitor progressMonitor;
      };

      template<typename NodeRef,
               typename RecalculatePrimRef,
               typename CreateAllocFunc,
               typename CreateNodeFunc,
               typename SetNodeFunc,
               typename CreateLeafFunc,
               typename ProgressMonitorFunc>

      static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
                                                    const PrimInfoMB& pinfo,
                                                    MemoryMonitorInterface* device,
                                                    const RecalculatePrimRef recalculatePrimRef,
                                                    const CreateAllocFunc createAlloc,
                                                    const CreateNodeFunc createNode,
                                                    const SetNodeFunc setNode,
                                                    const CreateLeafFunc createLeaf,
                                                    const ProgressMonitorFunc progressMonitor,
                                                    const Settings& settings)
      {
        typedef BuilderT<
          NodeRef,
          RecalculatePrimRef,
          decltype(createAlloc()),
          CreateAllocFunc,
          CreateNodeFunc,
          SetNodeFunc,
          CreateLeafFunc,
          ProgressMonitorFunc> Builder;

        Builder builder(device,
                        recalculatePrimRef,
                        createAlloc,
                        createNode,
                        setNode,
                        createLeaf,
                        progressMonitor,
                        settings);


        return builder(prims,pinfo);
      }
    };
  }
}
526
engine/thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h
vendored
Normal file
@ -0,0 +1,526 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../bvh/bvh.h"
#include "../geometry/primitive.h"
#include "../builders/bvh_builder_msmblur.h"
#include "../builders/heuristic_binning_array_aligned.h"
#include "../builders/heuristic_binning_array_unaligned.h"
#include "../builders/heuristic_timesplit_array.h"

namespace embree
{
  namespace isa
  {
    struct BVHBuilderHairMSMBlur
    {
      /*! settings for msmblur builder */
      struct Settings
      {
        /*! default settings */
        Settings ()
        : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}

      public:
        size_t branchingFactor; //!< branching factor of BVH to build
        size_t maxDepth;        //!< maximum depth of BVH to build
        size_t logBlockSize;    //!< log2 of blocksize for SAH heuristic
        size_t minLeafSize;     //!< minimum size of a leaf
        size_t maxLeafSize;     //!< maximum size of a leaf
      };

      struct BuildRecord
      {
      public:
        __forceinline BuildRecord () {}

        __forceinline BuildRecord (size_t depth)
        : depth(depth) {}

        __forceinline BuildRecord (const SetMB& prims, size_t depth)
        : depth(depth), prims(prims) {}

        __forceinline size_t size() const {
          return prims.size();
        }

      public:
        size_t depth; //!< depth of the root of this subtree
        SetMB prims;  //!< the list of primitives
      };

      template<typename NodeRef,
               typename RecalculatePrimRef,
               typename CreateAllocFunc,
               typename CreateAABBNodeMBFunc,
               typename SetAABBNodeMBFunc,
               typename CreateOBBNodeMBFunc,
               typename SetOBBNodeMBFunc,
               typename CreateLeafFunc,
               typename ProgressMonitor>

      class BuilderT
      {
        ALIGNED_CLASS_(16);

        static const size_t MAX_BRANCHING_FACTOR = 8;         //!< maximum supported BVH branching factor
        static const size_t MIN_LARGE_LEAF_LEVELS = 8;        //!< create balanced tree if we are that many levels before the maximum tree depth
        static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build

        typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
        typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;

        typedef FastAllocator::CachedAllocator Allocator;
        typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;

        typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
        typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
        typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;

      public:

        BuilderT (Scene* scene,
                  const RecalculatePrimRef& recalculatePrimRef,
                  const CreateAllocFunc& createAlloc,
                  const CreateAABBNodeMBFunc& createAABBNodeMB,
                  const SetAABBNodeMBFunc& setAABBNodeMB,
                  const CreateOBBNodeMBFunc& createOBBNodeMB,
                  const SetOBBNodeMBFunc& setOBBNodeMB,
                  const CreateLeafFunc& createLeaf,
                  const ProgressMonitor& progressMonitor,
                  const Settings settings)

        : cfg(settings),
          scene(scene),
          recalculatePrimRef(recalculatePrimRef),
          createAlloc(createAlloc),
          createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
          createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
          createLeaf(createLeaf),
          progressMonitor(progressMonitor),
          unalignedHeuristic(scene),
          temporalSplitHeuristic(scene->device,recalculatePrimRef) {}

      private:

        /*! checks if all primitives are from the same geometry */
        __forceinline bool sameGeometry(const SetMB& set)
        {
          mvector<PrimRefMB>& prims = *set.prims;
          unsigned int firstGeomID = prims[set.begin()].geomID();
          for (size_t i=set.begin()+1; i<set.end(); i++) {
            if (prims[i].geomID() != firstGeomID){
              return false;
            }
          }
          return true;
        }

        /*! performs some split if SAH approaches fail */
        void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
        {
          mvector<PrimRefMB>& prims = *set.prims;

          const size_t begin = set.begin();
          const size_t end = set.end();
          const size_t center = (begin + end)/2;

          PrimInfoMB linfo = empty;
          for (size_t i=begin; i<center; i++)
            linfo.add_primref(prims[i]);

          PrimInfoMB rinfo = empty;
          for (size_t i=center; i<end; i++)
            rinfo.add_primref(prims[i]);

          new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
          new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
        }

        void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
        {
          assert(set.size() > 1);
          const size_t begin = set.begin();
          const size_t end = set.end();
          PrimInfoMB linfo(empty);
          PrimInfoMB rinfo(empty);
          unsigned int geomID = (*set.prims)[begin].geomID();
          size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,
                                              [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
                                              [ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });

          new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
          new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
        }

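splitFallback() halves the primitive range at the median index, and splitByGeometry() partitions by the geomID of the first primitive. A simplified standalone sketch of both strategies over a plain std::vector (the Prim type here is hypothetical, not Embree's PrimRefMB):

// Simplified sketch of the two non-SAH splits above: a median split and a
// partition by the geometry ID of the first element. The Prim type is made up.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Prim { unsigned geomID; unsigned primID; };

int main()
{
  std::vector<Prim> prims = {{0,0},{0,1},{1,0},{0,2},{1,1},{2,0}};

  // splitFallback analogue: cut the index range at the middle.
  const size_t center = prims.size()/2;
  std::printf("median split: [0,%zu) | [%zu,%zu)\n", center, center, prims.size());

  // splitByGeometry analogue: move everything with the first geomID to the left.
  const unsigned geomID = prims.front().geomID;
  auto mid = std::partition(prims.begin(), prims.end(),
                            [&](const Prim& p) { return p.geomID == geomID; });
  std::printf("geometry split: %ld prims with geomID %u on the left\n",
              (long)(mid - prims.begin()), geomID);
  return 0;
}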
        /*! creates a large leaf that could be larger than supported by the BVH */
        NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
        {
          /* this should never occur but is a fatal error */
          if (current.depth > cfg.maxDepth)
            throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

          /* special case when directly creating leaf without any splits that could shrink time_range */
          bool force_split = false;
          if (current.depth == 1 && current.size() > 0)
          {
            BBox1f c = empty;
            BBox1f p = current.prims.time_range;
            for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
              mvector<PrimRefMB>& prims = *current.prims.prims;
              c.extend(prims[i].time_range);
            }

            force_split = c.lower > p.lower || c.upper < p.upper;
          }

          /* create leaf for few primitives */
          if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
            return createLeaf(current.prims,alloc);

          /* fill all children by always splitting the largest one */
          LocalChildList children(current);
          NodeRecordMB4D values[MAX_BRANCHING_FACTOR];

          do {

            /* find best child with largest bounding box area */
            int bestChild = -1;
            size_t bestSize = 0;
            for (unsigned i=0; i<children.size(); i++)
            {
              /* ignore leaves as they cannot get split */
              if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
                continue;

              force_split = false;

              /* remember child with largest size */
              if (children[i].size() > bestSize) {
                bestSize = children[i].size();
                bestChild = i;
              }
            }
            if (bestChild == -1) break;

            /*! split best child into left and right child */
            BuildRecord left(current.depth+1);
            BuildRecord right(current.depth+1);
            if (!sameGeometry(children[bestChild].prims)) {
              splitByGeometry(children[bestChild].prims,left.prims,right.prims);
            } else {
              splitFallback(children[bestChild].prims,left.prims,right.prims);
            }
            children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());

          } while (children.size() < cfg.branchingFactor);


          /* detect time_ranges that have shrunken */
          bool timesplit = false;
          for (size_t i=0; i<children.size(); i++) {
            const BBox1f c = children[i].prims.time_range;
            const BBox1f p = current.prims.time_range;
            timesplit |= c.lower > p.lower || c.upper < p.upper;
          }

          /* create node */
          NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);

          LBBox3fa bounds = empty;
          for (size_t i=0; i<children.size(); i++) {
            values[i] = createLargeLeaf(children[i],alloc);
            bounds.extend(values[i].lbounds);
          }

          setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);

          if (timesplit)
            bounds = current.prims.linearBounds(recalculatePrimRef);

          return NodeRecordMB4D(node,bounds,current.prims.time_range);
        }

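createLargeLeaf() above fills the node greedily: it keeps splitting whichever child still holds the most primitives until the branching factor is reached or every child fits into a leaf. A compact sketch of that loop over bare primitive counts (no Embree types, made-up numbers):

// Sketch of the greedy child-filling loop in createLargeLeaf(): keep splitting
// the largest child (here just a primitive count) until the node is full.
#include <cstdio>
#include <vector>

int main()
{
  const size_t branchingFactor = 4;
  const size_t maxLeafSize     = 8;

  std::vector<size_t> children = {30};          // start with one child holding 30 prims
  while (children.size() < branchingFactor)
  {
    // find the child with the most primitives that is still above the leaf limit
    size_t best = children.size(); size_t bestSize = 0;
    for (size_t i = 0; i < children.size(); i++)
      if (children[i] > maxLeafSize && children[i] > bestSize) { best = i; bestSize = children[i]; }
    if (best == children.size()) break;         // every child already fits into a leaf

    // split it in half (the real code uses splitByGeometry/splitFallback)
    const size_t left = children[best]/2, right = children[best]-left;
    children[best] = left;
    children.push_back(right);
  }

  for (size_t c : children) std::printf("child with %zu prims\n", c);
  return 0;
}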
        /*! performs split */
        std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
        {
          /* variable to track the SAH of the best splitting approach */
          float bestSAH = inf;
          const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);

          /* perform standard binning in aligned space */
          HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
          float alignedObjectSAH = alignedObjectSplit.splitSAH();
          bestSAH = min(alignedObjectSAH,bestSAH);

          /* perform standard binning in unaligned space */
          UnalignedHeuristicBinning::Split unalignedObjectSplit;
          LinearSpace3fa uspace;
          float unalignedObjectSAH = inf;
          if (alignedObjectSAH > 0.7f*leafSAH) {
            uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
            const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
            unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
            unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
            bestSAH = min(unalignedObjectSAH,bestSAH);
          }

          /* do temporal splits only if previous approaches failed to produce good SAH and the time range is large enough */
          float temporal_split_sah = inf;
          typename HeuristicTemporal::Split temporal_split;
          if (bestSAH > 0.5f*leafSAH) {
            if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {
              temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
              temporal_split_sah = temporal_split.splitSAH();
              bestSAH = min(temporal_split_sah,bestSAH);
            }
          }

          /* perform fallback split if SAH heuristics failed */
          if (unlikely(!std::isfinite(bestSAH))) {
            current.prims.deterministic_order();
            splitFallback(current.prims,lrecord.prims,rrecord.prims);
          }
          /* perform aligned split if this is best */
          else if (likely(bestSAH == alignedObjectSAH)) {
            alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
          }
          /* perform unaligned split if this is best */
          else if (likely(bestSAH == unalignedObjectSAH)) {
            unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
            aligned = false;
          }
          /* perform temporal split if this is best */
          else if (likely(bestSAH == temporal_split_sah)) {
            timesplit = true;
            return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
          }
          else
            assert(false);

          return std::unique_ptr<mvector<PrimRefMB>>();
        }

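split() above always evaluates an aligned binned split, tries an unaligned split only when the aligned SAH is poor relative to the leaf SAH, penalizes the unaligned cost by a factor of 1.3, and falls back to a temporal or median split. A standalone sketch of that selection logic with made-up SAH values:

// Sketch of the best-of-three selection in split(): aligned vs. unaligned vs.
// temporal SAH, with the 1.3x penalty on unaligned splits. Values are made up.
#include <algorithm>
#include <cstdio>
#include <limits>

int main()
{
  const float inf = std::numeric_limits<float>::infinity();
  const float leafSAH = 10.0f;

  float bestSAH = inf;
  const float alignedSAH = 8.0f;               // always evaluated
  bestSAH = std::min(alignedSAH, bestSAH);

  float unalignedSAH = inf;
  if (alignedSAH > 0.7f*leafSAH)               // only try unaligned when aligned is weak
    unalignedSAH = 1.3f * 5.0f;                // penalty makes unaligned splits more expensive
  bestSAH = std::min(unalignedSAH, bestSAH);

  float temporalSAH = inf;
  if (bestSAH > 0.5f*leafSAH)                  // only try temporal when both are still weak
    temporalSAH = 6.0f;
  bestSAH = std::min(temporalSAH, bestSAH);

  if (bestSAH == alignedSAH)        std::puts("take aligned object split");
  else if (bestSAH == unalignedSAH) std::puts("take unaligned object split");
  else if (bestSAH == temporalSAH)  std::puts("take temporal split");
  else                              std::puts("fall back to median split");
  return 0;
}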
        /*! recursive build */
        NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
        {
          /* get thread local allocator */
          if (!alloc)
            alloc = createAlloc();

          /* call memory monitor function to signal progress */
          if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
            progressMonitor(current.size());

          /* create leaf node */
          if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
            current.prims.deterministic_order();
            return createLargeLeaf(current,alloc);
          }

          /* fill all children by always splitting the one with the largest surface area */
          NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
          LocalChildList children(current);
          bool aligned = true;
          bool timesplit = false;

          do {

            /* find best child with largest bounding box area */
            ssize_t bestChild = -1;
            float bestArea = neg_inf;
            for (size_t i=0; i<children.size(); i++)
            {
              /* ignore leaves as they cannot get split */
              if (children[i].size() <= cfg.minLeafSize)
                continue;

              /* remember child with largest area */
              const float A = children[i].prims.halfArea();
              if (A > bestArea) {
                bestArea = children[i].prims.halfArea();
                bestChild = i;
              }
            }
            if (bestChild == -1) break;

            /*! split best child into left and right child */
            BuildRecord left(current.depth+1);
            BuildRecord right(current.depth+1);
            std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
            children.split(bestChild,left,right,std::move(new_vector));

          } while (children.size() < cfg.branchingFactor);

          /* detect time_ranges that have shrunken */
          for (size_t i=0; i<children.size(); i++) {
            const BBox1f c = children[i].prims.time_range;
            const BBox1f p = current.prims.time_range;
            timesplit |= c.lower > p.lower || c.upper < p.upper;
          }

          /* create time split node */
          if (timesplit)
          {
            const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);

            /* spawn tasks or ... */
            if (current.size() > SINGLE_THREADED_THRESHOLD)
            {
              parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++) {
                  values[i] = recurse(children[i],nullptr,true);
                  _mm_mfence(); // to allow non-temporal stores during build
                }
              });
            }
            /* ... continue sequentially */
            else {
              for (size_t i=0; i<children.size(); i++) {
                values[i] = recurse(children[i],alloc,false);
              }
            }

            setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);

            const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
            return NodeRecordMB4D(node,bounds,current.prims.time_range);
          }

          /* create aligned node */
          else if (aligned)
          {
            const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);

            /* spawn tasks or ... */
            if (current.size() > SINGLE_THREADED_THRESHOLD)
            {
              LBBox3fa cbounds[MAX_BRANCHING_FACTOR];
              parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++) {
                  values[i] = recurse(children[i],nullptr,true);
                  cbounds[i] = values[i].lbounds;
                  _mm_mfence(); // to allow non-temporal stores during build
                }
              });

              LBBox3fa bounds = empty;
              for (size_t i=0; i<children.size(); i++)
                bounds.extend(cbounds[i]);
              setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
              return NodeRecordMB4D(node,bounds,current.prims.time_range);
            }
            /* ... continue sequentially */
            else
            {
              LBBox3fa bounds = empty;
              for (size_t i=0; i<children.size(); i++) {
                values[i] = recurse(children[i],alloc,false);
                bounds.extend(values[i].lbounds);
              }
              setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
              return NodeRecordMB4D(node,bounds,current.prims.time_range);
            }
          }

          /* create unaligned node */
          else
          {
            const NodeRef node = createOBBNodeMB(alloc);

            /* spawn tasks or ... */
            if (current.size() > SINGLE_THREADED_THRESHOLD)
            {
              parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++) {
                  const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
                  const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
                  const auto child = recurse(children[i],nullptr,true);
                  setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
                  _mm_mfence(); // to allow non-temporal stores during build
                }
              });
            }
            /* ... continue sequentially */
            else
            {
              for (size_t i=0; i<children.size(); i++) {
                const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
                const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
                const auto child = recurse(children[i],alloc,false);
                setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
              }
            }

            const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
            return NodeRecordMB4D(node,bounds,current.prims.time_range);
          }
        }

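recurse() above switches from sequential recursion to a parallel_for over the children once the subtree exceeds SINGLE_THREADED_THRESHOLD primitives. A rough standalone analogy of that pattern using std::async (Embree uses its own tasking system and parallel_for, not std::async; the threshold and leaf size here are just illustrative):

// Rough analogy of the SINGLE_THREADED_THRESHOLD pattern in recurse(): large
// subtrees fan out to parallel tasks, small ones stay on the calling thread.
#include <cstdio>
#include <functional>
#include <future>
#include <numeric>
#include <vector>

static size_t build(const std::vector<int>& prims)
{
  const size_t threshold = 4096;                       // SINGLE_THREADED_THRESHOLD analogue
  if (prims.size() <= 8) return prims.size();          // "leaf"

  // split in half (the real builder picks SAH splits)
  std::vector<int> left(prims.begin(), prims.begin()+prims.size()/2);
  std::vector<int> right(prims.begin()+prims.size()/2, prims.end());

  if (prims.size() > threshold) {
    auto l = std::async(std::launch::async, build, std::cref(left));
    auto r = std::async(std::launch::async, build, std::cref(right));
    return l.get() + r.get();
  }
  return build(left) + build(right);                   // sequential below the threshold
}

int main()
{
  std::vector<int> prims(10000);
  std::iota(prims.begin(), prims.end(), 0);
  std::printf("leaves hold %zu primitives in total\n", build(prims));
  return 0;
}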
      public:

        /*! entry point into builder */
        NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
        {
          BuildRecord record(SetMB(pinfo,&prims),1);
          auto root = recurse(record,nullptr,true);
          _mm_mfence(); // to allow non-temporal stores during build
          return root;
        }

      private:
        Settings cfg;
        Scene* scene;
        const RecalculatePrimRef& recalculatePrimRef;
        const CreateAllocFunc& createAlloc;
        const CreateAABBNodeMBFunc& createAABBNodeMB;
        const SetAABBNodeMBFunc& setAABBNodeMB;
        const CreateOBBNodeMBFunc& createOBBNodeMB;
        const SetOBBNodeMBFunc& setOBBNodeMB;
        const CreateLeafFunc& createLeaf;
        const ProgressMonitor& progressMonitor;

      private:
        HeuristicBinning alignedHeuristic;
        UnalignedHeuristicBinning unalignedHeuristic;
        HeuristicTemporal temporalSplitHeuristic;
      };

      template<typename NodeRef,
               typename RecalculatePrimRef,
               typename CreateAllocFunc,
               typename CreateAABBNodeMBFunc,
               typename SetAABBNodeMBFunc,
               typename CreateOBBNodeMBFunc,
               typename SetOBBNodeMBFunc,
               typename CreateLeafFunc,
               typename ProgressMonitor>

      static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
                                               const RecalculatePrimRef& recalculatePrimRef,
                                               const CreateAllocFunc& createAlloc,
                                               const CreateAABBNodeMBFunc& createAABBNodeMB,
                                               const SetAABBNodeMBFunc& setAABBNodeMB,
                                               const CreateOBBNodeMBFunc& createOBBNodeMB,
                                               const SetOBBNodeMBFunc& setOBBNodeMB,
                                               const CreateLeafFunc& createLeaf,
                                               const ProgressMonitor& progressMonitor,
                                               const Settings settings)
      {
        typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
                         CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
                         CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
                         CreateLeafFunc,ProgressMonitor> Builder;

        Builder builder(scene,recalculatePrimRef,createAlloc,
                        createAABBNodeMB,setAABBNodeMB,
                        createOBBNodeMB,setOBBNodeMB,
                        createLeaf,progressMonitor,settings);

        return builder(prims,pinfo);
      }
    };
  }
}
664
engine/thirdparty/embree/kernels/builders/bvh_builder_sah.h
vendored
Normal file
@ -0,0 +1,664 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning_array_aligned.h"
|
||||
#include "heuristic_spatial_array.h"
|
||||
#include "heuristic_openmerge_array.h"
|
||||
|
||||
#define NUM_OBJECT_BINS 32
|
||||
#define NUM_SPATIAL_BINS 16
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
MAYBE_UNUSED static const float travCost = 1.0f;
|
||||
MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
|
||||
|
||||
struct GeneralBVHBuilder
|
||||
{
|
||||
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
|
||||
static const size_t MIN_LARGE_LEAF_LEVELS = 8;  //!< create balanced tree if we are that many levels before the maximum tree depth
|
||||
|
||||
|
||||
/*! settings for SAH builder */
|
||||
struct Settings
|
||||
{
|
||||
/*! default settings */
|
||||
Settings ()
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
|
||||
travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {}
|
||||
|
||||
/*! initialize settings from API settings */
|
||||
Settings (const RTCBuildArguments& settings)
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
|
||||
travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf)
|
||||
{
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(settings.sahBlockSize);
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost;
|
||||
if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost;
|
||||
|
||||
minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
}
|
||||
|
||||
Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
|
||||
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
|
||||
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
|
||||
{
|
||||
minLeafSize = min(minLeafSize,maxLeafSize);
|
||||
}
|
||||
|
||||
public:
|
||||
size_t branchingFactor; //!< branching factor of BVH to build
|
||||
size_t maxDepth; //!< maximum depth of BVH to build
|
||||
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
|
||||
size_t minLeafSize; //!< minimum size of a leaf
|
||||
size_t maxLeafSize; //!< maximum size of a leaf
|
||||
float travCost; //!< estimated cost of one traversal step
|
||||
float intCost; //!< estimated cost of one primitive intersection
|
||||
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
|
||||
size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished
|
||||
};
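The Settings constructors above derive logBlockSize as bsr(sahBlockSize), i.e. the floor of log2, and clamp minLeafSize so it never exceeds maxLeafSize. A minimal sketch of the same derivation without Embree's bsr helper (the input values are made up):

// Minimal sketch of the Settings derivation above: logBlockSize is floor(log2)
// of sahBlockSize, and minLeafSize is clamped to maxLeafSize.
#include <algorithm>
#include <cstdio>

static size_t floorLog2(size_t v) { size_t r = 0; while (v >>= 1) ++r; return r; }

int main()
{
  size_t sahBlockSize = 4, minLeafSize = 9, maxLeafSize = 7;

  const size_t logBlockSize = floorLog2(sahBlockSize);      // bsr(4) == 2
  minLeafSize = std::min(minLeafSize, maxLeafSize);         // clamp as in the constructor

  std::printf("logBlockSize=%zu minLeafSize=%zu maxLeafSize=%zu\n",
              logBlockSize, minLeafSize, maxLeafSize);
  return 0;
}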
|
||||
|
||||
/*! recursive state of builder */
|
||||
template<typename Set, typename Split>
|
||||
struct BuildRecordT
|
||||
{
|
||||
public:
|
||||
__forceinline BuildRecordT () {}
|
||||
|
||||
__forceinline BuildRecordT (size_t depth)
|
||||
: depth(depth), alloc_barrier(false), prims(empty) {}
|
||||
|
||||
__forceinline BuildRecordT (size_t depth, const Set& prims)
|
||||
: depth(depth), alloc_barrier(false), prims(prims) {}
|
||||
|
||||
__forceinline BBox3fa bounds() const { return prims.geomBounds; }
|
||||
|
||||
__forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); }
|
||||
__forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); }
|
||||
|
||||
__forceinline size_t size() const { return prims.size(); }
|
||||
|
||||
public:
|
||||
size_t depth; //!< Depth of the root of this subtree.
|
||||
bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes
|
||||
Set prims; //!< The list of primitives.
|
||||
};
|
||||
|
||||
template<typename PrimRef, typename Set>
|
||||
struct DefaultCanCreateLeafFunc
|
||||
{
|
||||
__forceinline bool operator()(const PrimRef*, const Set&) const { return true; }
|
||||
};
|
||||
|
||||
template<typename PrimRef, typename Set>
|
||||
struct DefaultCanCreateLeafSplitFunc
|
||||
{
|
||||
__forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { }
|
||||
};
|
||||
|
||||
template<typename BuildRecord,
|
||||
typename Heuristic,
|
||||
typename Set,
|
||||
typename PrimRef,
|
||||
typename ReductionTy,
|
||||
typename Allocator,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CanCreateLeafFunc,
|
||||
typename CanCreateLeafSplitFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
class BuilderT
|
||||
{
|
||||
friend struct GeneralBVHBuilder;
|
||||
|
||||
BuilderT (PrimRef* prims,
|
||||
Heuristic& heuristic,
|
||||
const CreateAllocFunc& createAlloc,
|
||||
const CreateNodeFunc& createNode,
|
||||
const UpdateNodeFunc& updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const CanCreateLeafFunc& canCreateLeaf,
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings& settings) :
|
||||
cfg(settings),
|
||||
prims(prims),
|
||||
heuristic(heuristic),
|
||||
createAlloc(createAlloc),
|
||||
createNode(createNode),
|
||||
updateNode(updateNode),
|
||||
createLeaf(createLeaf),
|
||||
canCreateLeaf(canCreateLeaf),
|
||||
canCreateLeafSplit(canCreateLeafSplit),
|
||||
progressMonitor(progressMonitor)
|
||||
{
|
||||
if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
|
||||
}
|
||||
|
||||
const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc)
|
||||
{
|
||||
/* this should never occur but is a fatal error */
|
||||
if (current.depth > cfg.maxDepth)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
|
||||
|
||||
/* create leaf for few primitives */
|
||||
if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims))
|
||||
return createLeaf(prims,current.prims,alloc);
|
||||
|
||||
/* fill all children by always splitting the largest one */
|
||||
ReductionTy values[MAX_BRANCHING_FACTOR];
|
||||
BuildRecord children[MAX_BRANCHING_FACTOR];
|
||||
size_t numChildren = 1;
|
||||
children[0] = current;
|
||||
do {
|
||||
|
||||
/* find best child with largest bounding box area */
|
||||
size_t bestChild = -1;
|
||||
size_t bestSize = 0;
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims))
|
||||
continue;
|
||||
|
||||
/* remember child with largest size */
|
||||
if (children[i].prims.size() > bestSize) {
|
||||
bestSize = children[i].prims.size();
|
||||
bestChild = i;
|
||||
}
|
||||
}
|
||||
if (bestChild == (size_t)-1) break;
|
||||
|
||||
/*! split best child into left and right child */
|
||||
BuildRecord left(current.depth+1);
|
||||
BuildRecord right(current.depth+1);
|
||||
if (!canCreateLeaf(prims,children[bestChild].prims)) {
|
||||
canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims);
|
||||
} else {
|
||||
heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims);
|
||||
}
|
||||
|
||||
/* add new children left and right */
|
||||
children[bestChild] = children[numChildren-1];
|
||||
children[numChildren-1] = left;
|
||||
children[numChildren+0] = right;
|
||||
numChildren++;
|
||||
|
||||
} while (numChildren < cfg.branchingFactor);
|
||||
|
||||
/* set barrier for primrefarrayalloc */
|
||||
if (unlikely(current.size() > cfg.primrefarrayalloc))
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
|
||||
|
||||
/* create node */
|
||||
auto node = createNode(children,numChildren,alloc);
|
||||
|
||||
/* recurse into each child and perform reduction */
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
values[i] = createLargeLeaf(children[i],alloc);
|
||||
|
||||
/* perform reduction */
|
||||
return updateNode(current,children,node,values,numChildren);
|
||||
}
|
||||
|
||||
const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel)
|
||||
{
|
||||
/* get thread local allocator */
|
||||
if (!alloc)
|
||||
alloc = createAlloc();
|
||||
|
||||
/* call memory monitor function to signal progress */
|
||||
if (toplevel && current.size() <= cfg.singleThreadThreshold)
|
||||
progressMonitor(current.size());
|
||||
|
||||
/*! find best split */
|
||||
auto split = heuristic.find(current.prims,cfg.logBlockSize);
|
||||
|
||||
/*! compute leaf and split cost */
|
||||
const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
|
||||
const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH();
|
||||
assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
|
||||
|
||||
/*! create a leaf node when threshold reached or SAH tells us to stop */
|
||||
if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
|
||||
heuristic.deterministic_order(current.prims);
|
||||
return createLargeLeaf(current,alloc);
|
||||
}
|
||||
|
||||
/*! perform initial split */
|
||||
Set lprims,rprims;
|
||||
heuristic.split(split,current.prims,lprims,rprims);
|
||||
|
||||
/*! initialize child list with initial split */
|
||||
ReductionTy values[MAX_BRANCHING_FACTOR];
|
||||
BuildRecord children[MAX_BRANCHING_FACTOR];
|
||||
children[0] = BuildRecord(current.depth+1,lprims);
|
||||
children[1] = BuildRecord(current.depth+1,rprims);
|
||||
size_t numChildren = 2;
|
||||
|
||||
/*! split until node is full or SAH tells us to stop */
|
||||
while (numChildren < cfg.branchingFactor)
|
||||
{
|
||||
/*! find best child to split */
|
||||
float bestArea = neg_inf;
|
||||
ssize_t bestChild = -1;
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
{
|
||||
/* ignore leaves as they cannot get split */
|
||||
if (children[i].prims.size() <= cfg.minLeafSize) continue;
|
||||
|
||||
/* find child with largest surface area */
|
||||
if (halfArea(children[i].prims.geomBounds) > bestArea) {
|
||||
bestChild = i;
|
||||
bestArea = halfArea(children[i].prims.geomBounds);
|
||||
}
|
||||
}
|
||||
if (bestChild == -1) break;
|
||||
|
||||
/* perform best found split */
|
||||
BuildRecord& brecord = children[bestChild];
|
||||
BuildRecord lrecord(current.depth+1);
|
||||
BuildRecord rrecord(current.depth+1);
|
||||
auto split = heuristic.find(brecord.prims,cfg.logBlockSize);
|
||||
heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims);
|
||||
children[bestChild ] = lrecord;
|
||||
children[numChildren] = rrecord;
|
||||
numChildren++;
|
||||
}
|
||||
|
||||
/* set barrier for primrefarrayalloc */
|
||||
if (unlikely(current.size() > cfg.primrefarrayalloc))
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;
|
||||
|
||||
/* sort buildrecords for faster shadow ray traversal */
|
||||
std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>());
|
||||
|
||||
/*! create an inner node */
|
||||
auto node = createNode(children,numChildren,alloc);
|
||||
|
||||
/* spawn tasks */
|
||||
if (current.size() > cfg.singleThreadThreshold)
|
||||
{
|
||||
/*! parallel_for is faster than spawning sub-tasks */
|
||||
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
values[i] = recurse(children[i],nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
}
|
||||
});
|
||||
|
||||
return updateNode(current,children,node,values,numChildren);
|
||||
}
|
||||
/* recurse into each child */
|
||||
else
|
||||
{
|
||||
for (size_t i=0; i<numChildren; i++)
|
||||
values[i] = recurse(children[i],alloc,false);
|
||||
|
||||
return updateNode(current,children,node,values,numChildren);
|
||||
}
|
||||
}
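recurse() above sorts the child build records with std::greater before creating the inner node, and BuildRecordT compares by primitive count, so the child with the most primitives comes first during traversal. A tiny sketch of that ordering with a stand-in record type (not Embree's BuildRecordT):

// Sketch of the child ordering in recurse(): records compare by primitive
// count, and std::sort with std::greater puts the largest child first.
#include <algorithm>
#include <cstdio>
#include <functional>

struct Record {
  size_t size;
  friend bool operator> (const Record& a, const Record& b) { return a.size > b.size; }
};

int main()
{
  Record children[4] = {{3},{17},{9},{11}};
  std::sort(children, children+4, std::greater<Record>());
  for (const Record& r : children) std::printf("%zu ", r.size);
  std::printf("\n"); // prints: 17 11 9 3
  return 0;
}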
|
||||
|
||||
private:
|
||||
Settings cfg;
|
||||
PrimRef* prims;
|
||||
Heuristic& heuristic;
|
||||
const CreateAllocFunc& createAlloc;
|
||||
const CreateNodeFunc& createNode;
|
||||
const UpdateNodeFunc& updateNode;
|
||||
const CreateLeafFunc& createLeaf;
|
||||
const CanCreateLeafFunc& canCreateLeaf;
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit;
|
||||
const ProgressMonitor& progressMonitor;
|
||||
};
|
||||
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename Heuristic,
|
||||
typename Set,
|
||||
typename PrimRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
__noinline static ReductionTy build(Heuristic& heuristic,
|
||||
PrimRef* prims,
|
||||
const Set& set,
|
||||
CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
|
||||
|
||||
typedef BuilderT<
|
||||
BuildRecord,
|
||||
Heuristic,
|
||||
Set,
|
||||
PrimRef,
|
||||
ReductionTy,
|
||||
decltype(createAlloc()),
|
||||
CreateAllocFunc,
|
||||
CreateNodeFunc,
|
||||
UpdateNodeFunc,
|
||||
CreateLeafFunc,
|
||||
DefaultCanCreateLeafFunc<PrimRef, Set>,
|
||||
DefaultCanCreateLeafSplitFunc<PrimRef, Set>,
|
||||
ProgressMonitor> Builder;
|
||||
|
||||
/* instantiate builder */
|
||||
Builder builder(prims,
|
||||
heuristic,
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
DefaultCanCreateLeafFunc<PrimRef, Set>(),
|
||||
DefaultCanCreateLeafSplitFunc<PrimRef, Set>(),
|
||||
progressMonitor,
|
||||
settings);
|
||||
|
||||
/* build hierarchy */
|
||||
BuildRecord record(1,set);
|
||||
const ReductionTy root = builder.recurse(record,nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return root;
|
||||
}
|
||||
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename Heuristic,
|
||||
typename Set,
|
||||
typename PrimRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CanCreateLeafFunc,
|
||||
typename CanCreateLeafSplitFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
__noinline static ReductionTy build(Heuristic& heuristic,
|
||||
PrimRef* prims,
|
||||
const Set& set,
|
||||
CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const CanCreateLeafFunc& canCreateLeaf,
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
|
||||
|
||||
typedef BuilderT<
|
||||
BuildRecord,
|
||||
Heuristic,
|
||||
Set,
|
||||
PrimRef,
|
||||
ReductionTy,
|
||||
decltype(createAlloc()),
|
||||
CreateAllocFunc,
|
||||
CreateNodeFunc,
|
||||
UpdateNodeFunc,
|
||||
CreateLeafFunc,
|
||||
CanCreateLeafFunc,
|
||||
CanCreateLeafSplitFunc,
|
||||
ProgressMonitor> Builder;
|
||||
|
||||
/* instantiate builder */
|
||||
Builder builder(prims,
|
||||
heuristic,
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
canCreateLeaf,
|
||||
canCreateLeafSplit,
|
||||
progressMonitor,
|
||||
settings);
|
||||
|
||||
/* build hierarchy */
|
||||
BuildRecord record(1,set);
|
||||
const ReductionTy root = builder.recurse(record,nullptr,true);
|
||||
_mm_mfence(); // to allow non-temporal stores during build
|
||||
return root;
|
||||
}
|
||||
};
|
||||
|
||||
/* SAH builder that operates on an array of BuildRecords */
|
||||
struct BVHBuilderBinnedSAH
|
||||
{
|
||||
typedef PrimInfoRange Set;
|
||||
typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic;
|
||||
typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
|
||||
typedef GeneralBVHBuilder::Settings Settings;
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
PrimRef* prims, const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
Heuristic heuristic(prims);
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoRange(0,pinfo.size(),pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename CanCreateLeafFunc,
|
||||
typename CanCreateLeafSplitFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode, UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
const CanCreateLeafFunc& canCreateLeaf,
|
||||
const CanCreateLeafSplitFunc& canCreateLeafSplit,
|
||||
const ProgressMonitor& progressMonitor,
|
||||
PrimRef* prims, const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
Heuristic heuristic(prims);
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoRange(0,pinfo.size(),pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
canCreateLeaf,
|
||||
canCreateLeafSplit,
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
};
|
||||
|
||||
/* Spatial SAH builder that operates on a double-buffered array of BuildRecords */
|
||||
struct BVHBuilderBinnedFastSpatialSAH
|
||||
{
|
||||
typedef PrimInfoExtRange Set;
|
||||
typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split;
|
||||
typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
|
||||
typedef GeneralBVHBuilder::Settings Settings;
|
||||
|
||||
static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
|
||||
static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
|
||||
template<typename ReductionTy, typename UserCreateLeaf>
|
||||
struct CreateLeafExt
|
||||
{
|
||||
__forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf)
|
||||
: userCreateLeaf(userCreateLeaf) {}
|
||||
|
||||
// __noinline is a workaround for an ICC2016 compiler bug
|
||||
template<typename Allocator>
|
||||
__noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const
|
||||
{
|
||||
for (size_t i=range.begin(); i<range.end(); i++)
|
||||
prims[i].lower.u &= GEOMID_MASK;
|
||||
|
||||
return userCreateLeaf(prims,range,alloc);
|
||||
}
|
||||
|
||||
const UserCreateLeaf userCreateLeaf;
|
||||
};
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename SplitPrimitiveFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode,
|
||||
UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
SplitPrimitiveFunc splitPrimitive,
|
||||
ProgressMonitor progressMonitor,
|
||||
PrimRef* prims,
|
||||
const size_t extSize,
|
||||
const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic;
|
||||
Heuristic heuristic(splitPrimitive,prims,pinfo);
|
||||
|
||||
/* calculate total surface area */ // FIXME: this sum is not deterministic
|
||||
const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double {
|
||||
|
||||
double A = 0.0f;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
PrimRef& prim = prims[i];
|
||||
A += area(prim.bounds());
|
||||
}
|
||||
return A;
|
||||
},std::plus<double>());
|
||||
|
||||
|
||||
/* calculate maximum number of spatial splits per primitive */
|
||||
const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1;
|
||||
const float f = 10.0f;
|
||||
|
||||
const float invA = 1.0f / A;
|
||||
parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) {
|
||||
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
PrimRef& prim = prims[i];
|
||||
assert((prim.geomID() & SPLITS_MASK) == 0);
|
||||
// FIXME: is there a better general heuristic ?
|
||||
const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA);
|
||||
unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf)));
|
||||
prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
}
|
||||
});
|
||||
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf),
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
};
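The build() function above budgets extra spatial splits per primitive in proportion to its share of the total surface area, nf = ceil(f * N * area_i / totalArea) with f = 10, and clamps the result into the bits reserved next to the geomID. A small numeric sketch of that budget (the reserved bit count here is a stand-in, not Embree's actual RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS value):

// Numeric sketch of the per-primitive spatial-split budget above:
// nf = ceil(f * N * area_i / totalArea), then clamped into the reserved bits.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
  const unsigned reservedBits = 5;                       // stand-in bit count
  const unsigned maxSplits = (1u << reservedBits) - 1;   // 31
  const float f = 10.0f;

  const float totalArea = 1000.0f;                       // sum of primitive areas (parallel_reduce above)
  const float primArea  = 2.5f;                          // area of one primitive
  const float numPrims  = 100000.0f;                     // number of primitives

  const float nf = std::ceil(f * numPrims * primArea / totalArea);
  const unsigned n = 4 + std::min((int)maxSplits - 4, std::max(1, (int)nf));

  std::printf("budget %u spatial splits for this primitive (nf=%.0f)\n", n, nf);
  return 0;
}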
|
||||
|
||||
/* Open/Merge SAH builder that operates on an array of BuildRecords */
|
||||
struct BVHBuilderBinnedOpenMergeSAH
|
||||
{
|
||||
static const size_t NUM_OBJECT_BINS_HQ = 32;
|
||||
typedef PrimInfoExtRange Set;
|
||||
typedef BinSplit<NUM_OBJECT_BINS_HQ> Split;
|
||||
typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
|
||||
typedef GeneralBVHBuilder::Settings Settings;
|
||||
|
||||
/*! special builder that propagates reduction over the tree */
|
||||
template<
|
||||
typename ReductionTy,
|
||||
typename BuildRef,
|
||||
typename CreateAllocFunc,
|
||||
typename CreateNodeFunc,
|
||||
typename UpdateNodeFunc,
|
||||
typename CreateLeafFunc,
|
||||
typename NodeOpenerFunc,
|
||||
typename ProgressMonitor>
|
||||
|
||||
static ReductionTy build(CreateAllocFunc createAlloc,
|
||||
CreateNodeFunc createNode,
|
||||
UpdateNodeFunc updateNode,
|
||||
const CreateLeafFunc& createLeaf,
|
||||
NodeOpenerFunc nodeOpenerFunc,
|
||||
ProgressMonitor progressMonitor,
|
||||
BuildRef* prims,
|
||||
const size_t extSize,
|
||||
const PrimInfo& pinfo,
|
||||
const Settings& settings)
|
||||
{
|
||||
typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic;
|
||||
Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor);
|
||||
|
||||
return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>(
|
||||
heuristic,
|
||||
prims,
|
||||
PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
|
||||
createAlloc,
|
||||
createNode,
|
||||
updateNode,
|
||||
createLeaf,
|
||||
progressMonitor,
|
||||
settings);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
552
engine/thirdparty/embree/kernels/builders/heuristic_binning.h
vendored
Normal file
@ -0,0 +1,552 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "priminfo.h"
|
||||
#include "priminfo_mb.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! mapping into bins */
|
||||
template<size_t BINS>
|
||||
struct BinMapping
|
||||
{
|
||||
public:
|
||||
__forceinline BinMapping() {}
|
||||
|
||||
/*! calculates the mapping */
|
||||
__forceinline BinMapping(size_t N, const BBox3fa& centBounds)
|
||||
{
|
||||
num = min(BINS,size_t(4.0f + 0.05f*N));
|
||||
assert(num >= 1);
|
||||
const vfloat4 eps = 1E-34f;
|
||||
const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
|
||||
scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
|
||||
ofs = (vfloat4) centBounds.lower;
|
||||
}
|
||||
|
||||
/*! calculates the mapping */
|
||||
__forceinline BinMapping(const BBox3fa& centBounds)
|
||||
{
|
||||
num = BINS;
|
||||
const vfloat4 eps = 1E-34f;
|
||||
const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
|
||||
scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
|
||||
ofs = (vfloat4) centBounds.lower;
|
||||
}
|
||||
|
||||
/*! calculates the mapping */
|
||||
template<typename PrimInfo>
|
||||
__forceinline BinMapping(const PrimInfo& pinfo)
|
||||
{
|
||||
const vfloat4 eps = 1E-34f;
|
||||
num = min(BINS,size_t(4.0f + 0.05f*pinfo.size()));
|
||||
const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
|
||||
scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
|
||||
ofs = (vfloat4) pinfo.centBounds.lower;
|
||||
}
|
||||
|
||||
/*! returns number of bins */
|
||||
__forceinline size_t size() const { return num; }
|
||||
|
||||
/*! slower but safe binning */
|
||||
__forceinline Vec3ia bin(const Vec3fa& p) const
|
||||
{
|
||||
const vint4 i = floori((vfloat4(p)-ofs)*scale);
|
||||
assert(i[0] >= 0 && (size_t)i[0] < num);
|
||||
assert(i[1] >= 0 && (size_t)i[1] < num);
|
||||
assert(i[2] >= 0 && (size_t)i[2] < num);
|
||||
|
||||
// we clamp to handle corner cases that could produce an out-of-bounds bin
|
||||
return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
|
||||
}
|
||||
|
||||
/*! faster but unsafe binning */
|
||||
__forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {
|
||||
return Vec3ia(floori((vfloat4(p)-ofs)*scale));
|
||||
}
|
||||
|
||||
/*! faster but unsafe binning */
|
||||
template<typename PrimRef>
|
||||
__forceinline Vec3ia bin_unsafe(const PrimRef& p) const {
|
||||
return bin_unsafe(p.binCenter());
|
||||
}
|
||||
|
||||
/*! faster but unsafe binning */
|
||||
template<typename PrimRef, typename BinBoundsAndCenter>
|
||||
__forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {
|
||||
return bin_unsafe(binBoundsAndCenter.binCenter(p));
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline bool bin_unsafe(const PrimRef& ref,
|
||||
const vint4& vSplitPos,
|
||||
const vbool4& splitDimMask) const // FIXME: rename to isLeft
|
||||
{
|
||||
return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);
|
||||
}
|
||||
/*! calculates left spatial position of bin */
|
||||
__forceinline float pos(const size_t bin, const size_t dim) const {
|
||||
return madd(float(bin),1.0f / scale[dim],ofs[dim]);
|
||||
}
|
||||
|
||||
/*! returns true if the mapping is invalid in some dimension */
|
||||
__forceinline bool invalid(const size_t dim) const {
|
||||
return scale[dim] == 0.0f;
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {
|
||||
return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
size_t num;
|
||||
vfloat4 ofs,scale; //!< linear function that maps to bin ID
|
||||
};
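BinMapping above maps a primitive centroid to a bin index with an affine transform, bin = floor((p - ofs) * scale) with scale = 0.99*num/diag, and the safe path clamps the result into [0,num-1]. A scalar sketch of the same mapping for one axis (values are made up):

// Scalar sketch of BinMapping for one axis: bin = clamp(floor((p-ofs)*scale))
// with scale = 0.99*num/diagonal, so the upper bound never overflows the bins.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
  const int   num   = 32;                 // number of bins
  const float lower = -4.0f, upper = 12.0f;
  const float diag  = upper - lower;
  const float scale = 0.99f * num / diag; // as in BinMapping
  const float ofs   = lower;

  for (float p : {-4.0f, 0.0f, 11.5f, 12.5f}) {
    int bin = (int)std::floor((p - ofs) * scale);
    bin = std::clamp(bin, 0, num - 1);    // the "slower but safe" binning clamps
    std::printf("p=%8.4f -> bin %d\n", p, bin);
  }
  return 0;
}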
|
||||
|
||||
/*! stores all information to perform some split */
|
||||
template<size_t BINS>
|
||||
struct BinSplit
|
||||
{
|
||||
enum
|
||||
{
|
||||
SPLIT_OBJECT = 0,
|
||||
SPLIT_FALLBACK = 1,
|
||||
SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already
|
||||
SPLIT_TEMPORAL = 2,
|
||||
SPLIT_GEOMID = 3,
|
||||
};
|
||||
|
||||
/*! construct an invalid split by default */
|
||||
__forceinline BinSplit()
|
||||
: sah(inf), dim(-1), pos(0), data(0) {}
|
||||
|
||||
__forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)
|
||||
: sah(sah), dim(dim), fpos(fpos), data(data) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)
|
||||
: sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}
|
||||
|
||||
/*! tests if this split is valid */
|
||||
__forceinline bool valid() const { return dim != -1; }
|
||||
|
||||
/*! calculates surface area heuristic for performing the split */
|
||||
__forceinline float splitSAH() const { return sah; }
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {
|
||||
return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
float sah; //!< SAH cost of the split
|
||||
int dim; //!< split dimension
|
||||
union { int pos; float fpos; }; //!< bin index for splitting
|
||||
unsigned int data; //!< extra optional split data
|
||||
BinMapping<BINS> mapping; //!< mapping into bins
|
||||
};
|
||||
|
||||
/*! stores extended information about the split */
|
||||
template<typename BBox>
|
||||
struct SplitInfoT
|
||||
{
|
||||
|
||||
__forceinline SplitInfoT () {}
|
||||
|
||||
__forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)
|
||||
: leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}
|
||||
|
||||
public:
|
||||
size_t leftCount,rightCount;
|
||||
BBox leftBounds,rightBounds;
|
||||
};
|
||||
|
||||
typedef SplitInfoT<BBox3fa> SplitInfo;
|
||||
typedef SplitInfoT<LBBox3fa> SplitInfo2;
|
||||
|
||||
/*! stores all binning information */
|
||||
template<size_t BINS, typename PrimRef, typename BBox>
|
||||
struct __aligned(64) BinInfoT
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef vbool4 vbool;
|
||||
typedef vint4 vint;
|
||||
typedef vfloat4 vfloat;
|
||||
|
||||
__forceinline BinInfoT() {
|
||||
}
|
||||
|
||||
__forceinline BinInfoT(EmptyTy) {
|
||||
clear();
|
||||
}
|
||||
|
||||
/*! bin access function */
|
||||
__forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; }
|
||||
__forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }
|
||||
|
||||
__forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; }
|
||||
__forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }
|
||||
|
||||
__forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; }
|
||||
__forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }
|
||||
|
||||
/*! clears the bin info */
|
||||
__forceinline void clear()
|
||||
{
|
||||
for (size_t i=0; i<BINS; i++) {
|
||||
bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;
|
||||
counts(i) = vuint4(zero);
|
||||
}
|
||||
}
|
||||
|
||||
/*! bins an array of primitives */
|
||||
__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)
|
||||
{
|
||||
if (unlikely(N == 0)) return;
|
||||
size_t i;
|
||||
for (i=0; i<N-1; i+=2)
|
||||
{
|
||||
/*! map even and odd primitive to bin */
|
||||
BBox prim0; Vec3fa center0;
|
||||
prims[i+0].binBoundsAndCenter(prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
|
||||
BBox prim1; Vec3fa center1;
|
||||
prims[i+1].binBoundsAndCenter(prim1,center1);
|
||||
const vint4 bin1 = (vint4)mapping.bin(center1);
|
||||
|
||||
/*! increase bounds for bins for even primitive */
|
||||
const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0);
|
||||
const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0);
|
||||
const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0);
|
||||
const unsigned int s0 = (unsigned int)prims[i+0].size();
|
||||
counts(b00,0)+=s0;
|
||||
counts(b01,1)+=s0;
|
||||
counts(b02,2)+=s0;
|
||||
|
||||
/*! increase bounds of bins for odd primitive */
|
||||
const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1);
|
||||
const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1);
|
||||
const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1);
|
||||
const unsigned int s1 = (unsigned int)prims[i+1].size();
|
||||
counts(b10,0)+=s1;
|
||||
counts(b11,1)+=s1;
|
||||
counts(b12,2)+=s1;
|
||||
}
|
||||
/*! for uneven number of primitives */
|
||||
if (i < N)
|
||||
{
|
||||
/*! map primitive to bin */
|
||||
BBox prim0; Vec3fa center0;
|
||||
prims[i].binBoundsAndCenter(prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
|
||||
/*! increase bounds of bins */
|
||||
const unsigned int s0 = (unsigned int)prims[i].size();
|
||||
const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
|
||||
const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
|
||||
const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
|
||||
}
|
||||
}
|
||||
|
||||
/*! bins an array of primitives */
|
||||
template<typename BinBoundsAndCenter>
|
||||
__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
|
||||
{
|
||||
if (N == 0) return;
|
||||
|
||||
size_t i;
|
||||
for (i=0; i<N-1; i+=2)
|
||||
{
|
||||
/*! map even and odd primitive to bin */
|
||||
BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);
|
||||
const vint4 bin1 = (vint4)mapping.bin(center1);
|
||||
|
||||
/*! increase bounds for bins for even primitive */
|
||||
const unsigned int s0 = prims[i+0].size();
|
||||
const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
|
||||
const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
|
||||
const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
|
||||
|
||||
/*! increase bounds of bins for odd primitive */
|
||||
const unsigned int s1 = prims[i+1].size();
|
||||
const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);
|
||||
const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);
|
||||
const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);
|
||||
}
|
||||
|
||||
/*! for uneven number of primitives */
|
||||
if (i < N)
|
||||
{
|
||||
/*! map primitive to bin */
|
||||
BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
|
||||
const vint4 bin0 = (vint4)mapping.bin(center0);
|
||||
|
||||
/*! increase bounds of bins */
|
||||
const unsigned int s0 = prims[i+0].size();
|
||||
const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
|
||||
const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
|
||||
const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {
|
||||
bin(prims+begin,end-begin,mapping);
|
||||
}
|
||||
|
||||
template<typename BinBoundsAndCenter>
|
||||
__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {
|
||||
bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
__forceinline void merge (const BinInfoT& other, size_t numBins)
|
||||
{
|
||||
|
||||
for (size_t i=0; i<numBins; i++)
|
||||
{
|
||||
counts(i) += other.counts(i);
|
||||
bounds(i,0).extend(other.bounds(i,0));
|
||||
bounds(i,1).extend(other.bounds(i,1));
|
||||
bounds(i,2).extend(other.bounds(i,2));
|
||||
}
|
||||
}
|
||||
|
||||
/*! reduces binning information */
|
||||
static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)
|
||||
{
|
||||
BinInfoT c;
|
||||
for (size_t i=0; i<numBins; i++)
|
||||
{
|
||||
c.counts(i) = a.counts(i)+b.counts(i);
|
||||
c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));
|
||||
c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));
|
||||
c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/*! finds the best split by scanning binning information */
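      /* Editor's note (illustrative, not part of the vendored header): best() scores every
         candidate bin boundary with the binned SAH in two sweeps. A right-to-left sweep
         caches, for each boundary i, the merged half-area and primitive count of bins
         [i..BINS). A left-to-right sweep then accumulates the left side, so boundary i in
         each dimension is scored as

             sah(i) = halfArea(leftBounds)  * ceil(leftCount  / blockSize)
                    + halfArea(rightBounds) * ceil(rightCount / blockSize)

         with all three axes evaluated at once in the SIMD lanes. A hypothetical scalar
         sketch of the same idea (array names invented for illustration only):

             float bestSAH = inf; int bestDim = -1, bestPos = 0;
             for (int dim = 0; dim < 3; dim++)
               for (int i = 1; i < (int)numBins; i++) {
                 const float sah = leftArea[dim][i]  * leftCount[dim][i]
                                 + rightArea[dim][i] * rightCount[dim][i];
                 if (sah < bestSAH) { bestSAH = sah; bestDim = dim; bestPos = i; }
               }
      */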
|
||||
__forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const
|
||||
{
|
||||
/* sweep from right to left and compute parallel prefix of merged bounds */
|
||||
vfloat4 rAreas[BINS];
|
||||
vuint4 rCounts[BINS];
|
||||
vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
|
||||
for (size_t i=mapping.size()-1; i>0; i--)
|
||||
{
|
||||
count += counts(i);
|
||||
rCounts[i] = count;
|
||||
bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
|
||||
rAreas[i][3] = 0.0f;
|
||||
}
|
||||
/* sweep from left to right and compute SAH */
|
||||
vuint4 blocks_add = (1 << blocks_shift)-1;
|
||||
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
|
||||
count = 0; bx = empty; by = empty; bz = empty;
|
||||
for (size_t i=1; i<mapping.size(); i++, ii+=1)
|
||||
{
|
||||
count += counts(i-1);
|
||||
bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
|
||||
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
|
||||
const vfloat4 rArea = rAreas[i];
|
||||
const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >= 1 then lCount < 4B and could be represented with a vint4, which would allow for faster vfloat4 conversions.
|
||||
const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
|
||||
const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
|
||||
//const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
|
||||
|
||||
vbestPos = select(sah < vbestSAH,ii ,vbestPos);
|
||||
vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
|
||||
}
|
||||
|
||||
/* find best dimension */
|
||||
float bestSAH = inf;
|
||||
int bestDim = -1;
|
||||
int bestPos = 0;
|
||||
for (int dim=0; dim<3; dim++)
|
||||
{
|
||||
/* ignore zero sized dimensions */
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
/* test if this is a better dimension */
|
||||
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
|
||||
bestDim = dim;
|
||||
bestPos = vbestPos[dim];
|
||||
bestSAH = vbestSAH[dim];
|
||||
}
|
||||
}
|
||||
return Split(bestSAH,bestDim,bestPos,mapping);
|
||||
}
|
||||
|
||||
/*! finds the best split by scanning binning information */
|
||||
__forceinline Split best_block_size(const BinMapping<BINS>& mapping, const size_t blockSize) const
|
||||
{
|
||||
/* sweep from right to left and compute parallel prefix of merged bounds */
|
||||
vfloat4 rAreas[BINS];
|
||||
vuint4 rCounts[BINS];
|
||||
vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
|
||||
for (size_t i=mapping.size()-1; i>0; i--)
|
||||
{
|
||||
count += counts(i);
|
||||
rCounts[i] = count;
|
||||
bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
|
||||
rAreas[i][3] = 0.0f;
|
||||
}
|
||||
/* sweep from left to right and compute SAH */
|
||||
vuint4 blocks_add = blockSize-1;
|
||||
vfloat4 blocks_factor = 1.0f/float(blockSize);
|
||||
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
|
||||
count = 0; bx = empty; by = empty; bz = empty;
|
||||
for (size_t i=1; i<mapping.size(); i++, ii+=1)
|
||||
{
|
||||
count += counts(i-1);
|
||||
bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
|
||||
by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
|
||||
bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
|
||||
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
|
||||
const vfloat4 rArea = rAreas[i];
|
||||
const vfloat4 lCount = floor(vfloat4(count +blocks_add)*blocks_factor);
|
||||
const vfloat4 rCount = floor(vfloat4(rCounts[i]+blocks_add)*blocks_factor);
|
||||
const vfloat4 sah = madd(lArea,lCount,rArea*rCount);
|
||||
|
||||
vbestPos = select(sah < vbestSAH,ii ,vbestPos);
|
||||
vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
|
||||
}
|
||||
|
||||
/* find best dimension */
|
||||
float bestSAH = inf;
|
||||
int bestDim = -1;
|
||||
int bestPos = 0;
|
||||
for (int dim=0; dim<3; dim++)
|
||||
{
|
||||
/* ignore zero sized dimensions */
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
/* test if this is a better dimension */
|
||||
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
|
||||
bestDim = dim;
|
||||
bestPos = vbestPos[dim];
|
||||
bestSAH = vbestSAH[dim];
|
||||
}
|
||||
}
|
||||
return Split(bestSAH,bestDim,bestPos,mapping);
|
||||
}
|
||||
|
||||
/*! calculates extended split information */
|
||||
__forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
|
||||
{
|
||||
if (split.dim == -1) {
|
||||
new (&info) SplitInfoT<BBox>(0,empty,0,empty);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t leftCount = 0;
|
||||
BBox leftBounds = empty;
|
||||
for (size_t i=0; i<(size_t)split.pos; i++) {
|
||||
leftCount += counts(i,split.dim);
|
||||
leftBounds.extend(bounds(i,split.dim));
|
||||
}
|
||||
size_t rightCount = 0;
|
||||
BBox rightBounds = empty;
|
||||
for (size_t i=split.pos; i<mapping.size(); i++) {
|
||||
rightCount += counts(i,split.dim);
|
||||
rightBounds.extend(bounds(i,split.dim));
|
||||
}
|
||||
new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds);
|
||||
}
|
||||
|
||||
/*! gets the number of primitives left of the split */
|
||||
__forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const
|
||||
{
|
||||
if (unlikely(split.dim == -1)) return -1;
|
||||
|
||||
size_t leftCount = 0;
|
||||
for (size_t i = 0; i < (size_t)split.pos; i++) {
|
||||
leftCount += counts(i, split.dim);
|
||||
}
|
||||
return leftCount;
|
||||
}
|
||||
|
||||
/*! gets the number of primitives right of the split */
|
||||
__forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const
|
||||
{
|
||||
if (unlikely(split.dim == -1)) return -1;
|
||||
|
||||
size_t rightCount = 0;
|
||||
for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {
|
||||
rightCount += counts(i, split.dim);
|
||||
}
|
||||
return rightCount;
|
||||
}
|
||||
|
||||
private:
|
||||
BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
|
||||
vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins
|
||||
};
|
||||
}
|
||||
|
||||
template<typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)
|
||||
{
|
||||
if (likely(end-begin < parallelThreshold)) {
|
||||
binner.bin(prims,begin,end,mapping);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
|
||||
|
||||
template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
|
||||
{
|
||||
if (likely(end-begin < parallelThreshold)) {
|
||||
binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
|
||||
|
||||
template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)
|
||||
{
|
||||
if (!parallel) {
|
||||
binner.bin(prims,begin,end,mapping);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
|
||||
|
||||
template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
|
||||
__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
|
||||
{
|
||||
if (!parallel) {
|
||||
binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
|
||||
} else {
|
||||
binner = parallel_reduce(begin,end,blockSize,binner,
|
||||
[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },
|
||||
[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });
|
||||
}
|
||||
}
|
||||
}
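/* Editor's note (illustrative usage sketch, not part of the vendored file): a typical
   serial use of the binner defined above, assuming a PrimRef array `prims`, a range
   [begin,end) and a PrimInfoRange `pinfo` describing its centroid bounds, looks roughly
   like this (the mapping constructor is the one used elsewhere in these headers):

     BinInfoT<32,PrimRef,BBox3fa> binner(empty);   // 32 object bins, cleared
     const BinMapping<32> mapping(pinfo);          // maps centroids to bin indices
     binner.bin(prims, begin, end, mapping);       // scatter primitives into the bins
     const BinSplit<32> split = binner.best(mapping, 0); // logBlockSize = 0, block size 1
     SplitInfoT<BBox3fa> info;
     binner.getSplitInfo(mapping, split, info);    // left/right counts and bounds

   The bin_parallel / bin_serial_or_parallel helpers above wrap the same bin() calls in a
   parallel_reduce over subranges and merge the per-thread bin arrays. */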
|
||||
249
engine/thirdparty/embree/kernels/builders/heuristic_binning_array_aligned.h
vendored
Normal file
@@ -0,0 +1,249 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
|
||||
{
|
||||
__forceinline PrimInfoRange () {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoRange(const PrimInfo& pinfo)
|
||||
: CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}
|
||||
|
||||
__forceinline PrimInfoRange(EmptyTy)
|
||||
: CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}
|
||||
|
||||
__forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}
|
||||
|
||||
__forceinline PrimInfoRange (range<size_t> r, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeomBBox3fa(centGeomBounds), range<size_t>(r) {}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return expectedApproxHalfArea(geomBounds)*float(size());
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
|
||||
__forceinline range<size_t> get_range() const {
|
||||
return range<size_t>(begin(),end());
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeomBBox3fa::extend_primref(prim);
|
||||
_end++;
|
||||
}
|
||||
};
|
||||
|
||||
inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
const size_t begin = pinfo.begin();
|
||||
const size_t end = pinfo.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
CentGeomBBox3fa left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.extend_center2(prims[i]);
|
||||
new (&linfo) PrimInfoRange(begin,center,left);
|
||||
|
||||
CentGeomBBox3fa right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.extend_center2(prims[i]);
|
||||
new (&rinfo) PrimInfoRange(center,end,right);
|
||||
}
|
||||
|
||||
template<typename Type, typename getTypeFunc>
|
||||
inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range<size_t> range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
CentGeomBBox3fa local_left(empty), local_right(empty);
|
||||
auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); };
|
||||
const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref);
|
||||
linfo = PrimInfoRange(make_range(range.begin(),center ),local_left);
|
||||
rinfo = PrimInfoRange(make_range(center ,range.end()),local_right);
|
||||
}
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRef, size_t BINS>
|
||||
struct HeuristicArrayBinningSAH
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
|
||||
typedef range<size_t> Set;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
__forceinline HeuristicArrayBinningSAH ()
|
||||
: prims(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicArrayBinningSAH (PrimRef* prims)
|
||||
: prims(prims) {}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
|
||||
{
|
||||
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
|
||||
return find_template<false>(pinfo,logBlockSize);
|
||||
else
|
||||
return find_template<true>(pinfo,logBlockSize);
|
||||
}
|
||||
|
||||
template<bool parallel>
|
||||
__forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<BINS> mapping(pinfo);
|
||||
bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize)
|
||||
{
|
||||
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
|
||||
return find_block_size_template<false>(pinfo,blockSize);
|
||||
else
|
||||
return find_block_size_template<true>(pinfo,blockSize);
|
||||
}
|
||||
|
||||
template<bool parallel>
|
||||
__forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<BINS> mapping(pinfo);
|
||||
bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
|
||||
return binner.best_block_size(mapping,blockSize);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
|
||||
split_template<false>(split,pinfo,linfo,rinfo);
|
||||
else
|
||||
split_template<true>(split,pinfo,linfo,rinfo);
|
||||
}
|
||||
|
||||
template<bool parallel>
|
||||
__forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (!split.valid()) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
CentGeomBBox3fa local_left(empty);
|
||||
CentGeomBBox3fa local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const typename Binner::vint vSplitPos(splitPos);
|
||||
const typename Binner::vbool vSplitMask(splitDimMask);
|
||||
auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
|
||||
|
||||
size_t center = 0;
|
||||
if (!parallel)
|
||||
center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
|
||||
else
|
||||
center = parallel_partitioning(
|
||||
prims,begin,end,EmptyTy(),local_left,local_right,isLeft,
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
|
||||
[] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
new (&lset) PrimInfoRange(begin,center,local_left);
|
||||
new (&rset) PrimInfoRange(center,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
}
|
||||
|
||||
void deterministic_order(const PrimInfoRange& pinfo)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) {
|
||||
performFallbackSplit(prims,pinfo,linfo,rinfo);
|
||||
}
|
||||
|
||||
void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
|
||||
{
|
||||
assert(range.size() > 1);
|
||||
CentGeomBBox3fa left(empty);
|
||||
CentGeomBBox3fa right(empty);
|
||||
unsigned int geomID = prims[range.begin()].geomID();
|
||||
size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right,
|
||||
[&] ( const PrimRef& prim ) { return prim.geomID() == geomID; },
|
||||
[ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); });
|
||||
|
||||
new (&linfo) PrimInfoRange(range.begin(),center,left);
|
||||
new (&rinfo) PrimInfoRange(center,range.end(),right);
|
||||
}
|
||||
|
||||
private:
|
||||
PrimRef* const prims;
|
||||
};
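    /* Editor's note (illustrative, not part of the vendored file): during a top-down SAH
       build the heuristic above is driven as "find, then split, then recurse"; the split
       call transparently falls back to a median split when no valid SAH split exists.
       A rough sketch (the surrounding builder state is hypothetical):

         HeuristicArrayBinningSAH<PrimRef,32> heuristic(prims);
         PrimInfoRange linfo, rinfo;
         const BinSplit<32> split = heuristic.find(pinfo, logBlockSize);
         heuristic.split(split, pinfo, linfo, rinfo);
         // ... recurse into linfo and rinfo, or create a leaf when small enough
    */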
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRefMB, size_t BINS>
|
||||
struct HeuristicArrayBinningMB
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef typename PrimRefMB::BBox BBox;
|
||||
typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const SetMB& set, const size_t logBlockSize)
|
||||
{
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<BINS> mapping(set.size(),set.centBounds);
|
||||
bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);
|
||||
Split osplit = binner.best(mapping,logBlockSize);
|
||||
osplit.sah *= set.time_range.size();
|
||||
if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
|
||||
return osplit;
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfoMB left = empty;
|
||||
PrimInfoMB right = empty;
|
||||
const vint4 vSplitPos(split.pos);
|
||||
const vbool4 vSplitMask(1 << split.dim);
|
||||
auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };
|
||||
auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
|
||||
auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
|
||||
size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
|
||||
new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
}
|
||||
}
|
||||
302
engine/thirdparty/embree/kernels/builders/heuristic_binning_array_unaligned.h
vendored
Normal file
@@ -0,0 +1,302 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRef, size_t BINS>
|
||||
struct UnalignedHeuristicArrayBinningSAH
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
|
||||
typedef range<size_t> Set;
|
||||
|
||||
__forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required?
|
||||
: scene(nullptr), prims(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims)
|
||||
: scene(scene), prims(prims) {}
|
||||
|
||||
const LinearSpace3fa computeAlignedSpace(const range<size_t>& set)
|
||||
{
|
||||
Vec3fa axis(0,0,1);
|
||||
uint64_t bestGeomPrimID = -1;
|
||||
|
||||
/*! find curve with minimum ID that defines valid direction */
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const unsigned int geomID = prims[i].geomID();
|
||||
const unsigned int primID = prims[i].primID();
|
||||
const uint64_t geomprimID = prims[i].ID64();
|
||||
if (geomprimID >= bestGeomPrimID) continue;
|
||||
const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID);
|
||||
if (sqr_length(axis1) > 1E-18f) {
|
||||
axis = normalize(axis1);
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
return frame(axis).transposed();
|
||||
}
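      /* Editor's note (illustrative, not part of the vendored file): computeAlignedSpace
         picks the curve with the smallest geomID/primID key that yields a usable direction
         and returns the transposed orthonormal frame around that direction. Binning then
         runs in this rotated space, so the "unaligned" heuristic is the ordinary object
         binner applied to oriented bounds, roughly:

           const LinearSpace3fa space = heuristic.computeAlignedSpace(set);
           const PrimInfo pinfo       = heuristic.computePrimInfo(set, space);
           // a PrimInfoRange built from set and pinfo is then handed to find(..., space)
      */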
|
||||
|
||||
const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space)
|
||||
{
|
||||
auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
|
||||
{
|
||||
CentGeomBBox3fa bounds(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
Geometry* mesh = scene->get(prims[i].geomID());
|
||||
bounds.extend(mesh->vbounds(space,prims[i].primID()));
|
||||
}
|
||||
return bounds;
|
||||
};
|
||||
|
||||
const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096),
|
||||
CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
|
||||
|
||||
return PrimInfo(set.begin(),set.end(),bounds);
|
||||
}
|
||||
|
||||
struct BinBoundsAndCenter
|
||||
{
|
||||
__forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space)
|
||||
: scene(scene), space(space) {}
|
||||
|
||||
/*! returns center for binning */
|
||||
__forceinline Vec3fa binCenter(const PrimRef& ref) const
|
||||
{
|
||||
Geometry* mesh = (Geometry*) scene->get(ref.geomID());
|
||||
BBox3fa bounds = mesh->vbounds(space,ref.primID());
|
||||
return embree::center2(bounds);
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
Geometry* mesh = (Geometry*) scene->get(ref.geomID());
|
||||
BBox3fa bounds = mesh->vbounds(space,ref.primID());
|
||||
bounds_o = bounds;
|
||||
center_o = embree::center2(bounds);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
const LinearSpace3fa space;
|
||||
};
|
||||
|
||||
/*! finds the best split */
|
||||
__forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space)
|
||||
{
|
||||
if (likely(pinfo.size() < 10000))
|
||||
return find_template<false>(pinfo,logBlockSize,space);
|
||||
else
|
||||
return find_template<true>(pinfo,logBlockSize,space);
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
template<bool parallel>
|
||||
const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<BINS> mapping(set);
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,space);
|
||||
bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (likely(set.size() < 10000))
|
||||
split_template<false>(split,space,set,lset,rset);
|
||||
else
|
||||
split_template<true>(split,space,set,lset,rset);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
template<bool parallel>
|
||||
__forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (!split.valid()) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
CentGeomBBox3fa local_left(empty);
|
||||
CentGeomBBox3fa local_right(empty);
|
||||
const int splitPos = split.pos;
|
||||
const int splitDim = split.dim;
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,space);
|
||||
|
||||
size_t center = 0;
|
||||
if (likely(set.size() < 10000))
|
||||
center = serial_partitioning(prims,begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
|
||||
else
|
||||
center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right,
|
||||
[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
|
||||
[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
|
||||
[] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
|
||||
128);
|
||||
|
||||
new (&lset) PrimInfoRange(begin,center,local_left);
|
||||
new (&rset) PrimInfoRange(center,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
}
|
||||
|
||||
void deterministic_order(const range<size_t>& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims[set.begin()],&prims[set.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
CentGeomBBox3fa left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.extend_center2(prims[i]);
|
||||
new (&lset) PrimInfoRange(begin,center,left);
|
||||
|
||||
CentGeomBBox3fa right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.extend_center2(prims[i]);
|
||||
new (&rset) PrimInfoRange(center,end,right);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* const scene;
|
||||
PrimRef* const prims;
|
||||
};
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRefMB, size_t BINS>
|
||||
struct UnalignedHeuristicArrayBinningMB
|
||||
{
|
||||
typedef BinSplit<BINS> Split;
|
||||
typedef typename PrimRefMB::BBox BBox;
|
||||
typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
UnalignedHeuristicArrayBinningMB(Scene* scene)
|
||||
: scene(scene) {}
|
||||
|
||||
const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set)
|
||||
{
|
||||
Vec3fa axis0(0,0,1);
|
||||
uint64_t bestGeomPrimID = -1;
|
||||
|
||||
/*! find curve with minimum ID that defines valid direction */
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const PrimRefMB& prim = (*set.prims)[i];
|
||||
const unsigned int geomID = prim.geomID();
|
||||
const unsigned int primID = prim.primID();
|
||||
const uint64_t geomprimID = prim.ID64();
|
||||
if (geomprimID >= bestGeomPrimID) continue;
|
||||
|
||||
const Geometry* mesh = scene->get(geomID);
|
||||
const range<int> tbounds = mesh->timeSegmentRange(set.time_range);
|
||||
if (tbounds.size() == 0) continue;
|
||||
|
||||
const size_t t = (tbounds.begin()+tbounds.end())/2;
|
||||
const Vec3fa axis1 = mesh->computeDirection(primID,t);
|
||||
if (sqr_length(axis1) > 1E-18f) {
|
||||
axis0 = normalize(axis1);
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
|
||||
return frame(axis0).transposed();
|
||||
}
|
||||
|
||||
struct BinBoundsAndCenter
|
||||
{
|
||||
__forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space)
|
||||
: scene(scene), time_range(time_range), space(space) {}
|
||||
|
||||
/*! returns center for binning */
|
||||
template<typename PrimRef>
|
||||
__forceinline Vec3fa binCenter(const PrimRef& ref) const
|
||||
{
|
||||
Geometry* mesh = scene->get(ref.geomID());
|
||||
LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
|
||||
return center2(lbounds.interpolate(0.5f));
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is a workaround for an ICC16 bug under MacOSX
|
||||
{
|
||||
Geometry* mesh = scene->get(ref.geomID());
|
||||
LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
|
||||
bounds_o = lbounds.interpolate(0.5f);
|
||||
center_o = center2(bounds_o);
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is a workaround for an ICC16 bug under MacOSX
|
||||
{
|
||||
Geometry* mesh = scene->get(ref.geomID());
|
||||
LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
|
||||
bounds_o = lbounds;
|
||||
center_o = center2(lbounds.interpolate(0.5f));
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
BBox1f time_range;
|
||||
const LinearSpace3fa space;
|
||||
};
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space)
|
||||
{
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<BINS> mapping(set.size(),set.centBounds);
|
||||
bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter);
|
||||
Split osplit = binner.best(mapping,logBlockSize);
|
||||
osplit.sah *= set.time_range.size();
|
||||
if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
|
||||
return osplit;
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfoMB left = empty;
|
||||
PrimInfoMB right = empty;
|
||||
const vint4 vSplitPos(split.pos);
|
||||
const vbool4 vSplitMask(1 << split.dim);
|
||||
auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); };
|
||||
auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
|
||||
auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
|
||||
size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
|
||||
new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range);
|
||||
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
};
|
||||
}
|
||||
}
|
||||
443
engine/thirdparty/embree/kernels/builders/heuristic_openmerge_array.h
vendored
Normal file
@@ -0,0 +1,443 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// TODO:
|
||||
// - adjust parallel build thresholds
|
||||
// - openNodesBasedOnExtend should consider max extended size
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
#include "heuristic_spatial.h"
|
||||
|
||||
/* stop opening if all bref.geomIDs are the same */
|
||||
#define EQUAL_GEOMID_STOP_CRITERIA 1
|
||||
|
||||
/* 10% spatial extent threshold */
|
||||
#define MAX_EXTEND_THRESHOLD 0.1f
|
||||
|
||||
/* maximum is 8 children */
|
||||
#define MAX_OPENED_CHILD_NODES 8
|
||||
|
||||
/* open until all build refs are below threshold size in one step */
|
||||
#define USE_LOOP_OPENING 0
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
|
||||
struct HeuristicArrayOpenMergeSAH
|
||||
{
|
||||
typedef BinSplit<OBJECT_BINS> Split;
|
||||
typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
static const size_t MOVE_STEP_SIZE = 64;
|
||||
static const size_t CREATE_SPLITS_STEP_SIZE = 128;
|
||||
|
||||
__forceinline HeuristicArrayOpenMergeSAH ()
|
||||
: prims0(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
|
||||
: prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
|
||||
{
|
||||
assert(max_open_size <= MAX_OPENED_CHILD_NODES);
|
||||
}
|
||||
|
||||
struct OpenHeuristic
|
||||
{
|
||||
__forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
|
||||
{
|
||||
const Vec3fa diag = pinfo.geomBounds.size();
|
||||
dim = maxDim(diag);
|
||||
assert(diag[dim] > 0.0f);
|
||||
inv_max_extend = 1.0f / diag[dim];
|
||||
}
|
||||
|
||||
__forceinline bool operator () ( PrimRef& prim ) const {
|
||||
return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t dim;
|
||||
float inv_max_extend;
|
||||
};
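      /* Editor's note (illustrative, not part of the vendored file): the predicate above
         opens a build reference only if (a) it points at an inner BVH node and (b) its
         bounds along the dominant axis of the whole set cover more than
         MAX_EXTEND_THRESHOLD (10%) of that axis. In the openNodesBasedOnExtend methods
         below, every reference that passes the test is expanded in place, roughly:

           PrimRef tmp[MAX_OPENED_CHILD_NODES];
           const size_t n = nodeOpenerFunc(prims0[i], tmp); // expand node into n children
           prims0[i] = tmp[0];                              // first child reuses the slot
           // children 1..n-1 are appended into the extended range behind set.end()
      */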
|
||||
|
||||
/*! compute extended ranges */
|
||||
__forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
|
||||
{
|
||||
assert(set.ext_range_size() > 0);
|
||||
const float left_factor = (float)lweight / (lweight + rweight);
|
||||
const size_t ext_range_size = set.ext_range_size();
|
||||
const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
|
||||
const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
|
||||
lset.set_ext_range(lset.end() + left_ext_range_size);
|
||||
rset.set_ext_range(rset.end() + right_ext_range_size);
|
||||
}
|
||||
|
||||
/*! move ranges */
|
||||
__forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t left_ext_range_size = lset.ext_range_size();
|
||||
const size_t right_size = rset.size();
|
||||
|
||||
/* has the left child an extended range? */
|
||||
if (left_ext_range_size > 0)
|
||||
{
|
||||
/* left extended range smaller than right range ? */
|
||||
if (left_ext_range_size < right_size)
|
||||
{
|
||||
/* only move a small part of the beginning of the right range to the end */
|
||||
parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+right_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no overlap, move entire right range to new location, can be made fully parallel */
|
||||
parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+left_ext_range_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
/* update right range */
|
||||
assert(rset.ext_end() + left_ext_range_size == set.ext_end());
|
||||
rset.move_right(left_ext_range_size);
|
||||
}
|
||||
}
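      /* Editor's note (illustrative, not part of the vendored file): together the two
         helpers above hand the unused ("extended") part of the parent range to the
         children in proportion to their primitive counts, and then shift the right
         child so its share sits directly behind it. For example, with
         ext_range_size() = 100, lweight = 30 and rweight = 70, the left child receives
         floor(0.3 * 100) = 30 spare slots, the right child the remaining 70, and the
         right child's primitives are copied 30 slots to the right to make room. */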
|
||||
|
||||
/* estimates the extra space required when opening, and checks if all primitives are from the same geometry */
|
||||
__noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
|
||||
{
|
||||
const OpenHeuristic heuristic(set);
|
||||
const unsigned int geomID = prims0[set.begin()].geomID();
|
||||
|
||||
auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
|
||||
bool commonGeomID = true;
|
||||
size_t opens = 0;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
commonGeomID &= prims0[i].geomID() == geomID;
|
||||
if (heuristic(prims0[i]))
|
||||
opens += prims0[i].node.getN()-1; // coarse approximation
|
||||
}
|
||||
return std::pair<size_t,bool>(opens,commonGeomID);
|
||||
};
|
||||
auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
|
||||
return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
|
||||
};
|
||||
return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
|
||||
}
|
||||
|
||||
// FIXME: should consider maximum available extended size
|
||||
__noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
|
||||
{
|
||||
const OpenHeuristic heuristic(set);
|
||||
const size_t ext_range_start = set.end();
|
||||
|
||||
if (false && set.size() < PARALLEL_THRESHOLD)
|
||||
{
|
||||
size_t extra_elements = 0;
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
if (heuristic(prims0[i]))
|
||||
{
|
||||
PrimRef tmp[MAX_OPENED_CHILD_NODES];
|
||||
const size_t n = nodeOpenerFunc(prims0[i],tmp);
|
||||
assert(extra_elements + n-1 <= set.ext_range_size());
|
||||
for (size_t j=0; j<n; j++)
|
||||
set.extend_center2(tmp[j]);
|
||||
|
||||
prims0[i] = tmp[0];
|
||||
for (size_t j=1; j<n; j++)
|
||||
prims0[ext_range_start+extra_elements+j-1] = tmp[j];
|
||||
extra_elements += n-1;
|
||||
}
|
||||
}
|
||||
set._end += extra_elements;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::atomic<size_t> ext_elements;
|
||||
ext_elements.store(0);
|
||||
PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
|
||||
PrimInfo info(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
if (heuristic(prims0[i]))
|
||||
{
|
||||
PrimRef tmp[MAX_OPENED_CHILD_NODES];
|
||||
const size_t n = nodeOpenerFunc(prims0[i],tmp);
|
||||
const size_t ID = ext_elements.fetch_add(n-1);
|
||||
assert(ID + n-1 <= set.ext_range_size());
|
||||
|
||||
for (size_t j=0; j<n; j++)
|
||||
info.extend_center2(tmp[j]);
|
||||
|
||||
prims0[i] = tmp[0];
|
||||
for (size_t j=1; j<n; j++)
|
||||
prims0[ext_range_start+ID+j-1] = tmp[j];
|
||||
}
|
||||
return info;
|
||||
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
|
||||
set.centBounds.extend(info.centBounds);
|
||||
assert(ext_elements.load() <= set.ext_range_size());
|
||||
set._end += ext_elements.load();
|
||||
}
|
||||
}
|
||||
|
||||
__noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
|
||||
{
|
||||
const OpenHeuristic heuristic(set);
|
||||
size_t next_iteration_extra_elements = est_new_elements;
|
||||
|
||||
while (next_iteration_extra_elements <= set.ext_range_size())
|
||||
{
|
||||
next_iteration_extra_elements = 0;
|
||||
size_t extra_elements = 0;
|
||||
const size_t ext_range_start = set.end();
|
||||
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
if (heuristic(prims0[i]))
|
||||
{
|
||||
PrimRef tmp[MAX_OPENED_CHILD_NODES];
|
||||
const size_t n = nodeOpenerFunc(prims0[i],tmp);
|
||||
assert(extra_elements + n-1 <= set.ext_range_size());
|
||||
for (size_t j=0;j<n;j++)
|
||||
set.extend_center2(tmp[j]);
|
||||
|
||||
prims0[i] = tmp[0];
|
||||
for (size_t j=1;j<n;j++)
|
||||
prims0[ext_range_start+extra_elements+j-1] = tmp[j];
|
||||
extra_elements += n-1;
|
||||
|
||||
for (size_t j=0; j<n; j++)
|
||||
if (heuristic(tmp[j]))
|
||||
next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
|
||||
|
||||
}
|
||||
}
|
||||
assert( extra_elements <= set.ext_range_size());
|
||||
set._end += extra_elements;
|
||||
|
||||
for (size_t i=set.begin();i<set.end();i++)
|
||||
assert(prims0[i].numPrimitives() > 0);
|
||||
|
||||
if (unlikely(next_iteration_extra_elements == 0)) break;
|
||||
}
|
||||
}
|
||||
|
||||
__noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
/* single element */
|
||||
if (set.size() <= 1)
|
||||
return Split();
|
||||
|
||||
/* disable opening if there is no overlap */
|
||||
const size_t D = 4;
|
||||
if (unlikely(set.has_ext_range() && set.size() <= D))
|
||||
{
|
||||
bool disjoint = true;
|
||||
for (size_t j=set.begin(); j<set.end()-1; j++) {
|
||||
for (size_t i=set.begin()+1; i<set.end(); i++) {
|
||||
if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
|
||||
disjoint = false; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (disjoint) set.set_ext_range(set.end()); /* disables opening */
|
||||
}
|
||||
|
||||
std::pair<size_t,bool> p(0,false);
|
||||
|
||||
/* disable opening when all primitives are from the same geometry */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
p = getProperties(set);
|
||||
#if EQUAL_GEOMID_STOP_CRITERIA == 1
|
||||
if (p.second) set.set_ext_range(set.end()); /* disable opening */
|
||||
#endif
|
||||
}
|
||||
|
||||
/* open nodes when we have sufficient space available */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
#if USE_LOOP_OPENING == 1
|
||||
openNodesBasedOnExtendLoop(set,p.first);
|
||||
#else
|
||||
if (p.first <= set.ext_range_size())
|
||||
openNodesBasedOnExtend(set);
|
||||
#endif
|
||||
|
||||
/* disable opening when there is insufficient space available for opening a node */
|
||||
if (set.ext_range_size() < max_open_size-1)
|
||||
set.set_ext_range(set.end()); /* disable opening */
|
||||
}
|
||||
|
||||
/* find best split */
|
||||
return object_find(set,logBlockSize);
|
||||
}
|
||||
|
||||
|
||||
/*! finds the best object split */
|
||||
__forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
|
||||
{
|
||||
if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
|
||||
else return parallel_object_find (set,logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best object split */
|
||||
__noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set.centBounds);
|
||||
binner.bin(prims0,set.begin(),set.end(),mapping);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
Binner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set.centBounds);
|
||||
const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
|
||||
auto body = [&] (const range<size_t>& r) -> Binner {
|
||||
Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
|
||||
};
|
||||
auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
|
||||
Binner r = b0; r.merge(b1,_mapping.size()); return r;
|
||||
};
|
||||
binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
|
||||
return binner.best(mapping,logBlockSize);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
PrimInfoExtRange set = set_i;
|
||||
|
||||
/* fall back if the split is invalid */
|
||||
if (unlikely(!split.valid())) {
|
||||
deterministic_order(set);
|
||||
splitFallback(set,lset,rset);
|
||||
return;
|
||||
}
|
||||
|
||||
std::pair<size_t,size_t> ext_weights(0,0);
|
||||
|
||||
/* object split */
|
||||
if (likely(set.size() < PARALLEL_THRESHOLD))
|
||||
ext_weights = sequential_object_split(split,set,lset,rset);
|
||||
else
|
||||
ext_weights = parallel_object_split(split,set,lset,rset);
|
||||
|
||||
/* if we have an extended range, set extended child ranges and move right split range */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo local_left(empty);
|
||||
PrimInfo local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
|
||||
size_t center = serial_partitioning(prims0,
|
||||
begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,local_left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(local_left.size(),local_right.size());
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo left(empty);
|
||||
PrimInfo right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
|
||||
|
||||
const size_t center = parallel_partitioning(
|
||||
prims0,begin,end,EmptyTy(),left,right,isLeft,
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
|
||||
[] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
|
||||
return std::pair<size_t,size_t>(left.size(),right.size());
|
||||
}
|
||||
|
||||
void deterministic_order(const extended_range<size_t>& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims0[set.begin()],&prims0[set.end()]);
|
||||
}
|
||||
|
||||
__forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
PrimInfo left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.add_center2(prims0[i]);
|
||||
|
||||
const size_t lweight = left.end;
|
||||
|
||||
PrimInfo right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.add_center2(prims0[i]);
|
||||
|
||||
const size_t rweight = right.end;
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
/* if we have an extended range */
|
||||
if (set.has_ext_range())
|
||||
{
|
||||
setExtentedRanges(set,lset,rset,lweight,rweight);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
PrimRef* const prims0;
|
||||
const NodeOpenerFunc& nodeOpenerFunc;
|
||||
size_t max_open_size;
|
||||
};
|
||||
}
|
||||
}
|
||||
366
engine/thirdparty/embree/kernels/builders/heuristic_spatial.h
vendored
Normal file
@@ -0,0 +1,366 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "priminfo.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5;
|
||||
|
||||
namespace isa
|
||||
{
|
||||
|
||||
/*! mapping into bins */
|
||||
template<size_t BINS>
|
||||
struct SpatialBinMapping
|
||||
{
|
||||
public:
|
||||
__forceinline SpatialBinMapping() {}
|
||||
|
||||
/*! calculates the mapping */
|
||||
__forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
|
||||
{
|
||||
const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower;
|
||||
const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper;
|
||||
const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper));
|
||||
const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size());
|
||||
scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag);
|
||||
ofs = (vfloat4) pinfo.geomBounds.lower;
|
||||
inv_scale = 1.0f / scale;
|
||||
}
|
||||
|
||||
/*! slower but safe binning */
|
||||
__forceinline vint4 bin(const Vec3fa& p) const
|
||||
{
|
||||
const vint4 i = floori((vfloat4(p)-ofs)*scale);
|
||||
return clamp(i,vint4(0),vint4(BINS-1));
|
||||
}
|
||||
|
||||
__forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
|
||||
{
|
||||
#if defined(__AVX__)
|
||||
const vfloat8 ofs8(ofs);
|
||||
const vfloat8 scale8(scale);
|
||||
const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8);
|
||||
const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1));
|
||||
return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu));
|
||||
#else
|
||||
const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale);
|
||||
const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale);
|
||||
const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1));
|
||||
const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1));
|
||||
return std::pair<vint4,vint4>(c_lower,c_upper);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*! calculates left spatial position of bin */
|
||||
__forceinline float pos(const size_t bin, const size_t dim) const {
|
||||
return madd(float(bin),inv_scale[dim],ofs[dim]);
|
||||
}
|
||||
|
||||
/*! calculates left spatial position of bin */
|
||||
template<size_t N>
|
||||
__forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const {
|
||||
return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim]));
|
||||
}
|
||||
|
||||
/*! returns true if the mapping is invalid in some dimension */
|
||||
__forceinline bool invalid(const size_t dim) const {
|
||||
return scale[dim] == 0.0f;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
|
||||
};
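    // Editor's note: the scalar sketch below is illustrative only and is not part of the
    // vendored Embree source; the name illustrative_bin_index is made up. It restates the
    // mapping above in one dimension: bin = clamp(floor((p - lower) * BINS / extent), 0, BINS-1),
    // with a degenerate extent collapsing to bin 0 (the scale == 0 case). The real mapping
    // additionally widens the extent by a small epsilon for robustness.
    template<int ILLUSTRATIVE_BINS>
    inline int illustrative_bin_index(float p, float lower, float upper)
    {
      const float extent = upper - lower;
      if (!(extent > 0.0f)) return 0;                 // degenerate dimension, scale would be 0
      const float scale = float(ILLUSTRATIVE_BINS) / extent;
      int i = int((p - lower) * scale);               // truncation equals floor for p >= lower
      if (i < 0) i = 0;
      if (i > ILLUSTRATIVE_BINS - 1) i = ILLUSTRATIVE_BINS - 1;
      return i;                                       // e.g. illustrative_bin_index<16>(0.5f, 0.0f, 1.0f) == 8
    }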
|
||||
|
||||
/*! stores all information required to perform some split */
|
||||
template<size_t BINS>
|
||||
struct SpatialBinSplit
|
||||
{
|
||||
/*! construct an invalid split by default */
|
||||
__forceinline SpatialBinSplit()
|
||||
: sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
|
||||
: sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
|
||||
: sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}
|
||||
|
||||
/*! tests if this split is valid */
|
||||
__forceinline bool valid() const { return dim != -1; }
|
||||
|
||||
/*! calculates surface area heuristic for performing the split */
|
||||
__forceinline float splitSAH() const { return sah; }
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
|
||||
return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
float sah; //!< SAH cost of the split
|
||||
int dim; //!< split dimension
|
||||
int pos; //!< split position
|
||||
int left; //!< number of elements on the left side
|
||||
int right; //!< number of elements on the right side
|
||||
float factor; //!< factor splitting the extended range
|
||||
SpatialBinMapping<BINS> mapping; //!< mapping into bins
|
||||
};
|
||||
|
||||
/*! stores all binning information */
|
||||
template<size_t BINS, typename PrimRef>
|
||||
struct __aligned(64) SpatialBinInfo
|
||||
{
|
||||
SpatialBinInfo() {
|
||||
}
|
||||
|
||||
__forceinline SpatialBinInfo(EmptyTy) {
|
||||
clear();
|
||||
}
|
||||
|
||||
/*! clears the bin info */
|
||||
__forceinline void clear()
|
||||
{
|
||||
for (size_t i=0; i<BINS; i++) {
|
||||
bounds[i][0] = bounds[i][1] = bounds[i][2] = empty;
|
||||
numBegin[i] = numEnd[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*! adds binning data */
|
||||
__forceinline void add(const size_t dim,
|
||||
const size_t beginID,
|
||||
const size_t endID,
|
||||
const size_t binID,
|
||||
const BBox3fa &b,
|
||||
const size_t n = 1)
|
||||
{
|
||||
assert(beginID < BINS);
|
||||
assert(endID < BINS);
|
||||
assert(binID < BINS);
|
||||
|
||||
numBegin[beginID][dim]+=(unsigned int)n;
|
||||
numEnd [endID][dim]+=(unsigned int)n;
|
||||
bounds [binID][dim].extend(b);
|
||||
}
|
||||
|
||||
/*! extends binning bounds */
|
||||
__forceinline void extend(const size_t dim,
|
||||
const size_t binID,
|
||||
const BBox3fa &b)
|
||||
{
|
||||
assert(binID < BINS);
|
||||
bounds [binID][dim].extend(b);
|
||||
}
|
||||
|
||||
/*! bins an array of primitives */
|
||||
template<typename PrimitiveSplitterFactory>
|
||||
__forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
|
||||
{
|
||||
for (size_t i=begin; i<end; i++)
|
||||
{
|
||||
const PrimRef& prim = source[i];
|
||||
unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
|
||||
if (unlikely(splits <= 1))
|
||||
{
|
||||
const vint4 bin = mapping.bin(center(prim.bounds()));
|
||||
for (size_t dim=0; dim<3; dim++)
|
||||
{
|
||||
assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
|
||||
add(dim,bin[dim],bin[dim],bin[dim],prim.bounds());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const vint4 bin0 = mapping.bin(prim.bounds().lower);
|
||||
const vint4 bin1 = mapping.bin(prim.bounds().upper);
|
||||
|
||||
for (size_t dim=0; dim<3; dim++)
|
||||
{
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
size_t bin;
|
||||
size_t l = bin0[dim];
|
||||
size_t r = bin1[dim];
|
||||
|
||||
// same bin optimization
|
||||
if (likely(l == r))
|
||||
{
|
||||
add(dim,l,l,l,prim.bounds());
|
||||
continue;
|
||||
}
|
||||
size_t bin_start = bin0[dim];
|
||||
size_t bin_end = bin1[dim];
|
||||
BBox3fa rest = prim.bounds();
|
||||
|
||||
/* assure that split position always overlaps the primitive bounds */
|
||||
while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++;
|
||||
while (bin_start < bin_end && mapping.pos(bin_end ,dim) >= rest.upper[dim]) bin_end--;
|
||||
|
||||
const auto splitter = splitterFactory(prim);
|
||||
for (bin=bin_start; bin<bin_end; bin++)
|
||||
{
|
||||
const float pos = mapping.pos(bin+1,dim);
|
||||
BBox3fa left,right;
|
||||
splitter(rest,dim,pos,left,right);
|
||||
|
||||
if (unlikely(left.empty())) l++;
|
||||
extend(dim,bin,left);
|
||||
rest = right;
|
||||
}
|
||||
if (unlikely(rest.empty())) r--;
|
||||
add(dim,l,r,bin,rest);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! bins an array of primitives */
|
||||
__forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
|
||||
{
|
||||
for (size_t i=begin; i<end; i++)
|
||||
{
|
||||
const PrimRef &prim = source[i];
|
||||
const vint4 bin0 = mapping.bin(prim.bounds().lower);
|
||||
const vint4 bin1 = mapping.bin(prim.bounds().upper);
|
||||
|
||||
for (size_t dim=0; dim<3; dim++)
|
||||
{
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
const size_t l = bin0[dim];
|
||||
const size_t r = bin1[dim];
|
||||
|
||||
const unsigned int n = prim.primID();
|
||||
|
||||
// same bin optimization
|
||||
if (likely(l == r))
|
||||
{
|
||||
add(dim,l,l,l,prim.bounds(),n);
|
||||
continue;
|
||||
}
|
||||
const size_t bin_start = bin0[dim];
|
||||
const size_t bin_end = bin1[dim];
|
||||
for (size_t bin=bin_start; bin<bin_end; bin++)
|
||||
add(dim,l,r,bin,prim.bounds(),n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
void merge (const SpatialBinInfo& other)
|
||||
{
|
||||
for (size_t i=0; i<BINS; i++)
|
||||
{
|
||||
numBegin[i] += other.numBegin[i];
|
||||
numEnd [i] += other.numEnd [i];
|
||||
bounds[i][0].extend(other.bounds[i][0]);
|
||||
bounds[i][1].extend(other.bounds[i][1]);
|
||||
bounds[i][2].extend(other.bounds[i][2]);
|
||||
}
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
|
||||
{
|
||||
SpatialBinInfo c(empty);
|
||||
for (size_t i=0; i<BINS; i++)
|
||||
{
|
||||
c.numBegin[i] += a.numBegin[i]+b.numBegin[i];
|
||||
c.numEnd [i] += a.numEnd [i]+b.numEnd [i];
|
||||
c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]);
|
||||
c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]);
|
||||
c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/*! finds the best split by scanning binning information */
|
||||
SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
|
||||
{
|
||||
/* sweep from right to left and compute parallel prefix of merged bounds */
|
||||
vfloat4 rAreas[BINS];
|
||||
vuint4 rCounts[BINS];
|
||||
vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
|
||||
for (size_t i=BINS-1; i>0; i--)
|
||||
{
|
||||
count += numEnd[i];
|
||||
rCounts[i] = count;
|
||||
bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
|
||||
by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
|
||||
bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
|
||||
rAreas[i][3] = 0.0f;
|
||||
}
|
||||
|
||||
/* sweep from left to right and compute SAH */
|
||||
vuint4 blocks_add = (1 << blocks_shift)-1;
|
||||
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
|
||||
count = 0; bx = empty; by = empty; bz = empty;
|
||||
for (size_t i=1; i<BINS; i++, ii+=1)
|
||||
{
|
||||
count += numBegin[i-1];
|
||||
bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
|
||||
by.extend(bounds[i-1][1]); float Ay = halfArea(by);
|
||||
bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
|
||||
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
|
||||
const vfloat4 rArea = rAreas[i];
|
||||
const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift);
|
||||
const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
|
||||
const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
|
||||
// const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
|
||||
const vbool4 mask = sah < vbestSAH;
|
||||
vbestPos = select(mask,ii ,vbestPos);
|
||||
vbestSAH = select(mask,sah,vbestSAH);
|
||||
vbestlCount = select(mask,count,vbestlCount);
|
||||
vbestrCount = select(mask,rCounts[i],vbestrCount);
|
||||
}
|
||||
|
||||
/* find best dimension */
|
||||
float bestSAH = inf;
|
||||
int bestDim = -1;
|
||||
int bestPos = 0;
|
||||
unsigned int bestlCount = 0;
|
||||
unsigned int bestrCount = 0;
|
||||
for (int dim=0; dim<3; dim++)
|
||||
{
|
||||
/* ignore zero sized dimensions */
|
||||
if (unlikely(mapping.invalid(dim)))
|
||||
continue;
|
||||
|
||||
/* test if this is a better dimension */
|
||||
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
|
||||
bestDim = dim;
|
||||
bestPos = vbestPos[dim];
|
||||
bestSAH = vbestSAH[dim];
|
||||
bestlCount = vbestlCount[dim];
|
||||
bestrCount = vbestrCount[dim];
|
||||
}
|
||||
}
|
||||
assert(bestSAH >= 0.0f);
|
||||
|
||||
/* return invalid split if no split found */
|
||||
if (bestDim == -1)
|
||||
return SpatialBinSplit<BINS>(inf,-1,0,mapping);
|
||||
|
||||
/* return best found split */
|
||||
return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
|
||||
}
|
||||
|
||||
private:
|
||||
BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
|
||||
vuint4 numBegin[BINS]; //!< number of primitives starting in bin
|
||||
vuint4 numEnd[BINS]; //!< number of primitives ending in bin
|
||||
};
|
||||
}
|
||||
}
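// Editor's note: the standalone snippet below is an illustrative reduction of
// SpatialBinInfo::best() above to one dimension and plain scalars; it is not part of the
// vendored Embree source and all names (namespace illustrative, Box1, best_split) are
// made up. It shows the two sweeps: suffix areas and counts are gathered right to left,
// then every split position is scored as lArea*lCount + rArea*rCount.
#include <cstddef>
#include <limits>

namespace illustrative {

  struct Box1 {
    float lo = +std::numeric_limits<float>::infinity();
    float hi = -std::numeric_limits<float>::infinity();
    void extend(const Box1& b) { if (b.lo < lo) lo = b.lo; if (b.hi > hi) hi = b.hi; }
  };
  inline float halfArea1(const Box1& b) { return b.hi > b.lo ? b.hi - b.lo : 0.0f; }

  template<std::size_t BINS>
  int best_split(const Box1 (&bounds)[BINS], const std::size_t (&numBegin)[BINS], const std::size_t (&numEnd)[BINS])
  {
    /* sweep from right to left: suffix bounds area and suffix primitive count */
    float rArea[BINS]; std::size_t rCount[BINS];
    Box1 rbox; std::size_t rcnt = 0;
    for (std::size_t i = BINS-1; i > 0; i--) {
      rcnt += numEnd[i]; rbox.extend(bounds[i]);
      rCount[i] = rcnt;  rArea[i] = halfArea1(rbox);
    }
    /* sweep from left to right: prefix bounds, score every split position */
    int bestPos = -1; float bestSAH = std::numeric_limits<float>::infinity();
    Box1 lbox; std::size_t lcnt = 0;
    for (std::size_t i = 1; i < BINS; i++) {
      lcnt += numBegin[i-1]; lbox.extend(bounds[i-1]);
      const float sah = halfArea1(lbox)*float(lcnt) + rArea[i]*float(rCount[i]);
      if (sah < bestSAH) { bestSAH = sah; bestPos = int(i); }
    }
    return bestPos; /* split between bins [0,bestPos) and [bestPos,BINS), or -1 if none */
  }
}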
547
engine/thirdparty/embree/kernels/builders/heuristic_spatial_array.h
vendored
Normal file
@@ -0,0 +1,547 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "heuristic_binning.h"
|
||||
#include "heuristic_spatial.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
#if 0
|
||||
#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f
|
||||
#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f
|
||||
#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f
|
||||
#else
|
||||
#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f
|
||||
#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f
|
||||
#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f
|
||||
#endif
|
||||
|
||||
struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t>
|
||||
{
|
||||
__forceinline PrimInfoExtRange() {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoExtRange(EmptyTy)
|
||||
: CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {}
|
||||
|
||||
__forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return expectedApproxHalfArea(geomBounds)*float(size());
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ObjectSplit, typename SpatialSplit>
|
||||
struct Split2
|
||||
{
|
||||
__forceinline Split2 () {}
|
||||
|
||||
__forceinline Split2 (const Split2& other)
|
||||
{
|
||||
spatial = other.spatial;
|
||||
sah = other.sah;
|
||||
if (spatial) spatialSplit() = other.spatialSplit();
|
||||
else objectSplit() = other.objectSplit();
|
||||
}
|
||||
|
||||
__forceinline Split2& operator= (const Split2& other)
|
||||
{
|
||||
spatial = other.spatial;
|
||||
sah = other.sah;
|
||||
if (spatial) spatialSplit() = other.spatialSplit();
|
||||
else objectSplit() = other.objectSplit();
|
||||
return *this;
|
||||
}
|
||||
|
||||
__forceinline ObjectSplit& objectSplit() { return *( ObjectSplit*)data; }
|
||||
__forceinline const ObjectSplit& objectSplit() const { return *(const ObjectSplit*)data; }
|
||||
|
||||
__forceinline SpatialSplit& spatialSplit() { return *( SpatialSplit*)data; }
|
||||
__forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; }
|
||||
|
||||
__forceinline Split2 (const ObjectSplit& objectSplit, float sah)
|
||||
: spatial(false), sah(sah)
|
||||
{
|
||||
new (data) ObjectSplit(objectSplit);
|
||||
}
|
||||
|
||||
__forceinline Split2 (const SpatialSplit& spatialSplit, float sah)
|
||||
: spatial(true), sah(sah)
|
||||
{
|
||||
new (data) SpatialSplit(spatialSplit);
|
||||
}
|
||||
|
||||
__forceinline float splitSAH() const {
|
||||
return sah;
|
||||
}
|
||||
|
||||
__forceinline bool valid() const {
|
||||
return sah < float(inf);
|
||||
}
|
||||
|
||||
public:
|
||||
__aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)];
|
||||
bool spatial;
|
||||
float sah;
|
||||
};
|
||||
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS>
|
||||
struct HeuristicArraySpatialSAH
|
||||
{
|
||||
typedef BinSplit<OBJECT_BINS> ObjectSplit;
|
||||
typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner;
|
||||
|
||||
typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit;
|
||||
typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner;
|
||||
|
||||
//typedef extended_range<size_t> Set;
|
||||
typedef Split2<ObjectSplit,SpatialSplit> Split;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3*1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
static const size_t MOVE_STEP_SIZE = 64;
|
||||
static const size_t CREATE_SPLITS_STEP_SIZE = 64;
|
||||
|
||||
__forceinline HeuristicArraySpatialSAH ()
|
||||
: prims0(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info)
|
||||
: prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {}
|
||||
|
||||
|
||||
/*! compute extended ranges */
|
||||
__noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
|
||||
{
|
||||
assert(set.ext_range_size() > 0);
|
||||
const float left_factor = (float)lweight / (lweight + rweight);
|
||||
const size_t ext_range_size = set.ext_range_size();
|
||||
const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
|
||||
const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
|
||||
lset.set_ext_range(lset.end() + left_ext_range_size);
|
||||
rset.set_ext_range(rset.end() + right_ext_range_size);
|
||||
}
|
||||
|
||||
/*! move ranges */
|
||||
__noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t left_ext_range_size = lset.ext_range_size();
|
||||
const size_t right_size = rset.size();
|
||||
|
||||
/* has the left child an extended range? */
|
||||
if (left_ext_range_size > 0)
|
||||
{
|
||||
/* left extended range smaller than right range ? */
|
||||
if (left_ext_range_size < right_size)
|
||||
{
|
||||
/* only move a small part of the beginning of the right range to the end */
|
||||
parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+right_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no overlap, move entire right range to new location, can be made fully parallel */
|
||||
parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
prims0[i+left_ext_range_size] = prims0[i];
|
||||
});
|
||||
}
|
||||
/* update right range */
|
||||
assert(rset.ext_end() + left_ext_range_size == set.ext_end());
|
||||
rset.move_right(left_ext_range_size);
|
||||
}
|
||||
}
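      /* Editor's note: the helper below is an illustrative sketch only (not part of the
         vendored Embree source; the name illustrative_shift_right_range is made up). It
         restates the data movement above in scalar form: the right child's range
         [rbegin,rend) inside prims is shifted right by 'shift' freed slots so the left
         child can grow into them. Element order inside the range does not matter to the
         builder, so the cheaper of the two copies is chosen, mirroring the two
         parallel_for branches above. prims must have room for rend+shift elements. */
      template<typename T>
      static void illustrative_shift_right_range(T* prims, size_t rbegin, size_t rend, size_t shift)
      {
        const size_t rsize = rend - rbegin;
        if (shift < rsize) {
          for (size_t i = rbegin; i < rbegin + shift; i++)  // move only the first 'shift' elements
            prims[i + rsize] = prims[i];                    //   to just past the old end
        } else {
          for (size_t i = rbegin; i < rend; i++)            // ranges are disjoint: move everything
            prims[i + shift] = prims[i];
        }
        /* the right range is now [rbegin + shift, rend + shift) */
      }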
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
SplitInfo oinfo;
|
||||
const ObjectSplit object_split = object_find(set,logBlockSize,oinfo);
|
||||
const float object_split_sah = object_split.splitSAH();
|
||||
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds);
|
||||
|
||||
/* do only spatial splits if the child bounds overlap */
|
||||
if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) &&
|
||||
safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds))
|
||||
{
|
||||
const SpatialSplit spatial_split = spatial_find(set, logBlockSize);
|
||||
const float spatial_split_sah = spatial_split.splitSAH();
|
||||
|
||||
/* valid spatial split, better SAH and number of splits do not exceed extended range */
|
||||
if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah &&
|
||||
spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size())
|
||||
{
|
||||
return Split(spatial_split,spatial_split_sah);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Split(object_split,object_split_sah);
|
||||
}
|
||||
|
||||
/*! finds the best object split */
|
||||
__forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
|
||||
{
|
||||
if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info);
|
||||
else return parallel_object_find (set,logBlockSize,info);
|
||||
}
|
||||
|
||||
/*! finds the best object split */
|
||||
__noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
|
||||
{
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set);
|
||||
binner.bin(prims0,set.begin(),set.end(),mapping);
|
||||
ObjectSplit s = binner.best(mapping,logBlockSize);
|
||||
binner.getSplitInfo(mapping, s, info);
|
||||
return s;
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
__noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
|
||||
{
|
||||
ObjectBinner binner(empty);
|
||||
const BinMapping<OBJECT_BINS> mapping(set);
|
||||
const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
|
||||
binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
|
||||
[&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; },
|
||||
[&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; });
|
||||
ObjectSplit s = binner.best(mapping,logBlockSize);
|
||||
binner.getSplitInfo(mapping, s, info);
|
||||
return s;
|
||||
}
|
||||
|
||||
/*! finds the best spatial split */
|
||||
__forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize);
|
||||
else return parallel_spatial_find (set, logBlockSize);
|
||||
}
|
||||
|
||||
/*! finds the best spatial split */
|
||||
__noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
SpatialBinner binner(empty);
|
||||
const SpatialBinMapping<SPATIAL_BINS> mapping(set);
|
||||
binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping);
|
||||
/* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
|
||||
return binner.best(mapping,logBlockSize); //,set.ext_size());
|
||||
}
|
||||
|
||||
__noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
|
||||
{
|
||||
SpatialBinner binner(empty);
|
||||
const SpatialBinMapping<SPATIAL_BINS> mapping(set);
|
||||
const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
|
||||
binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
|
||||
[&] (const range<size_t>& r) -> SpatialBinner {
|
||||
SpatialBinner binner(empty);
|
||||
binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);
|
||||
return binner; },
|
||||
[&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });
|
||||
/* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
|
||||
return binner.best(mapping,logBlockSize); //,set.ext_size());
|
||||
}
|
||||
|
||||
|
||||
/*! subdivides primitives based on a spatial split */
|
||||
__noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping)
|
||||
{
|
||||
assert(set.has_ext_range());
|
||||
const size_t max_ext_range_size = set.ext_range_size();
|
||||
const size_t ext_range_start = set.end();
|
||||
|
||||
/* atomic counter for number of primref splits */
|
||||
std::atomic<size_t> ext_elements;
|
||||
ext_elements.store(0);
|
||||
|
||||
const float fpos = split.mapping.pos(split.pos,split.dim);
|
||||
|
||||
const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
|
||||
|
||||
parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin();i<r.end();i++)
|
||||
{
|
||||
const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
|
||||
|
||||
if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */
|
||||
|
||||
//int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
|
||||
//int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
|
||||
//if (unlikely(bin0 < split.pos && bin1 >= split.pos))
|
||||
|
||||
if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
|
||||
{
|
||||
assert(splits > 1);
|
||||
|
||||
PrimRef left,right;
|
||||
const auto splitter = splitterFactory(prims0[i]);
|
||||
splitter(prims0[i],split.dim,fpos,left,right);
|
||||
|
||||
// no empty splits
|
||||
if (unlikely(left.bounds().empty() || right.bounds().empty())) continue;
|
||||
|
||||
left.lower.u = (left.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
|
||||
const size_t ID = ext_elements.fetch_add(1);
|
||||
|
||||
/* break if the number of subdivided elements are greater than the maximum allowed size */
|
||||
if (unlikely(ID >= max_ext_range_size))
|
||||
break;
|
||||
|
||||
/* only write within the correct bounds */
|
||||
assert(ID < max_ext_range_size);
|
||||
prims0[i] = left;
|
||||
prims0[ext_range_start+ID] = right;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const size_t numExtElements = min(max_ext_range_size,ext_elements.load());
|
||||
assert(set.end()+numExtElements<=set.ext_end());
|
||||
set._end += numExtElements;
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
PrimInfoExtRange set = set_i;
|
||||
|
||||
/* valid split */
|
||||
if (unlikely(!split.valid())) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
std::pair<size_t,size_t> ext_weights(0,0);
|
||||
|
||||
if (unlikely(split.spatial))
|
||||
{
|
||||
create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping);
|
||||
|
||||
/* spatial split */
|
||||
if (likely(set.size() < PARALLEL_THRESHOLD))
|
||||
ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset);
|
||||
else
|
||||
ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* object split */
|
||||
if (likely(set.size() < PARALLEL_THRESHOLD))
|
||||
ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset);
|
||||
else
|
||||
ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset);
|
||||
}
|
||||
|
||||
/* if we have an extended range, set extended child ranges and move right split range */
|
||||
if (unlikely(set.has_ext_range()))
|
||||
{
|
||||
setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo local_left(empty);
|
||||
PrimInfo local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const typename ObjectBinner::vint vSplitPos(splitPos);
|
||||
const typename ObjectBinner::vbool vSplitMask(splitDimMask);
|
||||
size_t center = serial_partitioning(prims0,
|
||||
begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) {
|
||||
return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask);
|
||||
},
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
|
||||
const size_t left_weight = local_left.end;
|
||||
const size_t right_weight = local_right.end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,local_left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,local_right);
|
||||
|
||||
assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
|
||||
assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo local_left(empty);
|
||||
PrimInfo local_right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
/* init spatial mapping */
|
||||
const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping;
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
|
||||
size_t center = serial_partitioning(prims0,
|
||||
begin,end,local_left,local_right,
|
||||
[&] (const PrimRef& ref) {
|
||||
const Vec3fa c = ref.bounds().center();
|
||||
return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);
|
||||
},
|
||||
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
|
||||
|
||||
const size_t left_weight = local_left.end;
|
||||
const size_t right_weight = local_right.end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,local_left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,local_right);
|
||||
assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
|
||||
assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo left(empty);
|
||||
PrimInfo right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
const typename ObjectBinner::vint vSplitPos(splitPos);
|
||||
const typename ObjectBinner::vbool vSplitMask(splitDimMask);
|
||||
auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
|
||||
|
||||
const size_t center = parallel_partitioning(
|
||||
prims0,begin,end,EmptyTy(),left,right,isLeft,
|
||||
[] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
|
||||
[] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
const size_t left_weight = left.end;
|
||||
const size_t right_weight = right.end;
|
||||
|
||||
left.begin = begin; left.end = center;
|
||||
right.begin = center; right.end = end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
assert(area(left.geomBounds) >= 0.0f);
|
||||
assert(area(right.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
__noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
PrimInfo left(empty);
|
||||
PrimInfo right(empty);
|
||||
const unsigned int splitPos = split.pos;
|
||||
const unsigned int splitDim = split.dim;
|
||||
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
|
||||
|
||||
/* init spatial mapping */
|
||||
const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping;
|
||||
const vint4 vSplitPos(splitPos);
|
||||
const vbool4 vSplitMask( (int)splitDimMask );
|
||||
|
||||
auto isLeft = [&] (const PrimRef &ref) {
|
||||
const Vec3fa c = ref.bounds().center();
|
||||
return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); };
|
||||
|
||||
const size_t center = parallel_partitioning(
|
||||
prims0,begin,end,EmptyTy(),left,right,isLeft,
|
||||
[] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
|
||||
[] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
|
||||
PARALLEL_PARTITION_BLOCK_SIZE);
|
||||
|
||||
const size_t left_weight = left.end;
|
||||
const size_t right_weight = right.end;
|
||||
|
||||
left.begin = begin; left.end = center;
|
||||
right.begin = center; right.end = end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
assert(area(left.geomBounds) >= 0.0f);
|
||||
assert(area(right.geomBounds) >= 0.0f);
|
||||
return std::pair<size_t,size_t>(left_weight,right_weight);
|
||||
}
|
||||
|
||||
void deterministic_order(const PrimInfoExtRange& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims0[set.begin()],&prims0[set.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const PrimInfoExtRange& set,
|
||||
PrimInfoExtRange& lset,
|
||||
PrimInfoExtRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
PrimInfo left(empty);
|
||||
for (size_t i=begin; i<center; i++) {
|
||||
left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
}
|
||||
const size_t lweight = left.end;
|
||||
|
||||
PrimInfo right(empty);
|
||||
for (size_t i=center; i<end; i++) {
|
||||
right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
|
||||
}
|
||||
const size_t rweight = right.end;
|
||||
|
||||
new (&lset) PrimInfoExtRange(begin,center,center,left);
|
||||
new (&rset) PrimInfoExtRange(center,end,end,right);
|
||||
|
||||
/* if we have an extended range */
|
||||
if (set.has_ext_range()) {
|
||||
setExtentedRanges(set,lset,rset,lweight,rweight);
|
||||
moveExtentedRange(set,lset,rset);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
PrimRef* const prims0;
|
||||
const PrimitiveSplitterFactory& splitterFactory;
|
||||
const CentGeomBBox3fa& root_info;
|
||||
};
|
||||
}
|
||||
}
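// Editor's note: the standalone snippet below is illustrative only and not part of the
// vendored Embree source; pack_split_budget, unpack_geomID and unpack_budget are made-up
// names, and a 32-bit unsigned int is assumed, as in the code above. It spells out the
// encoding used by bin2() and create_spatial_splits(): the top
// RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS (5) bits of the ID carry the remaining
// spatial-split budget, the low 27 bits keep the actual geometry ID.
#include <cassert>

constexpr unsigned kSplitBits = 5;                          // RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS
constexpr unsigned kIdMask    = 0xFFFFFFFFu >> kSplitBits;  // low 27 bits

inline unsigned pack_split_budget(unsigned geomID, unsigned splits) {
  assert(geomID <= kIdMask && splits < (1u << kSplitBits));
  return (geomID & kIdMask) | (splits << (32 - kSplitBits));
}
inline unsigned unpack_geomID(unsigned v) { return v & kIdMask; }
inline unsigned unpack_budget(unsigned v) { return v >> (32 - kSplitBits); }

// After one spatial split both halves keep a budget of (splits - 1), exactly as in
//   lower.u = (lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));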
188
engine/thirdparty/embree/kernels/builders/heuristic_strand_array.h
vendored
Normal file
@@ -0,0 +1,188 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "priminfo.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
struct HeuristicStrandSplit
|
||||
{
|
||||
typedef range<size_t> Set;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 10000;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;
|
||||
|
||||
/*! stores all information to perform some split */
|
||||
struct Split
|
||||
{
|
||||
/*! construct an invalid split by default */
|
||||
__forceinline Split()
|
||||
: sah(inf), axis0(zero), axis1(zero) {}
|
||||
|
||||
/*! constructs specified split */
|
||||
__forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
|
||||
: sah(sah), axis0(axis0), axis1(axis1) {}
|
||||
|
||||
/*! calculates standard surface area heuristic for the split */
|
||||
__forceinline float splitSAH() const { return sah; }
|
||||
|
||||
/*! test if this split is valid */
|
||||
__forceinline bool valid() const { return sah != float(inf); }
|
||||
|
||||
public:
|
||||
float sah; //!< SAH cost of the split
|
||||
Vec3fa axis0, axis1; //!< axis the two strands are aligned into
|
||||
};
|
||||
|
||||
__forceinline HeuristicStrandSplit () // FIXME: required?
|
||||
: scene(nullptr), prims(nullptr) {}
|
||||
|
||||
/*! remember prim array */
|
||||
__forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
|
||||
: scene(scene), prims(prims) {}
|
||||
|
||||
__forceinline const Vec3fa direction(const PrimRef& prim) {
|
||||
return scene->get(prim.geomID())->computeDirection(prim.primID());
|
||||
}
|
||||
|
||||
__forceinline const BBox3fa bounds(const PrimRef& prim) {
|
||||
return scene->get(prim.geomID())->vbounds(prim.primID());
|
||||
}
|
||||
|
||||
__forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
|
||||
return scene->get(prim.geomID())->vbounds(space,prim.primID());
|
||||
}
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const range<size_t>& set, size_t logBlockSize)
|
||||
{
|
||||
Vec3fa axis0(0,0,1);
|
||||
uint64_t bestGeomPrimID = -1;
|
||||
|
||||
/* curve with minimum ID determines first axis */
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const uint64_t geomprimID = prims[i].ID64();
|
||||
if (geomprimID >= bestGeomPrimID) continue;
|
||||
const Vec3fa axis = direction(prims[i]);
|
||||
if (sqr_length(axis) > 1E-18f) {
|
||||
axis0 = normalize(axis);
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
|
||||
/* find 2nd axis that is most misaligned with first axis and has minimum ID */
|
||||
float bestCos = 1.0f;
|
||||
Vec3fa axis1 = axis0;
|
||||
bestGeomPrimID = -1;
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
const uint64_t geomprimID = prims[i].ID64();
|
||||
Vec3fa axisi = direction(prims[i]);
|
||||
float leni = length(axisi);
|
||||
if (leni == 0.0f) continue;
|
||||
axisi /= leni;
|
||||
float cos = abs(dot(axisi,axis0));
|
||||
if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
|
||||
bestCos = cos; axis1 = axisi;
|
||||
bestGeomPrimID = geomprimID;
|
||||
}
|
||||
}
|
||||
|
||||
/* partition the two strands */
|
||||
size_t lnum = 0, rnum = 0;
|
||||
BBox3fa lbounds = empty, rbounds = empty;
|
||||
const LinearSpace3fa space0 = frame(axis0).transposed();
|
||||
const LinearSpace3fa space1 = frame(axis1).transposed();
|
||||
|
||||
for (size_t i=set.begin(); i<set.end(); i++)
|
||||
{
|
||||
PrimRef& prim = prims[i];
|
||||
const Vec3fa axisi = normalize(direction(prim));
|
||||
const float cos0 = abs(dot(axisi,axis0));
|
||||
const float cos1 = abs(dot(axisi,axis1));
|
||||
|
||||
if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
|
||||
else { rnum++; rbounds.extend(bounds(space1,prim)); }
|
||||
}
|
||||
|
||||
/*! return an invalid split if we do not partition */
|
||||
if (lnum == 0 || rnum == 0)
|
||||
return Split(inf,axis0,axis1);
|
||||
|
||||
/*! calculate sah for the split */
|
||||
const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
|
||||
const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
|
||||
const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
|
||||
return Split(sah,axis0,axis1);
|
||||
}
|
||||
|
||||
/*! array partitioning */
|
||||
void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
if (!split.valid()) {
|
||||
deterministic_order(set);
|
||||
return splitFallback(set,lset,rset);
|
||||
}
|
||||
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
CentGeomBBox3fa local_left(empty);
|
||||
CentGeomBBox3fa local_right(empty);
|
||||
|
||||
auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
|
||||
const Vec3fa axisi = normalize(direction(prim));
|
||||
const float cos0 = abs(dot(axisi,split.axis0));
|
||||
const float cos1 = abs(dot(axisi,split.axis1));
|
||||
return cos0 > cos1;
|
||||
};
|
||||
|
||||
auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
|
||||
pinfo.extend(bounds(ref));
|
||||
};
|
||||
|
||||
size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
|
||||
|
||||
new (&lset) PrimInfoRange(begin,center,local_left);
|
||||
new (&rset) PrimInfoRange(center,end,local_right);
|
||||
assert(area(lset.geomBounds) >= 0.0f);
|
||||
assert(area(rset.geomBounds) >= 0.0f);
|
||||
}
|
||||
|
||||
void deterministic_order(const Set& set)
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
std::sort(&prims[set.begin()],&prims[set.end()]);
|
||||
}
|
||||
|
||||
void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
|
||||
{
|
||||
const size_t begin = set.begin();
|
||||
const size_t end = set.end();
|
||||
const size_t center = (begin + end)/2;
|
||||
|
||||
CentGeomBBox3fa left(empty);
|
||||
for (size_t i=begin; i<center; i++)
|
||||
left.extend(bounds(prims[i]));
|
||||
new (&lset) PrimInfoRange(begin,center,left);
|
||||
|
||||
CentGeomBBox3fa right(empty);
|
||||
for (size_t i=center; i<end; i++)
|
||||
right.extend(bounds(prims[i]));
|
||||
new (&rset) PrimInfoRange(center,end,right);
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* const scene;
|
||||
PrimRef* const prims;
|
||||
};
|
||||
}
|
||||
}
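// Editor's note: the standalone snippet below is illustrative only and not part of the
// vendored Embree source; V3, dot3 and onLeftStrand are made-up names. It restates the
// partition test used by HeuristicStrandSplit::split() above: a hair segment goes to the
// left set when its direction is more parallel to axis0 than to axis1 (directions are
// assumed normalized, as in the code above).
#include <cmath>

struct V3 { float x, y, z; };
inline float dot3(const V3& a, const V3& b) { return a.x*b.x + a.y*b.y + a.z*b.z; }

inline bool onLeftStrand(const V3& dir, const V3& axis0, const V3& axis1)
{
  const float cos0 = std::fabs(dot3(dir, axis0));  // alignment with the first strand axis
  const float cos1 = std::fabs(dot3(dir, axis1));  // alignment with the second strand axis
  return cos0 > cos1;                              // same rule as primOnLeftSide above
}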
237
engine/thirdparty/embree/kernels/builders/heuristic_timesplit_array.h
vendored
Normal file
@@ -0,0 +1,237 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../builders/primref_mb.h"
|
||||
#include "../../common/algorithms/parallel_filter.h"
|
||||
|
||||
#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Performs standard object binning */
|
||||
template<typename PrimRefMB, typename RecalculatePrimRef, size_t BINS>
|
||||
struct HeuristicMBlurTemporalSplit
|
||||
{
|
||||
typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
|
||||
typedef mvector<PrimRefMB>* PrimRefVector;
|
||||
typedef typename PrimRefMB::BBox BBox;
|
||||
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
HeuristicMBlurTemporalSplit (MemoryMonitorInterface* device, const RecalculatePrimRef& recalculatePrimRef)
|
||||
: device(device), recalculatePrimRef(recalculatePrimRef) {}
|
||||
|
||||
struct TemporalBinInfo
|
||||
{
|
||||
__forceinline TemporalBinInfo () {
|
||||
}
|
||||
|
||||
__forceinline TemporalBinInfo (EmptyTy)
|
||||
{
|
||||
for (size_t i=0; i<BINS-1; i++)
|
||||
{
|
||||
count0[i] = count1[i] = 0;
|
||||
bounds0[i] = bounds1[i] = empty;
|
||||
}
|
||||
}
|
||||
|
||||
void bin(const PrimRefMB* prims, size_t begin, size_t end, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
|
||||
{
|
||||
for (int b=0; b<BINS-1; b++)
|
||||
{
|
||||
const float t = float(b+1)/float(BINS);
|
||||
const float ct = lerp(time_range.lower,time_range.upper,t);
|
||||
const float center_time = set.align_time(ct);
|
||||
if (center_time <= time_range.lower) continue;
|
||||
if (center_time >= time_range.upper) continue;
|
||||
const BBox1f dt0(time_range.lower,center_time);
|
||||
const BBox1f dt1(center_time,time_range.upper);
|
||||
|
||||
/* find linear bounds for both time segments */
|
||||
for (size_t i=begin; i<end; i++)
|
||||
{
|
||||
if (prims[i].time_range_overlap(dt0))
|
||||
{
|
||||
const LBBox3fa bn0 = recalculatePrimRef.linearBounds(prims[i],dt0);
|
||||
#if MBLUR_BIN_LBBOX
|
||||
bounds0[b].extend(bn0);
|
||||
#else
|
||||
bounds0[b].extend(bn0.interpolate(0.5f));
|
||||
#endif
|
||||
count0[b] += prims[i].timeSegmentRange(dt0).size();
|
||||
}
|
||||
|
||||
if (prims[i].time_range_overlap(dt1))
|
||||
{
|
||||
const LBBox3fa bn1 = recalculatePrimRef.linearBounds(prims[i],dt1);
|
||||
#if MBLUR_BIN_LBBOX
|
||||
bounds1[b].extend(bn1);
|
||||
#else
|
||||
bounds1[b].extend(bn1.interpolate(0.5f));
|
||||
#endif
|
||||
count1[b] += prims[i].timeSegmentRange(dt1).size();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void bin_parallel(const PrimRefMB* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
|
||||
{
|
||||
if (likely(end-begin < parallelThreshold)) {
|
||||
bin(prims,begin,end,time_range,set,recalculatePrimRef);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto bin = [&](const range<size_t>& r) -> TemporalBinInfo {
|
||||
TemporalBinInfo binner(empty); binner.bin(prims, r.begin(), r.end(), time_range, set, recalculatePrimRef); return binner;
|
||||
};
|
||||
*this = parallel_reduce(begin,end,blockSize,TemporalBinInfo(empty),bin,merge2);
|
||||
}
|
||||
}
|
||||
|
||||
/*! merges in other binning information */
|
||||
__forceinline void merge (const TemporalBinInfo& other)
|
||||
{
|
||||
for (size_t i=0; i<BINS-1; i++)
|
||||
{
|
||||
count0[i] += other.count0[i];
|
||||
count1[i] += other.count1[i];
|
||||
bounds0[i].extend(other.bounds0[i]);
|
||||
bounds1[i].extend(other.bounds1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static __forceinline const TemporalBinInfo merge2(const TemporalBinInfo& a, const TemporalBinInfo& b) {
|
||||
TemporalBinInfo r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
Split best(int logBlockSize, BBox1f time_range, const SetMB& set)
|
||||
{
|
||||
float bestSAH = inf;
|
||||
float bestPos = 0.0f;
|
||||
for (int b=0; b<BINS-1; b++)
|
||||
{
|
||||
float t = float(b+1)/float(BINS);
|
||||
float ct = lerp(time_range.lower,time_range.upper,t);
|
||||
const float center_time = set.align_time(ct);
|
||||
if (center_time <= time_range.lower) continue;
|
||||
if (center_time >= time_range.upper) continue;
|
||||
const BBox1f dt0(time_range.lower,center_time);
|
||||
const BBox1f dt1(center_time,time_range.upper);
|
||||
|
||||
/* calculate sah */
|
||||
const size_t lCount = (count0[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
|
||||
const size_t rCount = (count1[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
|
||||
float sah0 = expectedApproxHalfArea(bounds0[b])*float(lCount)*dt0.size();
|
||||
float sah1 = expectedApproxHalfArea(bounds1[b])*float(rCount)*dt1.size();
|
||||
if (unlikely(lCount == 0)) sah0 = 0.0f; // happens for initial splits when objects not alive over entire shutter time
|
||||
if (unlikely(rCount == 0)) sah1 = 0.0f;
|
||||
const float sah = sah0+sah1;
|
||||
if (sah < bestSAH) {
|
||||
bestSAH = sah;
|
||||
bestPos = center_time;
|
||||
}
|
||||
}
|
||||
return Split(bestSAH*MBLUR_TIME_SPLIT_THRESHOLD,(unsigned)Split::SPLIT_TEMPORAL,0,bestPos);
|
||||
}
|
||||
|
||||
public:
|
||||
size_t count0[BINS-1];
|
||||
size_t count1[BINS-1];
|
||||
BBox bounds0[BINS-1];
|
||||
BBox bounds1[BINS-1];
|
||||
};
|
||||
|
||||
/*! finds the best split */
|
||||
const Split find(const SetMB& set, const size_t logBlockSize)
|
||||
{
|
||||
assert(set.size() > 0);
|
||||
TemporalBinInfo binner(empty);
|
||||
binner.bin_parallel(set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,set.time_range,set,recalculatePrimRef);
|
||||
Split tsplit = binner.best((int)logBlockSize,set.time_range,set);
|
||||
if (!tsplit.valid()) tsplit.data = Split::SPLIT_FALLBACK; // use fallback split
|
||||
return tsplit;
|
||||
}
|
||||
|
||||
__forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& tsplit, const SetMB& set, SetMB& lset, SetMB& rset)
|
||||
{
|
||||
assert(tsplit.sah != float(inf));
|
||||
assert(tsplit.fpos > set.time_range.lower);
|
||||
assert(tsplit.fpos < set.time_range.upper);
|
||||
|
||||
float center_time = tsplit.fpos;
|
||||
const BBox1f time_range0(set.time_range.lower,center_time);
|
||||
const BBox1f time_range1(center_time,set.time_range.upper);
|
||||
mvector<PrimRefMB>& prims = *set.prims;
|
||||
|
||||
/* calculate primrefs for first time range */
|
||||
std::unique_ptr<mvector<PrimRefMB>> new_vector(new mvector<PrimRefMB>(device, set.size()));
|
||||
PrimRefVector lprims = new_vector.get();
|
||||
|
||||
auto reduction_func0 = [&] (const range<size_t>& r) {
|
||||
PrimInfoMB pinfo = empty;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
if (likely(prims[i].time_range_overlap(time_range0)))
|
||||
{
|
||||
const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range0);
|
||||
(*lprims)[i-set.begin()] = prim;
|
||||
pinfo.add_primref(prim);
|
||||
}
|
||||
else
|
||||
{
|
||||
(*lprims)[i-set.begin()] = prims[i];
|
||||
}
|
||||
}
|
||||
return pinfo;
|
||||
};
|
||||
PrimInfoMB linfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func0,PrimInfoMB::merge2);
|
||||
|
||||
/* primrefs for first time range are in lprims[0 .. set.size()) */
|
||||
/* some primitives may need to be filtered out */
|
||||
if (linfo.size() != set.size())
|
||||
linfo.object_range._end = parallel_filter(lprims->data(), size_t(0), set.size(), size_t(1024),
|
||||
[&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range0); });
|
||||
|
||||
lset = SetMB(linfo,lprims,time_range0);
|
||||
|
||||
/* calculate primrefs for second time range */
|
||||
auto reduction_func1 = [&] (const range<size_t>& r) {
|
||||
PrimInfoMB pinfo = empty;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
if (likely(prims[i].time_range_overlap(time_range1)))
|
||||
{
|
||||
const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range1);
|
||||
prims[i] = prim;
|
||||
pinfo.add_primref(prim);
|
||||
}
|
||||
}
|
||||
return pinfo;
|
||||
};
|
||||
PrimInfoMB rinfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func1,PrimInfoMB::merge2);
|
||||
rinfo.object_range = range<size_t>(set.begin(), set.begin() + rinfo.size());
|
||||
|
||||
/* primrefs for second time range are in prims[set.begin() .. set.end()) */
|
||||
/* some primitives may need to be filtered out */
|
||||
if (rinfo.size() != set.size())
|
||||
rinfo.object_range._end = parallel_filter(prims.data(), set.begin(), set.end(), size_t(1024),
|
||||
[&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range1); });
|
||||
|
||||
rset = SetMB(rinfo,&prims,time_range1);
|
||||
|
||||
return new_vector;
|
||||
}
|
||||
|
||||
private:
|
||||
MemoryMonitorInterface* device; // device to report memory usage to
|
||||
const RecalculatePrimRef recalculatePrimRef;
|
||||
};
|
||||
}
|
||||
}
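// Editor's note: the standalone snippet below is illustrative only and not part of the
// vendored Embree source; temporal_split_cost is a made-up name. It restates the
// per-candidate cost evaluated by TemporalBinInfo::best() above with scalars: each half
// is charged its expected half-area times its rounded-up block count times the length of
// its time range, and best() keeps the candidate center time with the smallest sum.
#include <cstddef>

inline float temporal_split_cost(float halfArea0, std::size_t count0, float dt0,
                                 float halfArea1, std::size_t count1, float dt1,
                                 int logBlockSize)
{
  const std::size_t blocks0 = (count0 + (std::size_t(1) << logBlockSize) - 1) >> logBlockSize;
  const std::size_t blocks1 = (count1 + (std::size_t(1) << logBlockSize) - 1) >> logBlockSize;
  const float sah0 = (blocks0 == 0) ? 0.0f : halfArea0 * float(blocks0) * dt0;  // an empty side
  const float sah1 = (blocks1 == 0) ? 0.0f : halfArea1 * float(blocks1) * dt1;  //   costs nothing
  return sah0 + sah1;
}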
167
engine/thirdparty/embree/kernels/builders/priminfo.h
vendored
Normal file
@@ -0,0 +1,167 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "primref.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
// FIXME: maybe there's a better place for this util fct
|
||||
__forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2)
|
||||
{
|
||||
const Vec3fa e0 = v1-v0;
|
||||
const Vec3fa e1 = v2-v0;
|
||||
const Vec3fa d = cross(e0,e1);
|
||||
return fabs(d.x) + fabs(d.y) + fabs(d.z);
|
||||
}
|
||||
|
||||
//namespace isa
|
||||
//{
|
||||
template<typename BBox>
|
||||
class CentGeom
|
||||
{
|
||||
public:
|
||||
__forceinline CentGeom () {}
|
||||
|
||||
__forceinline CentGeom (EmptyTy)
|
||||
: geomBounds(empty), centBounds(empty) {}
|
||||
|
||||
__forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds)
|
||||
: geomBounds(geomBounds), centBounds(centBounds) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void extend_primref(const PrimRef& prim)
|
||||
{
|
||||
BBox bounds; Vec3fa center;
|
||||
prim.binBoundsAndCenter(bounds,center);
|
||||
geomBounds.extend(bounds);
|
||||
centBounds.extend(center);
|
||||
}
|
||||
|
||||
static void extend_ref (CentGeom& pinfo, const PrimRef& ref) {
|
||||
pinfo.extend_primref(ref);
|
||||
};
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void extend_center2(const PrimRef& prim)
|
||||
{
|
||||
BBox3fa bounds = prim.bounds();
|
||||
geomBounds.extend(bounds);
|
||||
centBounds.extend(bounds.center2());
|
||||
}
|
||||
|
||||
__forceinline void extend(const BBox& geomBounds_) {
|
||||
geomBounds.extend(geomBounds_);
|
||||
centBounds.extend(center2(geomBounds_));
|
||||
}
|
||||
|
||||
__forceinline void merge(const CentGeom& other)
|
||||
{
|
||||
geomBounds.extend(other.geomBounds);
|
||||
centBounds.extend(other.centBounds);
|
||||
}
|
||||
|
||||
static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) {
|
||||
CentGeom r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
public:
|
||||
BBox geomBounds; //!< geometry bounds of primitives
|
||||
BBox3fa centBounds; //!< centroid bounds of primitives
|
||||
};
|
||||
|
||||
typedef CentGeom<BBox3fa> CentGeomBBox3fa;
|
||||
|
||||
/*! stores bounding information for a set of primitives */
|
||||
template<typename BBox>
|
||||
class PrimInfoT : public CentGeom<BBox>
|
||||
{
|
||||
public:
|
||||
using CentGeom<BBox>::geomBounds;
|
||||
using CentGeom<BBox>::centBounds;
|
||||
|
||||
__forceinline PrimInfoT () {}
|
||||
|
||||
__forceinline PrimInfoT (EmptyTy)
|
||||
: CentGeom<BBox>(empty), begin(0), end(0) {}
|
||||
|
||||
__forceinline PrimInfoT (size_t N)
|
||||
: CentGeom<BBox>(empty), begin(0), end(N) {}
|
||||
|
||||
__forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
|
||||
: CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeom<BBox>::extend_primref(prim);
|
||||
end++;
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_center2(const PrimRef& prim) {
|
||||
CentGeom<BBox>::extend_center2(prim);
|
||||
end++;
|
||||
}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_center2(const PrimRef& prim, const size_t i) {
|
||||
CentGeom<BBox>::extend_center2(prim);
|
||||
end+=i;
|
||||
}
|
||||
|
||||
/*__forceinline void add(const BBox& geomBounds_) {
|
||||
CentGeom<BBox>::extend(geomBounds_);
|
||||
end++;
|
||||
}
|
||||
|
||||
__forceinline void add(const BBox& geomBounds_, const size_t i) {
|
||||
CentGeom<BBox>::extend(geomBounds_);
|
||||
end+=i;
|
||||
}*/
|
||||
|
||||
__forceinline void merge(const PrimInfoT& other)
|
||||
{
|
||||
CentGeom<BBox>::merge(other);
|
||||
begin += other.begin;
|
||||
end += other.end;
|
||||
}
|
||||
|
||||
static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) {
|
||||
PrimInfoT r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
/*! returns the number of primitives */
|
||||
__forceinline size_t size() const {
|
||||
return end-begin;
|
||||
}
|
||||
|
||||
__forceinline float halfArea() {
|
||||
return expectedApproxHalfArea(geomBounds);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return expectedApproxHalfArea(geomBounds)*float(size());
|
||||
//return halfArea(geomBounds)*blocks(num);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
//return halfArea(geomBounds)*float((num+3) >> 2);
|
||||
//return halfArea(geomBounds)*blocks(num);
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) {
|
||||
return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}";
|
||||
}
|
||||
|
||||
public:
|
||||
size_t begin,end; //!< number of primitives
|
||||
};
|
||||
|
||||
typedef PrimInfoT<BBox3fa> PrimInfo;
|
||||
//typedef PrimInfoT<LBBox3fa> PrimInfoMB;
|
||||
//}
|
||||
}
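// Editor's note: the standalone snippet below is illustrative only and not part of the
// vendored Embree source; leaf_blocks is a made-up name. leafSAH(block_shift) above
// rounds the primitive count up to whole blocks of 2^block_shift primitives before
// multiplying by the expected half-area; with block_shift = 2, nine primitives occupy
// (9 + 4 - 1) >> 2 == 3 blocks.
#include <cstddef>

inline std::size_t leaf_blocks(std::size_t numPrims, std::size_t block_shift) {
  return (numPrims + (std::size_t(1) << block_shift) - 1) >> block_shift;
}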
210
engine/thirdparty/embree/kernels/builders/priminfo_mb.h
vendored
Normal file
@@ -0,0 +1,210 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "primref_mb.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! stores bounding information for a set of primitives */
|
||||
template<typename BBox>
|
||||
class PrimInfoMBT : public CentGeom<BBox>
|
||||
{
|
||||
public:
|
||||
using CentGeom<BBox>::geomBounds;
|
||||
using CentGeom<BBox>::centBounds;
|
||||
|
||||
__forceinline PrimInfoMBT () {
|
||||
}
|
||||
|
||||
__forceinline PrimInfoMBT (EmptyTy)
|
||||
: CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
__forceinline PrimInfoMBT (size_t begin, size_t end)
|
||||
: CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
|
||||
|
||||
template<typename PrimRef>
|
||||
__forceinline void add_primref(const PrimRef& prim)
|
||||
{
|
||||
CentGeom<BBox>::extend_primref(prim);
|
||||
time_range.extend(prim.time_range);
|
||||
object_range._end++;
|
||||
num_time_segments += prim.size();
|
||||
if (max_num_time_segments < prim.totalTimeSegments()) {
|
||||
max_num_time_segments = prim.totalTimeSegments();
|
||||
max_time_range = prim.time_range;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline void merge(const PrimInfoMBT& other)
|
||||
{
|
||||
CentGeom<BBox>::merge(other);
|
||||
time_range.extend(other.time_range);
|
||||
object_range._begin += other.object_range.begin();
|
||||
object_range._end += other.object_range.end();
|
||||
num_time_segments += other.num_time_segments;
|
||||
if (max_num_time_segments < other.max_num_time_segments) {
|
||||
max_num_time_segments = other.max_num_time_segments;
|
||||
max_time_range = other.max_time_range;
|
||||
}
|
||||
}
|
||||
|
||||
static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
|
||||
PrimInfoMBT r = a; r.merge(b); return r;
|
||||
}
|
||||
|
||||
__forceinline size_t begin() const {
|
||||
return object_range.begin();
|
||||
}
|
||||
|
||||
__forceinline size_t end() const {
|
||||
return object_range.end();
|
||||
}
|
||||
|
||||
/*! returns the number of primitives */
|
||||
__forceinline size_t size() const {
|
||||
return object_range.size();
|
||||
}
|
||||
|
||||
__forceinline float halfArea() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH() const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
|
||||
}
|
||||
|
||||
__forceinline float leafSAH(size_t block_shift) const {
|
||||
return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
|
||||
}
|
||||
|
||||
__forceinline float align_time(float ct) const
|
||||
{
|
||||
//return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
|
||||
float t0 = (ct-max_time_range.lower)/max_time_range.size();
|
||||
float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
|
||||
return t1*max_time_range.size()+max_time_range.lower;
|
||||
}
|
||||
|
||||
/*! stream output */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
|
||||
{
|
||||
return cout << "PrimInfo { " <<
|
||||
"object_range = " << pinfo.object_range <<
|
||||
", time_range = " << pinfo.time_range <<
|
||||
", time_segments = " << pinfo.num_time_segments <<
|
||||
", geomBounds = " << pinfo.geomBounds <<
|
||||
", centBounds = " << pinfo.centBounds <<
|
||||
"}";
|
||||
}
|
||||
|
||||
public:
|
||||
range<size_t> object_range; //!< primitive range
|
||||
size_t num_time_segments; //!< total number of time segments of all added primrefs
|
||||
size_t max_num_time_segments; //!< maximum number of time segments of a primitive
|
||||
BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
|
||||
BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
|
||||
};
|
||||
|
||||
typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
|
||||
|
||||
struct SetMB : public PrimInfoMB
|
||||
{
|
||||
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
|
||||
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
|
||||
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
|
||||
|
||||
typedef mvector<PrimRefMB>* PrimRefVector;
|
||||
|
||||
__forceinline SetMB() {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
|
||||
: PrimInfoMB(pinfo_i), prims(prims) {}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
object_range = object_range_in;
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
|
||||
: PrimInfoMB(pinfo_i), prims(prims)
|
||||
{
|
||||
time_range = intersect(time_range,time_range_in);
|
||||
}
|
||||
|
||||
void deterministic_order() const
|
||||
{
|
||||
/* required as parallel partition destroys original primitive order */
|
||||
PrimRefMB* prim = prims->data();
|
||||
std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
|
||||
{
|
||||
LBBox3fa cbounds(empty);
|
||||
for (size_t j = r.begin(); j < r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
|
||||
cbounds.extend(bn);
|
||||
};
|
||||
return cbounds;
|
||||
};
|
||||
|
||||
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
|
||||
reduce,
|
||||
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
|
||||
}
|
||||
|
||||
template<typename RecalculatePrimRef>
|
||||
const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
|
||||
{
|
||||
auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
|
||||
{
|
||||
PrimInfoMB pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
PrimRefMB& ref = (*prims)[j];
|
||||
PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
|
||||
pinfo.add_primref(ref1);
|
||||
};
|
||||
return pinfo;
|
||||
};
|
||||
|
||||
const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
|
||||
PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
|
||||
|
||||
return SetMB(pinfo,prims,object_range,time_range);
|
||||
}
|
||||
|
||||
public:
|
||||
PrimRefVector prims;
|
||||
};
|
||||
//}
|
||||
}
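// Editor's note: standalone sketch (illustrative, not embree code) of the align_time()
// computation above: a candidate split time ct is normalized into the [lower,upper] time
// range of the primitive with the most segments, rounded to the nearest segment boundary,
// and mapped back to absolute time. The numeric values below are made up for demonstration.
#include <cmath>
#include <cstdio>

float align_time(float ct, float lower, float upper, unsigned max_num_time_segments) {
  const float size = upper - lower;
  const float t0 = (ct - lower) / size;                                              // normalize into [0,1]
  const float t1 = std::round(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
  return t1 * size + lower;                                                          // back to absolute time
}

int main() {
  // 4 segments over [0.2, 1.0]: boundaries at 0.2, 0.4, 0.6, 0.8, 1.0
  std::printf("%f\n", align_time(0.55f, 0.2f, 1.0f, 4)); // -> 0.6
  return 0;
}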
139
engine/thirdparty/embree/kernels/builders/primref.h
vendored
Normal file
@@ -0,0 +1,139 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! A primitive reference stores the bounds of the primitive and its ID. */
|
||||
struct __aligned(32) PrimRef
|
||||
{
|
||||
__forceinline PrimRef () {}
|
||||
|
||||
#if defined(__AVX__)
|
||||
__forceinline PrimRef(const PrimRef& v) {
|
||||
vfloat8::store((float*)this,vfloat8::load((float*)&v));
|
||||
}
|
||||
__forceinline PrimRef& operator=(const PrimRef& v) {
|
||||
vfloat8::store((float*)this,vfloat8::load((float*)&v)); return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
__forceinline PrimRef (const BBox3fa& bounds, unsigned int geomID, unsigned int primID)
|
||||
{
|
||||
lower = Vec3fx(bounds.lower, geomID);
|
||||
upper = Vec3fx(bounds.upper, primID);
|
||||
}
|
||||
|
||||
__forceinline PrimRef (const BBox3fa& bounds, size_t id)
|
||||
{
|
||||
#if defined(__64BIT__)
|
||||
lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF));
|
||||
upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF));
|
||||
#else
|
||||
lower = Vec3fx(bounds.lower, (unsigned)id);
|
||||
upper = Vec3fx(bounds.upper, (unsigned)0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! calculates twice the center of the primitive */
|
||||
__forceinline const Vec3fa center2() const {
|
||||
return lower+upper;
|
||||
}
|
||||
|
||||
/*! return the bounding box of the primitive */
|
||||
__forceinline const BBox3fa bounds() const {
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! size for bin heuristic is 1 */
|
||||
__forceinline unsigned size() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
bounds_o = bounds();
|
||||
center_o = embree::center2(bounds_o);
|
||||
}
|
||||
|
||||
__forceinline unsigned& geomIDref() { // FIXME: remove !!!!!!!
|
||||
return lower.u;
|
||||
}
|
||||
__forceinline unsigned& primIDref() { // FIXME: remove !!!!!!!
|
||||
return upper.u;
|
||||
}
|
||||
|
||||
/*! returns the geometry ID */
|
||||
__forceinline unsigned geomID() const {
|
||||
return lower.a;
|
||||
}
|
||||
|
||||
/*! returns the primitive ID */
|
||||
__forceinline unsigned primID() const {
|
||||
return upper.a;
|
||||
}
|
||||
|
||||
/*! returns a size_t sized ID */
|
||||
__forceinline size_t ID() const {
|
||||
#if defined(__64BIT__)
|
||||
return size_t(lower.u) + (size_t(upper.u) << 32);
|
||||
#else
|
||||
return size_t(lower.u);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! special function for operator< */
|
||||
__forceinline uint64_t ID64() const {
|
||||
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
|
||||
}
|
||||
|
||||
/*! allows sorting the primrefs by ID */
|
||||
friend __forceinline bool operator<(const PrimRef& p0, const PrimRef& p1) {
|
||||
return p0.ID64() < p1.ID64();
|
||||
}
|
||||
|
||||
/*! Outputs primitive reference to a stream. */
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRef& ref) {
|
||||
return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << " }";
|
||||
}
|
||||
|
||||
public:
|
||||
Vec3fx lower; //!< lower bounds and geomID
|
||||
Vec3fx upper; //!< upper bounds and primID
|
||||
};
|
||||
|
||||
/*! fast exchange for PrimRefs */
|
||||
__forceinline void xchg(PrimRef& a, PrimRef& b)
|
||||
{
|
||||
#if defined(__AVX__)
|
||||
const vfloat8 aa = vfloat8::load((float*)&a);
|
||||
const vfloat8 bb = vfloat8::load((float*)&b);
|
||||
vfloat8::store((float*)&a,bb);
|
||||
vfloat8::store((float*)&b,aa);
|
||||
#else
|
||||
std::swap(a,b);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
struct SubGridBuildData {
|
||||
unsigned short sx,sy;
|
||||
unsigned int primID;
|
||||
|
||||
__forceinline SubGridBuildData() {};
|
||||
__forceinline SubGridBuildData(const unsigned int sx, const unsigned int sy, const unsigned int primID) : sx(sx), sy(sy), primID(primID) {};
|
||||
|
||||
__forceinline size_t x() const { return (size_t)sx & 0x7fff; }
|
||||
__forceinline size_t y() const { return (size_t)sy & 0x7fff; }
|
||||
|
||||
};
|
||||
}
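// Editor's note: standalone sketch (not embree code) of how PrimRef stores a 64-bit ID in
// the two otherwise unused 32-bit lanes of its lower/upper bounds, mirroring the
// PrimRef(bounds,id) constructor and ID() above. Plain integers stand in for Vec3fx lanes,
// and a 64-bit size_t is assumed (the __64BIT__ path).
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct PackedID {
  std::uint32_t lo_u, hi_u;                       // stands in for lower.u / upper.u
  explicit PackedID(std::size_t id)
    : lo_u(std::uint32_t(id & 0xFFFFFFFF)), hi_u(std::uint32_t((id >> 32) & 0xFFFFFFFF)) {}
  std::size_t ID() const {                        // same reconstruction as PrimRef::ID()
    return std::size_t(lo_u) + (std::size_t(hi_u) << 32);
  }
};

int main() {
  const std::size_t id = (std::size_t(7) << 32) | 42;  // high half and low half, like geomID/primID
  PackedID p(id);
  std::printf("round trip ok: %d\n", int(p.ID() == id));
  return 0;
}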
262
engine/thirdparty/embree/kernels/builders/primref_mb.h
vendored
Normal file
@@ -0,0 +1,262 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
|
||||
#define MBLUR_BIN_LBBOX 1
|
||||
|
||||
namespace embree
|
||||
{
|
||||
#if MBLUR_BIN_LBBOX
|
||||
|
||||
/*! A primitive reference stores the bounds of the primitive and its ID. */
|
||||
struct PrimRefMB
|
||||
{
|
||||
typedef LBBox3fa BBox;
|
||||
|
||||
__forceinline PrimRefMB () {}
|
||||
|
||||
__forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
|
||||
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
lbounds.bounds0.lower.a = geomID;
|
||||
lbounds.bounds0.upper.a = primID;
|
||||
lbounds.bounds1.lower.a = activeTimeSegments;
|
||||
lbounds.bounds1.upper.a = totalTimeSegments;
|
||||
}
|
||||
|
||||
__forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
|
||||
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
#if defined(__64BIT__)
|
||||
lbounds.bounds0.lower.a = id & 0xFFFFFFFF;
|
||||
lbounds.bounds0.upper.a = (id >> 32) & 0xFFFFFFFF;
|
||||
#else
|
||||
lbounds.bounds0.lower.a = id;
|
||||
lbounds.bounds0.upper.a = 0;
|
||||
#endif
|
||||
lbounds.bounds1.lower.a = activeTimeSegments;
|
||||
lbounds.bounds1.upper.a = totalTimeSegments;
|
||||
}
|
||||
|
||||
__forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
|
||||
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
#if defined(__64BIT__)
|
||||
lbounds.bounds0.lower.u = id & 0xFFFFFFFF;
|
||||
lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF;
|
||||
#else
|
||||
lbounds.bounds0.lower.u = id;
|
||||
lbounds.bounds0.upper.u = 0;
|
||||
#endif
|
||||
lbounds.bounds1.lower.a = activeTimeSegments;
|
||||
lbounds.bounds1.upper.a = totalTimeSegments;
|
||||
}
|
||||
|
||||
/*! returns bounds for binning */
|
||||
__forceinline LBBox3fa bounds() const {
|
||||
return lbounds;
|
||||
}
|
||||
|
||||
/*! returns the number of time segments of this primref */
|
||||
__forceinline unsigned size() const {
|
||||
return lbounds.bounds1.lower.a;
|
||||
}
|
||||
|
||||
__forceinline unsigned totalTimeSegments() const {
|
||||
return lbounds.bounds1.upper.a;
|
||||
}
|
||||
|
||||
/* calculate overlapping time segment range */
|
||||
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
|
||||
return getTimeSegmentRange(range,time_range,float(totalTimeSegments()));
|
||||
}
|
||||
|
||||
/* returns time that corresponds to time step */
|
||||
__forceinline float timeStep(const int i) const {
|
||||
assert(i>=0 && i<=(int)totalTimeSegments());
|
||||
return time_range.lower + time_range.size()*float(i)/float(totalTimeSegments());
|
||||
}
|
||||
|
||||
/*! checks if time range overlaps */
|
||||
__forceinline bool time_range_overlap(const BBox1f& range) const
|
||||
{
|
||||
if (0.9999f*time_range.upper <= range.lower) return false;
|
||||
if (1.0001f*time_range.lower >= range.upper) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! returns center for binning */
|
||||
__forceinline Vec3fa binCenter() const {
|
||||
return center2(lbounds.interpolate(0.5f));
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(LBBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
bounds_o = bounds();
|
||||
center_o = binCenter();
|
||||
}
|
||||
|
||||
/*! returns the geometry ID */
|
||||
__forceinline unsigned geomID() const {
|
||||
return lbounds.bounds0.lower.a;
|
||||
}
|
||||
|
||||
/*! returns the primitive ID */
|
||||
__forceinline unsigned primID() const {
|
||||
return lbounds.bounds0.upper.a;
|
||||
}
|
||||
|
||||
/*! returns a size_t sized ID */
|
||||
__forceinline size_t ID() const {
|
||||
#if defined(__64BIT__)
|
||||
return size_t(lbounds.bounds0.lower.u) + (size_t(lbounds.bounds0.upper.u) << 32);
|
||||
#else
|
||||
return size_t(lbounds.bounds0.lower.u);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! special function for operator< */
|
||||
__forceinline uint64_t ID64() const {
|
||||
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
|
||||
}
|
||||
|
||||
/*! allows sorting the primrefs by ID */
|
||||
friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
|
||||
return p0.ID64() < p1.ID64();
|
||||
}
|
||||
|
||||
/*! Outputs primitive reference to a stream. */
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
|
||||
return cout << "{ time_range = " << ref.time_range << ", bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
|
||||
}
|
||||
|
||||
public:
|
||||
LBBox3fx lbounds;
|
||||
BBox1f time_range; // entire geometry time range
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/*! A primitive reference stores the bounds of the primitive and its ID. */
|
||||
struct __aligned(16) PrimRefMB
|
||||
{
|
||||
typedef BBox3fa BBox;
|
||||
|
||||
__forceinline PrimRefMB () {}
|
||||
|
||||
__forceinline PrimRefMB (const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
|
||||
: bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
bbox.lower.a = geomID;
|
||||
bbox.upper.a = primID;
|
||||
}
|
||||
|
||||
__forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
|
||||
: bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
|
||||
{
|
||||
assert(activeTimeSegments > 0);
|
||||
#if defined(__64BIT__)
|
||||
bbox.lower.u = id & 0xFFFFFFFF;
|
||||
bbox.upper.u = (id >> 32) & 0xFFFFFFFF;
|
||||
#else
|
||||
bbox.lower.u = id;
|
||||
bbox.upper.u = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! returns bounds for binning */
|
||||
__forceinline BBox3fa bounds() const {
|
||||
return bbox;
|
||||
}
|
||||
|
||||
/*! returns the number of time segments of this primref */
|
||||
__forceinline unsigned int size() const {
|
||||
return _activeTimeSegments;
|
||||
}
|
||||
|
||||
__forceinline unsigned int totalTimeSegments() const {
|
||||
return _totalTimeSegments;
|
||||
}
|
||||
|
||||
/* calculate overlapping time segment range */
|
||||
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
|
||||
return getTimeSegmentRange(range,time_range,float(_totalTimeSegments));
|
||||
}
|
||||
|
||||
/* returns time that corresponds to time step */
|
||||
__forceinline float timeStep(const int i) const {
|
||||
assert(i>=0 && i<=(int)_totalTimeSegments);
|
||||
return time_range.lower + time_range.size()*float(i)/float(_totalTimeSegments);
|
||||
}
|
||||
|
||||
/*! checks if time range overlaps */
|
||||
__forceinline bool time_range_overlap(const BBox1f& range) const
|
||||
{
|
||||
if (0.9999f*time_range.upper <= range.lower) return false;
|
||||
if (1.0001f*time_range.lower >= range.upper) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! returns center for binning */
|
||||
__forceinline Vec3fa binCenter() const {
|
||||
return center2(bounds());
|
||||
}
|
||||
|
||||
/*! returns bounds and centroid used for binning */
|
||||
__forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
|
||||
{
|
||||
bounds_o = bounds();
|
||||
center_o = center2(bounds());
|
||||
}
|
||||
|
||||
/*! returns the geometry ID */
|
||||
__forceinline unsigned int geomID() const {
|
||||
return bbox.lower.a;
|
||||
}
|
||||
|
||||
/*! returns the primitive ID */
|
||||
__forceinline unsigned int primID() const {
|
||||
return bbox.upper.a;
|
||||
}
|
||||
|
||||
/*! returns a size_t sized ID */
|
||||
__forceinline size_t ID() const {
|
||||
#if defined(__64BIT__)
|
||||
return size_t(bbox.lower.u) + (size_t(bbox.upper.u) << 32);
|
||||
#else
|
||||
return size_t(bbox.lower.u);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! special function for operator< */
|
||||
__forceinline uint64_t ID64() const {
|
||||
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
|
||||
}
|
||||
|
||||
/*! allows sorting the primrefs by ID */
|
||||
friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
|
||||
return p0.ID64() < p1.ID64();
|
||||
}
|
||||
|
||||
/*! Outputs primitive reference to a stream. */
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
|
||||
return cout << "{ bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
|
||||
}
|
||||
|
||||
public:
|
||||
BBox3fa bbox; // bounds, geomID, primID
|
||||
unsigned int _activeTimeSegments;
|
||||
unsigned int _totalTimeSegments;
|
||||
BBox1f time_range; // entire geometry time range
|
||||
};
|
||||
|
||||
#endif
|
||||
}
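// Editor's note: standalone sketch (illustrative only) of PrimRefMB::timeStep() above:
// segment boundary i of a primitive with N total segments over [lower,upper] lies at
// lower + (upper - lower) * i / N.
#include <cstdio>

float time_step(float lower, float upper, int i, int total_segments) {
  return lower + (upper - lower) * float(i) / float(total_segments);
}

int main() {
  // 3 segments over [0.0, 0.9]: boundaries 0.0, 0.3, 0.6, 0.9
  for (int i = 0; i <= 3; i++)
    std::printf("t[%d] = %f\n", i, time_step(0.0f, 0.9f, i, 3));
  return 0;
}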
359
engine/thirdparty/embree/kernels/builders/primrefgen.cpp
vendored
Normal file
@@ -0,0 +1,359 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "primrefgen.h"
|
||||
#include "primrefgen_presplit.h"
|
||||
|
||||
#include "../../common/algorithms/parallel_for_for.h"
|
||||
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelPrefixSumState<PrimInfo> pstate;
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,base.size(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator2 iter(scene,types,mblur);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator2 iter(scene,types,mblur);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator2 iter(scene,types,true);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
return mesh->createPrimRefArrayMB(prims,itime,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArrayMB(prims,itime,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfoMB> pstate;
|
||||
Scene::Iterator2 iter(scene,types,true);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,t0t1,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,t0t1,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
}
|
||||
|
||||
/* the BVH starts with that time range, even though primitives might have smaller/larger time range */
|
||||
pinfo.time_range = t0t1;
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
|
||||
{
|
||||
ParallelForForPrefixSumState<PrimInfoMB> pstate;
|
||||
Scene::Iterator2 iter(scene,types,true);
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
pstate.init(iter,size_t(1024));
|
||||
PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,k,(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
}
|
||||
|
||||
/* the BVH starts with that time range, even though primitives might have smaller/larger time range */
|
||||
pinfo.time_range = t0t1;
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
template<typename Mesh>
|
||||
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
size_t numPrimitives = morton.size();
|
||||
|
||||
/* compute scene bounds */
|
||||
std::pair<size_t,BBox3fa> cb_empty(0,empty);
|
||||
auto cb = parallel_reduce
|
||||
( size_t(0), numPrimitives, size_t(1024), cb_empty, [&](const range<size_t>& r) -> std::pair<size_t,BBox3fa>
|
||||
{
|
||||
size_t num = 0;
|
||||
BBox3fa bounds = empty;
|
||||
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
BBox3fa prim_bounds = empty;
|
||||
if (unlikely(!mesh->buildBounds(j,&prim_bounds))) continue;
|
||||
bounds.extend(center2(prim_bounds));
|
||||
num++;
|
||||
}
|
||||
return std::make_pair(num,bounds);
|
||||
}, [] (const std::pair<size_t,BBox3fa>& a, const std::pair<size_t,BBox3fa>& b) {
|
||||
return std::make_pair(a.first + b.first,merge(a.second,b.second));
|
||||
});
|
||||
|
||||
|
||||
size_t numPrimitivesGen = cb.first;
|
||||
const BBox3fa centBounds = cb.second;
|
||||
|
||||
/* compute morton codes */
|
||||
if (likely(numPrimitivesGen == numPrimitives))
|
||||
{
|
||||
/* fast path if all primitives were valid */
|
||||
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
|
||||
parallel_for( size_t(0), numPrimitives, size_t(1024), [&](const range<size_t>& r) -> void {
|
||||
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
generator(mesh->bounds(j),unsigned(j));
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
/* slow path, fallback in case some primitives were invalid */
|
||||
ParallelPrefixSumState<size_t> pstate;
|
||||
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
|
||||
parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
|
||||
size_t num = 0;
|
||||
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (unlikely(!mesh->buildBounds(j,&bounds))) continue;
|
||||
generator(bounds,unsigned(j));
|
||||
num++;
|
||||
}
|
||||
return num;
|
||||
}, std::plus<size_t>());
|
||||
|
||||
parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
|
||||
size_t num = 0;
|
||||
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[base]);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
if (!mesh->buildBounds(j,&bounds)) continue;
|
||||
generator(bounds,unsigned(j));
|
||||
num++;
|
||||
}
|
||||
return num;
|
||||
}, std::plus<size_t>());
|
||||
}
|
||||
return numPrimitivesGen;
|
||||
}
|
||||
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
|
||||
// special variants for grid meshes
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_GRID)
|
||||
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids)
|
||||
{
|
||||
PrimInfo pinfo(empty);
|
||||
size_t numPrimitives = 0;
|
||||
|
||||
/* first run to get #primitives */
|
||||
|
||||
ParallelForForPrefixSumState<PrimInfo> pstate;
|
||||
Scene::Iterator<GridMesh,false> iter(scene);
|
||||
|
||||
pstate.init(iter,size_t(1024));
|
||||
|
||||
/* iterate over all meshes in the scene */
|
||||
pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j)) continue;
BBox3fa bounds = empty;
const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j);
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
numPrimitives = pinfo.size();
|
||||
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
assert(pinfo.size() == numPrimitives);
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids)
|
||||
{
|
||||
unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
|
||||
|
||||
PrimInfo pinfo(empty);
|
||||
size_t numPrimitives = 0;
|
||||
|
||||
ParallelPrefixSumState<PrimInfo> pstate;
|
||||
/* iterate over all grids in a single mesh */
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
|
||||
{
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j)) continue;
|
||||
BBox3fa bounds = empty;
|
||||
const PrimRef prim(bounds,geomID_,unsigned(j));
|
||||
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
|
||||
}
|
||||
return pinfo;
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
numPrimitives = pinfo.size();
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID_);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
|
||||
{
|
||||
/* first run to get #primitives */
|
||||
ParallelForForPrefixSumState<PrimInfoMB> pstate;
|
||||
Scene::Iterator<GridMesh,true> iter(scene);
|
||||
|
||||
pstate.init(iter,size_t(1024));
|
||||
/* iterate over all meshes in the scene */
|
||||
PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
|
||||
|
||||
PrimInfoMB pinfoMB(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
|
||||
LBBox3fa bounds(empty);
|
||||
PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
|
||||
pinfoMB.merge(gridMB);
|
||||
}
|
||||
return pinfoMB;
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
size_t numPrimitives = pinfoMB.size();
|
||||
if (numPrimitives == 0) return pinfoMB;
|
||||
|
||||
/* resize arrays */
|
||||
sgrids.resize(numPrimitives);
|
||||
prims.resize(numPrimitives);
|
||||
/* second run to fill primrefs and SubGridBuildData arrays */
|
||||
pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
|
||||
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
|
||||
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
|
||||
|
||||
assert(pinfoMB.size() == numPrimitives);
|
||||
pinfoMB.time_range = t0t1;
|
||||
return pinfoMB;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
// ====================================================================================================
|
||||
|
||||
IF_ENABLED_TRIS (template size_t createMortonCodeArray<TriangleMesh>(TriangleMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
IF_ENABLED_INSTANCE_ARRAY (template size_t createMortonCodeArray<InstanceArray>(InstanceArray* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
|
||||
}
|
||||
}
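// Editor's note: simplified, single-threaded sketch (not the embree implementation) of the
// two-pass pattern used by the createPrimRefArray() variants above: pass 1 writes each valid
// primref at its own index; if some primitives were filtered out, pass 2 re-runs with
// compacted destinations derived from the running count (the role of base.size() above).
// 'is_valid' is a stand-in predicate, not an embree API.
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> prims = {10, -1, 20, -1, 30};      // -1 marks an invalid primitive
  auto is_valid = [](int p) { return p >= 0; };

  // pass 1: write at source index, count valid primitives
  std::vector<int> out(prims.size(), 0);
  std::size_t valid = 0;
  for (std::size_t i = 0; i < prims.size(); i++)
    if (is_valid(prims[i])) { out[i] = prims[i]; valid++; }

  // pass 2 (only if filtering happened): compact using a running offset
  if (valid != prims.size()) {
    std::size_t offset = 0;
    for (std::size_t i = 0; i < prims.size(); i++)
      if (is_valid(prims[i])) out[offset++] = prims[i];
    out.resize(valid);
  }
  for (int v : out) std::printf("%d ", v);
  std::printf("\n");
  return 0;
}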
37
engine/thirdparty/embree/kernels/builders/primrefgen.h
vendored
Normal file
@@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/scene.h"
|
||||
#include "priminfo.h"
|
||||
#include "priminfo_mb.h"
|
||||
#include "bvh_builder_morton.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0);
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
|
||||
|
||||
template<typename Mesh>
|
||||
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor);
|
||||
|
||||
/* special variants for grids */
|
||||
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids); // FIXME: remove
|
||||
|
||||
PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids);
|
||||
|
||||
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
|
||||
}
|
||||
}
468
engine/thirdparty/embree/kernels/builders/primrefgen_presplit.h
vendored
Normal file
@@ -0,0 +1,468 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
#include "../../common/algorithms/parallel_sort.h"
|
||||
#include "../builders/heuristic_spatial.h"
|
||||
#include "../builders/splitter.h"
|
||||
|
||||
#include "../../common/algorithms/parallel_partition.h"
|
||||
#include "../../common/algorithms/parallel_for_for.h"
|
||||
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
|
||||
|
||||
#define DBG_PRESPLIT(x)
|
||||
#define CHECK_PRESPLIT(x)
|
||||
|
||||
#define GRID_SIZE 1024
|
||||
//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6
|
||||
#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
|
||||
#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
|
||||
//#define PRIORITY_CUTOFF_THRESHOLD 2.0f
|
||||
#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
struct SplittingGrid
|
||||
{
|
||||
__forceinline SplittingGrid(const BBox3fa& bounds)
|
||||
{
|
||||
base = bounds.lower;
|
||||
const Vec3fa diag = bounds.size();
|
||||
extend = max(diag.x,max(diag.y,diag.z));
|
||||
scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend;
|
||||
}
|
||||
|
||||
__forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const
|
||||
{
|
||||
/* compute morton code */
|
||||
const Vec3fa lower = prim.lower;
|
||||
const Vec3fa upper = prim.upper;
|
||||
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
|
||||
Vec3ia ilower(floor(glower));
|
||||
Vec3ia iupper(floor(gupper));
|
||||
|
||||
/* this ignores dimensions that are empty */
|
||||
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
|
||||
|
||||
/* compute a morton code for the lower and upper grid coordinates. */
|
||||
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
|
||||
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
|
||||
|
||||
/* if all bits are equal then we cannot split */
|
||||
if (unlikely(lower_code == upper_code))
|
||||
return false;
|
||||
|
||||
/* compute octree level and dimension to perform the split in */
|
||||
const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
|
||||
const unsigned int level = diff / 3;
|
||||
const unsigned int dim = diff % 3;
|
||||
|
||||
/* now we compute the grid position of the split */
|
||||
const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
|
||||
|
||||
/* compute world space position of split */
|
||||
const float inv_grid_size = 1.0f / GRID_SIZE;
|
||||
const float fsplit = base[dim] + isplit * inv_grid_size * extend;
|
||||
assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit);
|
||||
|
||||
dim_o = dim;
|
||||
fsplit_o = fsplit;
|
||||
return true;
|
||||
}
|
||||
|
||||
__forceinline Vec2i computeMC(const PrimRef& ref) const
|
||||
{
|
||||
const Vec3fa lower = ref.lower;
|
||||
const Vec3fa upper = ref.upper;
|
||||
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
|
||||
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
|
||||
Vec3ia ilower(floor(glower));
|
||||
Vec3ia iupper(floor(gupper));
|
||||
|
||||
/* this ignores dimensions that are empty */
|
||||
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
|
||||
|
||||
/* compute a morton code for the lower and upper grid coordinates. */
|
||||
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
|
||||
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
|
||||
return Vec2i(lower_code,upper_code);
|
||||
}
|
||||
|
||||
Vec3fa base;
|
||||
float scale;
|
||||
float extend;
|
||||
};
|
||||
|
||||
struct PresplitItem
|
||||
{
|
||||
union {
|
||||
float priority;
|
||||
unsigned int data;
|
||||
};
|
||||
unsigned int index;
|
||||
|
||||
__forceinline operator unsigned() const {
|
||||
return data;
|
||||
}
|
||||
|
||||
template<typename ProjectedPrimitiveAreaFunc>
|
||||
__forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc)
|
||||
{
|
||||
const float area_aabb = area(ref.bounds());
|
||||
const float area_prim = primitiveArea(ref);
|
||||
if (area_prim == 0.0f) return 0.0f;
|
||||
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
|
||||
//assert(area_prim <= area_aabb); // may trigger due to numerical issues
|
||||
const float area_diff = max(0.0f, area_aabb - area_prim);
|
||||
//const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
|
||||
const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
|
||||
//const float priority = sqrtf(sqrtf( area_diff ) );
|
||||
//const float priority = sqrtf(area_diff);
|
||||
//const float priority = area_diff; // 104 fps !!!!!!!!!!
|
||||
//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps
|
||||
//const float priority = area_aabb * max(area_aabb/area_prim,32.0f);
|
||||
//const float priority = area_prim;
|
||||
assert(priority >= 0.0f && priority < FLT_LARGE);
|
||||
return priority;
|
||||
}
|
||||
|
||||
};
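// Editor's note: standalone sketch (illustrative values, not embree code) of the priority
// heuristic in PresplitItem::compute_priority() above: primitives whose AABB area greatly
// exceeds their actual surface area, and whose morton codes diverge early (large 'diff'),
// receive a higher presplit priority. The default weight mirrors PRIORITY_SPLIT_POS_WEIGHT.
#include <algorithm>
#include <cmath>
#include <cstdio>

float presplit_priority(float area_aabb, float area_prim, unsigned diff, float weight = 1.5f) {
  if (area_prim == 0.0f) return 0.0f;
  const float area_diff = std::max(0.0f, area_aabb - area_prim);
  return std::sqrt(std::sqrt(area_diff * std::pow(weight, float(diff))));
}

int main() {
  std::printf("thin diagonal triangle: %f\n", presplit_priority(8.0f, 0.5f, 12));
  std::printf("axis-aligned quad:      %f\n", presplit_priority(1.0f, 1.0f, 12));
  return 0;
}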
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
|
||||
return cout << "index " << item.index << " priority " << item.priority;
|
||||
};
|
||||
|
||||
#if 1
|
||||
|
||||
template<typename Splitter>
|
||||
void splitPrimitive(const Splitter& splitter,
|
||||
const PrimRef& prim,
|
||||
const unsigned int splitprims,
|
||||
const SplittingGrid& grid,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
if (splitprims == 1)
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int dim; float fsplit;
|
||||
if (!grid.split_pos(prim, dim, fsplit))
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = prim;
|
||||
return;
|
||||
}
|
||||
|
||||
/* split primitive */
|
||||
PrimRef left,right;
|
||||
splitter(prim,dim,fsplit,left,right);
|
||||
assert(!left.bounds().empty());
|
||||
assert(!right.bounds().empty());
|
||||
|
||||
const unsigned int splitprims_left = splitprims/2;
|
||||
const unsigned int splitprims_right = splitprims - splitprims_left;
|
||||
splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims);
|
||||
splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<typename Splitter>
|
||||
void splitPrimitive(const Splitter& splitter,
|
||||
const PrimRef& prim,
|
||||
const unsigned int targetSubPrims,
|
||||
const SplittingGrid& grid,
|
||||
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
|
||||
unsigned int& numSubPrims)
|
||||
{
|
||||
assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
|
||||
auto compare = [] ( const PrimRef& a, const PrimRef& b ) {
|
||||
return area(a.bounds()) < area(b.bounds());
|
||||
};
|
||||
|
||||
subPrims[numSubPrims++] = prim;
|
||||
|
||||
while (numSubPrims < targetSubPrims)
|
||||
{
|
||||
/* get top heap element */
|
||||
std::pop_heap(subPrims+0,subPrims+numSubPrims, compare);
|
||||
PrimRef top = subPrims[--numSubPrims];
|
||||
|
||||
unsigned int dim; float fsplit;
|
||||
if (!grid.split_pos(top, dim, fsplit))
|
||||
{
|
||||
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
|
||||
subPrims[numSubPrims++] = top;
|
||||
return;
|
||||
}
|
||||
|
||||
/* split primitive */
|
||||
PrimRef left,right;
|
||||
splitter(top,dim,fsplit,left,right);
|
||||
assert(!left.bounds().empty());
|
||||
assert(!right.bounds().empty());
|
||||
|
||||
subPrims[numSubPrims++] = left;
|
||||
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
|
||||
|
||||
subPrims[numSubPrims++] = right;
|
||||
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
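// Editor's note: tiny standalone sketch (not embree code) of how the recursive
// splitPrimitive() variant above distributes its sub-primitive budget: each level halves the
// remaining budget between the left and right pieces until a single slot remains, so a budget
// of N yields exactly N sub-primitives. Here leaves are only counted instead of splitting geometry.
#include <cstdio>

unsigned distribute_budget(unsigned splitprims) {
  if (splitprims == 1) return 1;                        // one leaf slot consumed
  const unsigned left  = splitprims / 2;
  const unsigned right = splitprims - left;
  return distribute_budget(left) + distribute_budget(right);
}

int main() {
  // a budget of 5 produces exactly 5 leaves: 5 -> (2,3) -> (1,1),(1,2) -> ...
  std::printf("leaves for budget 5: %u\n", distribute_budget(5));
  return 0;
}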
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
template<typename Mesh, typename SplitterFactory>
|
||||
PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
|
||||
{
|
||||
ParallelPrefixSumState<PrimInfo> pstate;
|
||||
|
||||
/* first try */
|
||||
progressMonitor(0);
|
||||
PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
|
||||
/* if we need to filter out geometry, run again */
|
||||
if (pinfo.size() != numPrimRefs)
|
||||
{
|
||||
progressMonitor(0);
|
||||
pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
|
||||
return geometry->createPrimRefArray(prims,r,base.size(),geomID);
|
||||
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
|
||||
}
|
||||
return pinfo;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector>
|
||||
PrimInfo createPrimRefArray_presplit(size_t numPrimRefs,
|
||||
PrimVector& prims,
|
||||
const PrimInfo& pinfo,
|
||||
const SplitPrimitiveFunc& splitPrimitive,
|
||||
const ProjectedPrimitiveAreaFunc& primitiveArea)
|
||||
{
|
||||
static const size_t MIN_STEP_SIZE = 128;
|
||||
|
||||
/* use correct number of primitives */
|
||||
size_t numPrimitives = pinfo.size();
|
||||
const size_t numPrimitivesExt = prims.size();
|
||||
const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives;
|
||||
|
||||
/* allocate double buffer presplit items */
|
||||
avector<PresplitItem> preSplitItem0(numPrimitivesExt);
|
||||
avector<PresplitItem> preSplitItem1(numPrimitivesExt);
|
||||
|
||||
/* compute grid */
|
||||
SplittingGrid grid(pinfo.geomBounds);
|
||||
|
||||
/* init presplit items and get total sum */
|
||||
const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
|
||||
float sum = 0.0f;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
{
|
||||
preSplitItem0[i].index = (unsigned int)i;
|
||||
const Vec2i mc = grid.computeMC(prims[i]);
|
||||
/* if all bits are equal then we cannot split */
|
||||
preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f;
|
||||
/* FIXME: sum undeterministic */
|
||||
sum += preSplitItem0[i].priority;
|
||||
}
|
||||
return sum;
|
||||
},[](const float& a, const float& b) -> float { return a+b; });
      /* compute number of splits per primitive */
      const float inv_psum = 1.0f / psum;
      parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
        for (size_t i=r.begin(); i<r.end(); i++)
        {
          if (preSplitItem0[i].priority <= 0.0f) {
            preSplitItem0[i].data = 1;
            continue;
          }

          const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum;
          if (rel_p < 1) {
            preSplitItem0[i].data = 1;
            continue;
          }

          //preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f);
          preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
          preSplitItem0[i].data = 1 << preSplitItem0[i].data;
          assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE);
        }
      });

      auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; };
      size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024);
      assert(center <= numPrimitives);

      /* anything to split ? */
      if (center >= numPrimitives)
        return pinfo;

      size_t numPrimitivesToSplit = numPrimitives - center;
      assert(preSplitItem0[center].data >= 1.0f);

      /* sort presplit items in ascending order */
      radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024);

      CHECK_PRESPLIT(
        parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
          for (size_t i=r.begin(); i<r.end(); i++)
            assert(preSplitItem0[i-1].data <= preSplitItem0[i].data);
        });
      );

      unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data();
      unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit;

      /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
      const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
        size_t sum = 0;
        for (size_t i=t.begin(); i<t.end(); i++)
        {
          const unsigned int primrefID = preSplitItem0[i].index;
          const unsigned int splitprims = preSplitItem0[i].data;
          assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);

          unsigned int numSubPrims = 0;
          PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
          splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
          assert(numSubPrims);

          numSubPrims--; // can reuse slot
          sum+=numSubPrims;
          preSplitItem0[i].data = (numSubPrims << 16) | splitprims;

          primOffset0[i-center] = numSubPrims;
        }
        return sum;
      },[](const size_t& a, const size_t& b) -> size_t { return a+b; });

      /* if we are over budget, need to shrink the range */
      if (totalNumSubPrims > numSplitPrimitivesBudget)
      {
        size_t new_center = numPrimitives-1;
        size_t sum = 0;
        for (;new_center>=center;new_center--)
        {
          const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16;
          if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
          sum += numSubPrims;
        }
        new_center++;

        primOffset0 += new_center - center;
        numPrimitivesToSplit -= new_center - center;
        center = new_center;
        assert(numPrimitivesToSplit == (numPrimitives - center));
      }

      /* parallel prefix sum to compute offsets for storing sub-primitives */
      const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
      assert(numPrimitives+offset <= numPrimitivesExt);

      /* iterate over range, and split primitives into sub primitives and append them to prims array */
      parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
        for (size_t j=rn.begin(); j<rn.end(); j++)
        {
          const unsigned int primrefID = preSplitItem0[j].index;
          const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF;
          assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);

          unsigned int numSubPrims = 0;
          PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
          splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);

          const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16;
          assert(numSubPrims-1 == numSubPrimsExpected);

          const size_t newID = numPrimitives + primOffset1[j-center];
          assert(newID+numSubPrims-1 <= numPrimitivesExt);

          prims[primrefID] = subPrims[0];
          for (size_t i=1;i<numSubPrims;i++)
            prims[newID+i-1] = subPrims[i];
        }
      });

      numPrimitives += offset;

      /* recompute centroid bounding boxes */
      const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
        PrimInfo p(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
          p.add_center2(prims[j]);
        return p;
      }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

      assert(pinfo1.size() == numPrimitives);

      return pinfo1;
    }
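
      /* Illustrative walk-through of the budget distribution above (the numbers are
         made up, not taken from any real scene): with a split budget of 1000 and a
         primitive whose priority is 1% of psum, rel_p = 1000 * 0.01 = 10, so
         data = 1 << ceil(log2(10)) = 16 sub-primitive slots, clamped by
         MAX_PRESPLITS_PER_PRIMITIVE_LOG. Primitives with rel_p < 1 keep data = 1,
         land in the left partition produced by parallel_partitioning(), and are
         never split. */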

#if !defined(RTHWIF_STANDALONE)

    template<typename Mesh, typename SplitterFactory>
    PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
    {
      ParallelForForPrefixSumState<PrimInfo> pstate;
      Scene::Iterator2 iter(scene,types,mblur);

      /* first try */
      progressMonitor(0);
      pstate.init(iter,size_t(1024));
      PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
        return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
      }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

      /* if we need to filter out geometry, run again */
      if (pinfo.size() != numPrimRefs)
      {
        progressMonitor(0);
        pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
          return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
        }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
      }

      SplitterFactory Splitter(scene);

      auto split_primitive = [&] (const PrimRef &prim,
                                  const unsigned int splitprims,
                                  const SplittingGrid& grid,
                                  PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
                                  unsigned int& numSubPrims)
      {
        const auto splitter = Splitter(prim);
        splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims);
      };

      auto primitiveArea = [&] (const PrimRef &ref) {
        const unsigned int geomID = ref.geomID();
        const unsigned int primID = ref.primID();
        return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
      };

      return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea);
    }
#endif
  }
}
240
engine/thirdparty/embree/kernels/builders/splitter.h
vendored
Normal file
@@ -0,0 +1,240 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
#include "../common/scene.h"
|
||||
#endif
|
||||
|
||||
#include "../builders/primref.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<size_t N>
|
||||
__forceinline void splitPolygon(const BBox3fa& bounds,
|
||||
const size_t dim,
|
||||
const float pos,
|
||||
const Vec3fa (&v)[N+1],
|
||||
BBox3fa& left_o,
|
||||
BBox3fa& right_o)
|
||||
{
|
||||
BBox3fa left = empty, right = empty;
|
||||
/* clip triangle to left and right box by processing all edges */
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const Vec3fa &v0 = v[i];
|
||||
const Vec3fa &v1 = v[i+1];
|
||||
const float v0d = v0[dim];
|
||||
const float v1d = v1[dim];
|
||||
|
||||
if (v0d <= pos) left. extend(v0); // this point is on left side
|
||||
if (v0d >= pos) right.extend(v0); // this point is on right side
|
||||
|
||||
if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
|
||||
{
|
||||
assert((v1d-v0d) != 0.0f);
|
||||
const float inv_length = 1.0f/(v1d-v0d);
|
||||
const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0);
|
||||
left.extend(c);
|
||||
right.extend(c);
|
||||
}
|
||||
}
|
||||
|
||||
/* clip against current bounds */
|
||||
left_o = intersect(left,bounds);
|
||||
right_o = intersect(right,bounds);
|
||||
}
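    /* The crossing point computed by the madd() above is the linear interpolation
       c = v0 + ((pos - v0d) / (v1d - v0d)) * (v1 - v0), i.e. the point where the
       edge (v0,v1) pierces the axis-aligned splitting plane at coordinate pos along
       dimension dim. As a made-up example, an edge running from v0d = 1 to v1d = 5
       split at pos = 2 is cut at parameter (2-1)/(5-1) = 0.25 along the edge, and
       the resulting point is added to both the left and right bounds. */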
|
||||
|
||||
template<size_t N>
|
||||
__forceinline void splitPolygon(const BBox3fa& bounds,
|
||||
const size_t dim,
|
||||
const float pos,
|
||||
const Vec3fa (&v)[N+1],
|
||||
const Vec3fa (&inv_length)[N],
|
||||
BBox3fa& left_o,
|
||||
BBox3fa& right_o)
|
||||
{
|
||||
BBox3fa left = empty, right = empty;
|
||||
/* clip triangle to left and right box by processing all edges */
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const Vec3fa &v0 = v[i];
|
||||
const Vec3fa &v1 = v[i+1];
|
||||
const float v0d = v0[dim];
|
||||
const float v1d = v1[dim];
|
||||
|
||||
if (v0d <= pos) left. extend(v0); // this point is on left side
|
||||
if (v0d >= pos) right.extend(v0); // this point is on right side
|
||||
|
||||
if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
|
||||
{
|
||||
assert((v1d-v0d) != 0.0f);
|
||||
const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length[i][dim]),v1-v0,v0);
|
||||
left.extend(c);
|
||||
right.extend(c);
|
||||
}
|
||||
}
|
||||
|
||||
/* clip against current bounds */
|
||||
left_o = intersect(left,bounds);
|
||||
right_o = intersect(right,bounds);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
__forceinline void splitPolygon(const PrimRef& prim,
|
||||
const size_t dim,
|
||||
const float pos,
|
||||
const Vec3fa (&v)[N+1],
|
||||
PrimRef& left_o,
|
||||
PrimRef& right_o)
|
||||
{
|
||||
BBox3fa left = empty, right = empty;
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const Vec3fa &v0 = v[i];
|
||||
const Vec3fa &v1 = v[i+1];
|
||||
const float v0d = v0[dim];
|
||||
const float v1d = v1[dim];
|
||||
|
||||
if (v0d <= pos) left. extend(v0); // this point is on left side
|
||||
if (v0d >= pos) right.extend(v0); // this point is on right side
|
||||
|
||||
if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
|
||||
{
|
||||
assert((v1d-v0d) != 0.0f);
|
||||
const float inv_length = 1.0f/(v1d-v0d);
|
||||
const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0);
|
||||
left.extend(c);
|
||||
right.extend(c);
|
||||
}
|
||||
}
|
||||
|
||||
/* clip against current bounds */
|
||||
new (&left_o ) PrimRef(intersect(left ,prim.bounds()),prim.geomID(), prim.primID());
|
||||
new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID());
|
||||
}
|
||||
|
||||
#if !defined(RTHWIF_STANDALONE)
|
||||
|
||||
struct TriangleSplitter
|
||||
{
|
||||
__forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim)
|
||||
{
|
||||
const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
|
||||
const TriangleMesh* mesh = (const TriangleMesh*) scene->get(prim.geomID() & mask );
|
||||
TriangleMesh::Triangle tri = mesh->triangle(prim.primID());
|
||||
v[0] = mesh->vertex(tri.v[0]);
|
||||
v[1] = mesh->vertex(tri.v[1]);
|
||||
v[2] = mesh->vertex(tri.v[2]);
|
||||
v[3] = mesh->vertex(tri.v[0]);
|
||||
inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]);
|
||||
inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
|
||||
inv_length[2] = Vec3fa(1.0f) / (v[0]-v[2]);
|
||||
}
|
||||
|
||||
__forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
|
||||
splitPolygon<3>(prim,dim,pos,v,left_o,right_o);
|
||||
}
|
||||
|
||||
__forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
|
||||
splitPolygon<3>(prim,dim,pos,v,inv_length,left_o,right_o);
|
||||
}
|
||||
|
||||
private:
|
||||
Vec3fa v[4];
|
||||
Vec3fa inv_length[3];
|
||||
};
|
||||
|
||||
struct TriangleSplitterFactory
|
||||
{
|
||||
__forceinline TriangleSplitterFactory(const Scene* scene)
|
||||
: scene(scene) {}
|
||||
|
||||
__forceinline TriangleSplitter operator() (const PrimRef& prim) const {
|
||||
return TriangleSplitter(scene,prim);
|
||||
}
|
||||
|
||||
private:
|
||||
const Scene* scene;
|
||||
};
|
||||
|
||||
struct QuadSplitter
|
||||
{
|
||||
__forceinline QuadSplitter(const Scene* scene, const PrimRef& prim)
|
||||
{
|
||||
const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
|
||||
const QuadMesh* mesh = (const QuadMesh*) scene->get(prim.geomID() & mask );
|
||||
QuadMesh::Quad quad = mesh->quad(prim.primID());
|
||||
v[0] = mesh->vertex(quad.v[1]);
|
||||
v[1] = mesh->vertex(quad.v[2]);
|
||||
v[2] = mesh->vertex(quad.v[3]);
|
||||
v[3] = mesh->vertex(quad.v[0]);
|
||||
v[4] = mesh->vertex(quad.v[1]);
|
||||
v[5] = mesh->vertex(quad.v[3]);
|
||||
inv_length[0] = Vec3fa(1.0f) / (v[1] - v[0]);
|
||||
inv_length[1] = Vec3fa(1.0f) / (v[2] - v[1]);
|
||||
inv_length[2] = Vec3fa(1.0f) / (v[3] - v[2]);
|
||||
inv_length[3] = Vec3fa(1.0f) / (v[4] - v[3]);
|
||||
inv_length[4] = Vec3fa(1.0f) / (v[5] - v[4]);
|
||||
}
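    /* Note on the vertex ordering above: the six stored vertices trace the quad
       boundary v1 -> v2 -> v3 -> v0 -> v1 and then the diagonal v1 -> v3, so
       splitPolygon<5> clips five edges in total. This appears to correspond to the
       edges of the two triangles (v0,v1,v3) and (v2,v3,v1) that a quad is commonly
       decomposed into, with the shared diagonal clipped once. */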
|
||||
|
||||
__forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
|
||||
splitPolygon<5>(prim,dim,pos,v,left_o,right_o);
|
||||
}
|
||||
|
||||
__forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
|
||||
splitPolygon<5>(prim,dim,pos,v,inv_length,left_o,right_o);
|
||||
}
|
||||
|
||||
private:
|
||||
Vec3fa v[6];
|
||||
Vec3fa inv_length[5];
|
||||
};
|
||||
|
||||
struct QuadSplitterFactory
|
||||
{
|
||||
__forceinline QuadSplitterFactory(const Scene* scene)
|
||||
: scene(scene) {}
|
||||
|
||||
__forceinline QuadSplitter operator() (const PrimRef& prim) const {
|
||||
return QuadSplitter(scene,prim);
|
||||
}
|
||||
|
||||
private:
|
||||
const Scene* scene;
|
||||
};
|
||||
|
||||
|
||||
struct DummySplitter
|
||||
{
|
||||
__forceinline DummySplitter(const Scene* scene, const PrimRef& prim)
|
||||
{
|
||||
}
|
||||
|
||||
__forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
|
||||
}
|
||||
|
||||
__forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct DummySplitterFactory
|
||||
{
|
||||
__forceinline DummySplitterFactory(const Scene* scene)
|
||||
: scene(scene) {}
|
||||
|
||||
__forceinline DummySplitter operator() (const PrimRef& prim) const {
|
||||
return DummySplitter(scene,prim);
|
||||
}
|
||||
|
||||
private:
|
||||
const Scene* scene;
|
||||
};
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
190
engine/thirdparty/embree/kernels/bvh/bvh.cpp
vendored
Normal file
@@ -0,0 +1,190 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh.h"
|
||||
#include "bvh_statistics.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<int N>
|
||||
BVHN<N>::BVHN (const PrimitiveType& primTy, Scene* scene)
|
||||
: AccelData((N==4) ? AccelData::TY_BVH4 : (N==8) ? AccelData::TY_BVH8 : AccelData::TY_UNKNOWN),
|
||||
primTy(&primTy), device(scene->device), scene(scene),
|
||||
root(emptyNode), alloc(scene->device,scene->isStaticAccel()), numPrimitives(0), numVertices(0)
|
||||
{
|
||||
}
|
||||
|
||||
template<int N>
|
||||
BVHN<N>::~BVHN ()
|
||||
{
|
||||
for (size_t i=0; i<objects.size(); i++)
|
||||
delete objects[i];
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHN<N>::clear()
|
||||
{
|
||||
set(BVHN::emptyNode,empty,0);
|
||||
alloc.clear();
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHN<N>::set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives)
|
||||
{
|
||||
this->root = root;
|
||||
this->bounds = bounds;
|
||||
this->numPrimitives = numPrimitives;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHN<N>::clearBarrier(NodeRef& node)
|
||||
{
|
||||
if (node.isBarrier())
|
||||
node.clearBarrier();
|
||||
else if (!node.isLeaf()) {
|
||||
BaseNode* n = node.baseNode(); // FIXME: flags should be stored in BVH
|
||||
for (size_t c=0; c<N; c++)
|
||||
clearBarrier(n->child(c));
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHN<N>::layoutLargeNodes(size_t num)
|
||||
{
|
||||
#if defined(__64BIT__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
|
||||
struct NodeArea
|
||||
{
|
||||
__forceinline NodeArea() {}
|
||||
|
||||
__forceinline NodeArea(NodeRef& node, const BBox3fa& bounds)
|
||||
: node(&node), A(node.isLeaf() ? float(neg_inf) : area(bounds)) {}
|
||||
|
||||
__forceinline bool operator< (const NodeArea& other) const {
|
||||
return this->A < other.A;
|
||||
}
|
||||
|
||||
NodeRef* node;
|
||||
float A;
|
||||
};
|
||||
std::vector<NodeArea> lst;
|
||||
lst.reserve(num);
|
||||
lst.push_back(NodeArea(root,empty));
|
||||
|
||||
while (lst.size() < num)
|
||||
{
|
||||
std::pop_heap(lst.begin(), lst.end());
|
||||
NodeArea n = lst.back(); lst.pop_back();
|
||||
if (!n.node->isAABBNode()) break;
|
||||
AABBNode* node = n.node->getAABBNode();
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (node->child(i) == BVHN::emptyNode) continue;
|
||||
lst.push_back(NodeArea(node->child(i),node->bounds(i)));
|
||||
std::push_heap(lst.begin(), lst.end());
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i=0; i<lst.size(); i++)
|
||||
lst[i].node->setBarrier();
|
||||
|
||||
root = layoutLargeNodesRecursion(root,alloc.getCachedAllocator());
|
||||
#endif
|
||||
}
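  /* Sketch of the procedure above: a max-heap ordered by node surface area
     repeatedly pops the largest inner node and pushes its children, so after the
     loop `lst` holds roughly the `num` largest nodes near the top of the tree.
     Those nodes get the barrier bit set, and layoutLargeNodesRecursion() then
     copies them into freshly allocated AABB nodes, presumably to pack the hot
     upper levels of the BVH contiguously; the recursion stops at barrier nodes
     and at leaves. */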
|
||||
|
||||
template<int N>
|
||||
typename BVHN<N>::NodeRef BVHN<N>::layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator)
|
||||
{
|
||||
if (node.isBarrier()) {
|
||||
node.clearBarrier();
|
||||
return node;
|
||||
}
|
||||
else if (node.isAABBNode())
|
||||
{
|
||||
AABBNode* oldnode = node.getAABBNode();
|
||||
AABBNode* newnode = (BVHN::AABBNode*) allocator.malloc0(sizeof(BVHN::AABBNode),byteNodeAlignment);
|
||||
*newnode = *oldnode;
|
||||
for (size_t c=0; c<N; c++)
|
||||
newnode->child(c) = layoutLargeNodesRecursion(oldnode->child(c),allocator);
|
||||
return encodeNode(newnode);
|
||||
}
|
||||
else return node;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
double BVHN<N>::preBuild(const std::string& builderName)
|
||||
{
|
||||
if (builderName == "")
|
||||
return inf;
|
||||
|
||||
if (device->verbosity(2))
|
||||
{
|
||||
Lock<MutexSys> lock(g_printMutex);
|
||||
std::cout << "building BVH" << N << (builderName.find("MBlur") != std::string::npos ? "MB" : "") << "<" << primTy->name() << "> using " << builderName << " ..." << std::endl << std::flush;
|
||||
}
|
||||
|
||||
double t0 = 0.0;
|
||||
if (device->benchmark || device->verbosity(2)) t0 = getSeconds();
|
||||
return t0;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHN<N>::postBuild(double t0)
|
||||
{
|
||||
if (t0 == double(inf))
|
||||
return;
|
||||
|
||||
double dt = 0.0;
|
||||
if (device->benchmark || device->verbosity(2))
|
||||
dt = getSeconds()-t0;
|
||||
|
||||
std::unique_ptr<BVHNStatistics<N>> stat;
|
||||
|
||||
/* print statistics */
|
||||
if (device->verbosity(2))
|
||||
{
|
||||
if (!stat) stat.reset(new BVHNStatistics<N>(this));
|
||||
const size_t usedBytes = alloc.getUsedBytes();
|
||||
Lock<MutexSys> lock(g_printMutex);
|
||||
std::cout << "finished BVH" << N << "<" << primTy->name() << "> : " << 1000.0f*dt << "ms, " << 1E-6*double(numPrimitives)/dt << " Mprim/s, " << 1E-9*double(usedBytes)/dt << " GB/s" << std::endl;
|
||||
|
||||
if (device->verbosity(2))
|
||||
std::cout << stat->str();
|
||||
|
||||
if (device->verbosity(2))
|
||||
{
|
||||
FastAllocator::AllStatistics stat(&alloc);
|
||||
for (size_t i=0; i<objects.size(); i++)
|
||||
if (objects[i])
|
||||
stat = stat + FastAllocator::AllStatistics(&objects[i]->alloc);
|
||||
|
||||
stat.print(numPrimitives);
|
||||
}
|
||||
|
||||
if (device->verbosity(3))
|
||||
{
|
||||
alloc.print_blocks();
|
||||
for (size_t i=0; i<objects.size(); i++)
|
||||
if (objects[i])
|
||||
objects[i]->alloc.print_blocks();
|
||||
}
|
||||
|
||||
std::cout << std::flush;
|
||||
}
|
||||
|
||||
/* benchmark mode */
|
||||
if (device->benchmark)
|
||||
{
|
||||
if (!stat) stat.reset(new BVHNStatistics<N>(this));
|
||||
Lock<MutexSys> lock(g_printMutex);
|
||||
std::cout << "BENCHMARK_BUILD " << dt << " " << double(numPrimitives)/dt << " " << stat->sah() << " " << stat->bytesUsed() << " BVH" << N << "<" << primTy->name() << ">" << std::endl << std::flush;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
|
||||
template class BVHN<8>;
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) || defined(__aarch64__)
|
||||
template class BVHN<4>;
|
||||
#endif
|
||||
}
|
||||
|
||||
235
engine/thirdparty/embree/kernels/bvh/bvh.h
vendored
Normal file
@@ -0,0 +1,235 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
/* include all node types */
|
||||
#include "bvh_node_aabb.h"
|
||||
#include "bvh_node_aabb_mb.h"
|
||||
#include "bvh_node_aabb_mb4d.h"
|
||||
#include "bvh_node_obb.h"
|
||||
#include "bvh_node_obb_mb.h"
|
||||
#include "bvh_node_qaabb.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! flags used to enable specific node types in intersectors */
|
||||
enum BVHNodeFlags
|
||||
{
|
||||
BVH_FLAG_ALIGNED_NODE = 0x00001,
|
||||
BVH_FLAG_ALIGNED_NODE_MB = 0x00010,
|
||||
BVH_FLAG_UNALIGNED_NODE = 0x00100,
|
||||
BVH_FLAG_UNALIGNED_NODE_MB = 0x01000,
|
||||
BVH_FLAG_QUANTIZED_NODE = 0x100000,
|
||||
BVH_FLAG_ALIGNED_NODE_MB4D = 0x1000000,
|
||||
|
||||
/* short versions */
|
||||
BVH_AN1 = BVH_FLAG_ALIGNED_NODE,
|
||||
BVH_AN2 = BVH_FLAG_ALIGNED_NODE_MB,
|
||||
BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
|
||||
BVH_UN1 = BVH_FLAG_UNALIGNED_NODE,
|
||||
BVH_UN2 = BVH_FLAG_UNALIGNED_NODE_MB,
|
||||
BVH_MB = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
|
||||
BVH_AN1_UN1 = BVH_FLAG_ALIGNED_NODE | BVH_FLAG_UNALIGNED_NODE,
|
||||
BVH_AN2_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB,
|
||||
BVH_AN2_AN4D_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D | BVH_FLAG_UNALIGNED_NODE_MB,
|
||||
BVH_QN1 = BVH_FLAG_QUANTIZED_NODE
|
||||
};
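    /* The short names above are plain bitwise combinations of the flags, e.g.
       BVH_AN1_UN1 = 0x00001 | 0x00100 = 0x00101 and
       BVH_AN2_AN4D = 0x00010 | 0x1000000 = 0x1000010, so an intersector can be
       restricted to a subset of node types with a single mask test against `types`. */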
|
||||
|
||||
/*! Multi BVH with N children. Each node stores the bounding box of
|
||||
* it's N children as well as N child references. */
|
||||
template<int N>
|
||||
class BVHN : public AccelData
|
||||
{
|
||||
ALIGNED_CLASS_(16);
|
||||
public:
|
||||
|
||||
/*! forward declaration of node ref type */
|
||||
typedef NodeRefPtr<N> NodeRef;
|
||||
typedef BaseNode_t<NodeRef,N> BaseNode;
|
||||
typedef AABBNode_t<NodeRef,N> AABBNode;
|
||||
typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB;
|
||||
typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D;
|
||||
typedef OBBNode_t<NodeRef,N> OBBNode;
|
||||
typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB;
|
||||
typedef QuantizedBaseNode_t<N> QuantizedBaseNode;
|
||||
typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB;
|
||||
typedef QuantizedNode_t<NodeRef,N> QuantizedNode;
|
||||
|
||||
/*! Number of bytes the nodes and primitives are minimally aligned to.*/
|
||||
static const size_t byteAlignment = 16;
|
||||
static const size_t byteNodeAlignment = 4*N;
|
||||
|
||||
/*! Empty node */
|
||||
static const size_t emptyNode = NodeRef::emptyNode;
|
||||
|
||||
/*! Invalid node, used as marker in traversal */
|
||||
static const size_t invalidNode = NodeRef::invalidNode;
|
||||
static const size_t popRay = NodeRef::popRay;
|
||||
|
||||
/*! Maximum depth of the BVH. */
|
||||
static const size_t maxBuildDepth = 32;
|
||||
static const size_t maxBuildDepthLeaf = maxBuildDepth+8;
|
||||
static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder
|
||||
|
||||
/*! Maximum number of primitive blocks in a leaf. */
|
||||
static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks;
|
||||
|
||||
public:
|
||||
|
||||
/*! Builder interface to create allocator */
|
||||
struct CreateAlloc : public FastAllocator::Create {
|
||||
__forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {}
|
||||
};
|
||||
|
||||
typedef BVHNodeRecord<NodeRef> NodeRecord;
|
||||
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
|
||||
public:
|
||||
|
||||
/*! BVHN default constructor. */
|
||||
BVHN (const PrimitiveType& primTy, Scene* scene);
|
||||
|
||||
/*! BVHN destruction */
|
||||
~BVHN ();
|
||||
|
||||
/*! clears the acceleration structure */
|
||||
void clear();
|
||||
|
||||
/*! sets BVH members after build */
|
||||
void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives);
|
||||
|
||||
/*! Clears the barrier bits of a subtree. */
|
||||
void clearBarrier(NodeRef& node);
|
||||
|
||||
/*! lays out num large nodes of the BVH */
|
||||
void layoutLargeNodes(size_t num);
|
||||
NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator);
|
||||
|
||||
/*! called by all builders before build starts */
|
||||
double preBuild(const std::string& builderName);
|
||||
|
||||
/*! called by all builders after build ended */
|
||||
void postBuild(double t0);
|
||||
|
||||
/*! allocator class */
|
||||
struct Allocator {
|
||||
BVHN* bvh;
|
||||
Allocator (BVHN* bvh) : bvh(bvh) {}
|
||||
__forceinline void* operator() (size_t bytes) const {
|
||||
return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes);
|
||||
}
|
||||
};
|
||||
|
||||
/*! post build cleanup */
|
||||
void cleanup() {
|
||||
alloc.cleanup();
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/*! Encodes a node */
|
||||
static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); }
|
||||
static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); }
|
||||
static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); }
|
||||
static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); }
|
||||
static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); }
|
||||
static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); }
|
||||
static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); }
|
||||
|
||||
public:
|
||||
|
||||
/*! Prefetches the node this reference points to */
|
||||
__forceinline static void prefetch(const NodeRef ref, int types=0)
|
||||
{
|
||||
#if defined(__AVX512PF__) // MIC
|
||||
if (types != BVH_FLAG_QUANTIZED_NODE) {
|
||||
prefetchL2(((char*)ref.ptr)+0*64);
|
||||
prefetchL2(((char*)ref.ptr)+1*64);
|
||||
if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
|
||||
prefetchL2(((char*)ref.ptr)+2*64);
|
||||
prefetchL2(((char*)ref.ptr)+3*64);
|
||||
}
|
||||
if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
|
||||
/* KNL still needs L2 prefetches for large nodes */
|
||||
prefetchL2(((char*)ref.ptr)+4*64);
|
||||
prefetchL2(((char*)ref.ptr)+5*64);
|
||||
prefetchL2(((char*)ref.ptr)+6*64);
|
||||
prefetchL2(((char*)ref.ptr)+7*64);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* todo: reduce if 32bit offsets are enabled */
|
||||
prefetchL2(((char*)ref.ptr)+0*64);
|
||||
prefetchL2(((char*)ref.ptr)+1*64);
|
||||
prefetchL2(((char*)ref.ptr)+2*64);
|
||||
}
|
||||
#else
|
||||
if (types != BVH_FLAG_QUANTIZED_NODE) {
|
||||
prefetchL1(((char*)ref.ptr)+0*64);
|
||||
prefetchL1(((char*)ref.ptr)+1*64);
|
||||
if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
|
||||
prefetchL1(((char*)ref.ptr)+2*64);
|
||||
prefetchL1(((char*)ref.ptr)+3*64);
|
||||
}
|
||||
if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
|
||||
/* deactivate for large nodes on Xeon, as it introduces regressions */
|
||||
//prefetchL1(((char*)ref.ptr)+4*64);
|
||||
//prefetchL1(((char*)ref.ptr)+5*64);
|
||||
//prefetchL1(((char*)ref.ptr)+6*64);
|
||||
//prefetchL1(((char*)ref.ptr)+7*64);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* todo: reduce if 32bit offsets are enabled */
|
||||
prefetchL1(((char*)ref.ptr)+0*64);
|
||||
prefetchL1(((char*)ref.ptr)+1*64);
|
||||
prefetchL1(((char*)ref.ptr)+2*64);
|
||||
}
|
||||
#endif
|
||||
}
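    /* Rough size arithmetic behind the prefetch pattern above (assuming the usual
       layout of N child references plus 6 vfloat<N> bounds vectors): an AABBNode
       occupies about 6*4*N + 8*N = 32*N bytes, i.e. ~128 bytes (two 64-byte cache
       lines) for N=4 and ~256 bytes (four lines) for N=8, which matches the two
       base prefetches plus the extra pair issued when N >= 8 or wider node types
       are enabled. */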
|
||||
|
||||
__forceinline static void prefetchW(const NodeRef ref, int types=0)
|
||||
{
|
||||
embree::prefetchEX(((char*)ref.ptr)+0*64);
|
||||
embree::prefetchEX(((char*)ref.ptr)+1*64);
|
||||
if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
|
||||
embree::prefetchEX(((char*)ref.ptr)+2*64);
|
||||
embree::prefetchEX(((char*)ref.ptr)+3*64);
|
||||
}
|
||||
if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
|
||||
embree::prefetchEX(((char*)ref.ptr)+4*64);
|
||||
embree::prefetchEX(((char*)ref.ptr)+5*64);
|
||||
embree::prefetchEX(((char*)ref.ptr)+6*64);
|
||||
embree::prefetchEX(((char*)ref.ptr)+7*64);
|
||||
}
|
||||
}
|
||||
|
||||
/*! bvh type information */
|
||||
public:
|
||||
const PrimitiveType* primTy; //!< primitive type stored in the BVH
|
||||
|
||||
/*! bvh data */
|
||||
public:
|
||||
Device* device; //!< device pointer
|
||||
Scene* scene; //!< scene pointer
|
||||
NodeRef root; //!< root node
|
||||
FastAllocator alloc; //!< allocator used to allocate nodes
|
||||
|
||||
/*! statistics data */
|
||||
public:
|
||||
size_t numPrimitives; //!< number of primitives the BVH is build over
|
||||
size_t numVertices; //!< number of vertices the BVH references
|
||||
|
||||
/*! data arrays for special builders */
|
||||
public:
|
||||
std::vector<BVHN*> objects;
|
||||
vector_t<char,aligned_allocator<char,32>> subdiv_patches;
|
||||
};
|
||||
|
||||
typedef BVHN<4> BVH4;
|
||||
typedef BVHN<8> BVH8;
|
||||
}
|
||||
1348
engine/thirdparty/embree/kernels/bvh/bvh4_factory.cpp
vendored
Normal file
File diff suppressed because it is too large
318
engine/thirdparty/embree/kernels/bvh/bvh4_factory.h
vendored
Normal file
@@ -0,0 +1,318 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_factory.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! BVH4 instantiations */
|
||||
class BVH4Factory : public BVHFactory
|
||||
{
|
||||
public:
|
||||
BVH4Factory(int bfeatures, int ifeatures);
|
||||
|
||||
public:
|
||||
Accel* BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant);
|
||||
Accel* BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant);
|
||||
Accel* BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant);
|
||||
Accel* BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant);
|
||||
Accel* BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4i);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8i);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4v);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4iMB);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
|
||||
|
||||
Accel* BVH4Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH4Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::ROBUST);
|
||||
Accel* BVH4Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
Accel* BVH4Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH4Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH4Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
Accel* BVH4QuantizedTriangle4i(Scene* scene);
|
||||
Accel* BVH4QuantizedQuad4i(Scene* scene);
|
||||
|
||||
Accel* BVH4SubdivPatch1(Scene* scene);
|
||||
Accel* BVH4SubdivPatch1MB(Scene* scene);
|
||||
|
||||
Accel* BVH4UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH4UserGeometryMB(Scene* scene);
|
||||
|
||||
Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);
|
||||
|
||||
Accel* BVH4InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH4InstanceArrayMB(Scene* scene);
|
||||
|
||||
Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
private:
|
||||
void selectBuilders(int features);
|
||||
void selectIntersectors(int features);
|
||||
|
||||
private:
|
||||
Accel::Intersectors BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
|
||||
|
||||
Accel::Intersectors BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
|
||||
Accel::Intersectors BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
|
||||
Accel::Intersectors QBVH4Quad4iIntersectors(BVH4* bvh);
|
||||
Accel::Intersectors QBVH4Triangle4iIntersectors(BVH4* bvh);
|
||||
|
||||
Accel::Intersectors BVH4UserGeometryIntersectors(BVH4* bvh);
|
||||
Accel::Intersectors BVH4UserGeometryMBIntersectors(BVH4* bvh);
|
||||
|
||||
Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
|
||||
Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);
|
||||
|
||||
Accel::Intersectors BVH4InstanceArrayIntersectors(BVH4* bvh);
|
||||
Accel::Intersectors BVH4InstanceArrayMBIntersectors(BVH4* bvh);
|
||||
|
||||
Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
|
||||
Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);
|
||||
|
||||
Accel::Intersectors BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
|
||||
|
||||
private:
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
|
||||
|
||||
// ==============
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
|
||||
|
||||
// ==============
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
|
||||
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
|
||||
|
||||
// SAH scene builders
|
||||
private:
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
|
||||
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
// spatial scene builder
|
||||
private:
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
|
||||
|
||||
// twolevel scene builders
|
||||
private:
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
|
||||
};
|
||||
}
|
||||
1190
engine/thirdparty/embree/kernels/bvh/bvh8_factory.cpp
vendored
Normal file
File diff suppressed because it is too large
284
engine/thirdparty/embree/kernels/bvh/bvh8_factory.h
vendored
Normal file
@@ -0,0 +1,284 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_factory.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! BVH8 instantiations */
|
||||
class BVH8Factory : public BVHFactory
|
||||
{
|
||||
public:
|
||||
BVH8Factory(int bfeatures, int ifeatures);
|
||||
|
||||
public:
|
||||
Accel* BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant);
|
||||
Accel* BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
|
||||
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
|
||||
|
||||
Accel* BVH8Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
Accel* BVH8Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
Accel* BVH8QuantizedTriangle4i(Scene* scene);
|
||||
Accel* BVH8QuantizedTriangle4(Scene* scene);
|
||||
Accel* BVH8QuantizedQuad4i(Scene* scene);
|
||||
|
||||
Accel* BVH8UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH8UserGeometryMB(Scene* scene);
|
||||
|
||||
Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);
|
||||
|
||||
Accel* BVH8InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
|
||||
Accel* BVH8InstanceArrayMB(Scene* scene);
|
||||
|
||||
Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
|
||||
|
||||
private:
|
||||
void selectBuilders(int features);
|
||||
void selectIntersectors(int features);
|
||||
|
||||
private:
|
||||
Accel::Intersectors BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
|
||||
|
||||
Accel::Intersectors BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
|
||||
Accel::Intersectors BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
|
||||
Accel::Intersectors QBVH8Triangle4iIntersectors(BVH8* bvh);
|
||||
Accel::Intersectors QBVH8Triangle4Intersectors(BVH8* bvh);
|
||||
Accel::Intersectors QBVH8Quad4iIntersectors(BVH8* bvh);
|
||||
|
||||
Accel::Intersectors BVH8UserGeometryIntersectors(BVH8* bvh);
|
||||
Accel::Intersectors BVH8UserGeometryMBIntersectors(BVH8* bvh);
|
||||
|
||||
Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
|
||||
Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);
|
||||
|
||||
Accel::Intersectors BVH8InstanceArrayIntersectors(BVH8* bvh);
|
||||
Accel::Intersectors BVH8InstanceArrayMBIntersectors(BVH8* bvh);
|
||||
|
||||
Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
|
||||
|
||||
private:
|
||||
DEFINE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom);
|
||||
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
|
||||
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);

DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);

DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);

DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);

DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);

DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);

// SAH scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);

DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);

DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);

DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);

DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);

// SAH spatial scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);

// twolevel scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
};
}
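// Editor's note: the DEFINE_SYMBOL2 / DEFINE_ISA_FUNCTION entries above declare per-ISA
// entry points that the AVX2 accel factory binds to concrete implementations at runtime.
// The snippet below is only an illustrative, self-contained sketch of that function-pointer
// dispatch idea; the types, names and selection logic here are hypothetical placeholders,
// not the real expansion of the Embree macros.
#include <cstdio>

struct RayQueryStats { int hits = 0; };

// Two hypothetical ISA-specific implementations of the same intersector.
static void intersect1_sse2(RayQueryStats& s) { s.hits += 1; }
static void intersect1_avx2(RayQueryStats& s) { s.hits += 2; }

// The "symbol": a plain function pointer the factory fills in once,
// depending on which ISA the running CPU supports.
using Intersector1Func = void (*)(RayQueryStats&);
static Intersector1Func BVH8Triangle4Intersector1_example = nullptr;

static void selectISA(bool hasAVX2) {
  BVH8Triangle4Intersector1_example = hasAVX2 ? intersect1_avx2 : intersect1_sse2;
}

int main() {
  selectISA(/*hasAVX2=*/true);
  RayQueryStats stats;
  BVH8Triangle4Intersector1_example(stats); // dispatch through the selected symbol
  std::printf("hits=%d\n", stats.hits);
}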
60
engine/thirdparty/embree/kernels/bvh/bvh_builder.cpp
vendored
Normal file
@ -0,0 +1,60 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh_builder.h"

namespace embree
{
namespace isa
{
template<int N>
typename BVHN<N>::NodeRef BVHNBuilderVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
{
auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
return createLeaf(prims,set,alloc);
};

settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
return BVHBuilderBinnedSAH::build<NodeRef>
(FastAllocator::Create(allocator),typename BVH::AABBNode::Create2(),typename BVH::AABBNode::Set3(allocator,prims),createLeafFunc,progressFunc,prims,pinfo,settings);
}

template<int N>
typename BVHN<N>::NodeRef BVHNBuilderQuantizedVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
{
auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
return createLeaf(prims,set,alloc);
};

settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
return BVHBuilderBinnedSAH::build<NodeRef>
(FastAllocator::Create(allocator),typename BVH::QuantizedNode::Create2(),typename BVH::QuantizedNode::Set2(),createLeafFunc,progressFunc,prims,pinfo,settings);
}

template<int N>
typename BVHN<N>::NodeRecordMB BVHNBuilderMblurVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange)
{
auto createLeafFunc = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRecordMB {
return createLeaf(prims,set,alloc);
};

settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
return BVHBuilderBinnedSAH::build<NodeRecordMB>
(FastAllocator::Create(allocator),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::SetTimeRange(timeRange),createLeafFunc,progressFunc,prims,pinfo,settings);
}

template struct BVHNBuilderVirtual<4>;
template struct BVHNBuilderQuantizedVirtual<4>;
template struct BVHNBuilderMblurVirtual<4>;

#if defined(__AVX__)
template struct BVHNBuilderVirtual<8>;
template struct BVHNBuilderQuantizedVirtual<8>;
template struct BVHNBuilderMblurVirtual<8>;
#endif
}
}
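// Editor's note: a small, self-contained sketch of the explicit-instantiation pattern the
// file above uses (template definitions in a .cpp, with <4> always instantiated and <8>
// only when AVX is compiled in). The types and names below are placeholders for
// illustration, not Embree's.
template<int N>
struct WideNode {
  float lower[N];
  float upper[N];
  int childCount() const { return N; }
};

template<int N>
int widthOf(const WideNode<N>& n) { return n.childCount(); }

// Explicit instantiations force the compiler to emit code for the listed widths in this
// translation unit, mirroring "template struct BVHNBuilderVirtual<4>;" etc. above.
template struct WideNode<4>;
template int widthOf<4>(const WideNode<4>&);
#if defined(__AVX__)
template struct WideNode<8>;
template int widthOf<8>(const WideNode<8>&);
#endif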
115
engine/thirdparty/embree/kernels/bvh/bvh_builder.h
vendored
Normal file
@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/bvh_builder_msmblur.h"

namespace embree
{
namespace isa
{
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

template<int N>
struct BVHNBuilderVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef FastAllocator::CachedAllocator Allocator;

struct BVHNBuilderV {
NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};

template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}

NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}

private:
CreateLeafFunc createLeafFunc;
};

template<typename CreateLeafFunc>
static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
}
};

template<int N>
struct BVHNBuilderQuantizedVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef FastAllocator::CachedAllocator Allocator;

struct BVHNBuilderV {
NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};

template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}

NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}

private:
CreateLeafFunc createLeafFunc;
};

template<typename CreateLeafFunc>
static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
}
};

template<int N>
struct BVHNBuilderMblurVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNodeMB AABBNodeMB;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;
typedef FastAllocator::CachedAllocator Allocator;

struct BVHNBuilderV {
NodeRecordMB build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange);
virtual NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};

template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}

NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}

private:
CreateLeafFunc createLeafFunc;
};

template<typename CreateLeafFunc>
static NodeRecordMB build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings,timeRange);
}
};
}
}
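// Editor's note: the header above bridges a user-supplied callable to an out-of-line build()
// through the BVHNBuilderT adapter (a template wrapping the callable behind the virtual
// createLeaf() of BVHNBuilderV). Below is a minimal standalone sketch of that same adapter
// pattern with simplified placeholder types; it is illustrative only and does not use the
// Embree classes.
#include <cstdio>

struct LeafRef { int firstPrim; int numPrims; };

struct BuilderBase {
  virtual ~BuilderBase() = default;
  virtual LeafRef createLeaf(int begin, int end) = 0;
  LeafRef build(int numPrims) {
    // a real builder would recurse and split; here we just emit one leaf
    return createLeaf(0, numPrims);
  }
};

template<typename CreateLeafFunc>
struct BuilderT : public BuilderBase {
  explicit BuilderT(CreateLeafFunc f) : createLeafFunc(f) {}
  LeafRef createLeaf(int begin, int end) override { return createLeafFunc(begin, end); }
private:
  CreateLeafFunc createLeafFunc;
};

template<typename CreateLeafFunc>
LeafRef buildWith(CreateLeafFunc f, int numPrims) {
  return BuilderT<CreateLeafFunc>(f).build(numPrims);
}

int main() {
  LeafRef leaf = buildWith([](int b, int e) { return LeafRef{b, e - b}; }, 16);
  std::printf("leaf: first=%d count=%d\n", leaf.firstPrim, leaf.numPrims);
}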
583
engine/thirdparty/embree/kernels/bvh/bvh_builder_morton.cpp
vendored
Normal file
@ -0,0 +1,583 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "bvh_statistics.h"
#include "bvh_rotate.h"
#include "../common/profile.h"
#include "../../common/algorithms/parallel_prefix_sum.h"

#include "../builders/primrefgen.h"
#include "../builders/bvh_builder_morton.h"

#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"

#if defined(__64BIT__)
# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
#else
# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
#endif

namespace embree
{
namespace isa
{
template<int N>
struct SetBVHNBounds
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
typedef typename BVH::AABBNode AABBNode;

BVH* bvh;
__forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {}

__forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num)
{
AABBNode* node = ref.getAABBNode();

BBox3fa res = empty;
for (size_t i=0; i<num; i++) {
const BBox3fa b = children[i].bounds;
res.extend(b);
node->setRef(i,children[i].ref);
node->setBounds(i,b);
}

BBox3fx result = (BBox3fx&)res;
#if ROTATE_TREE
if (N == 4)
{
size_t n = 0;
for (size_t i=0; i<num; i++)
n += children[i].bounds.lower.a;

if (n >= 4096) {
for (size_t i=0; i<num; i++) {
if (children[i].bounds.lower.a < 4096) {
for (int j=0; j<ROTATE_TREE; j++)
BVHNRotate<N>::rotate(node->child(i));
node->child(i).setBarrier();
}
}
}
result.lower.a = unsigned(n);
}
#endif

return NodeRecord(ref,result);
}
};

template<int N, typename Primitive>
struct CreateMortonLeaf;

template<int N>
struct CreateMortonLeaf<N,Triangle4>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Triangle4* accel = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ const mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}

Triangle4::store_nt(accel,Triangle4(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = unsigned(current.size());
#endif
return NodeRecord(ref,box_o);
}

private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Triangle4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Triangle4v* accel = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}
Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Triangle4i>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);

vuint4 v0 = zero, v1 = zero, v2 = zero;
vuint4 vgeomID = -1, vprimID = -1;
const TriangleMesh* __restrict__ const mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID[i] = geomID_;
vprimID[i] = primID;
unsigned int int_stride = mesh->vertices0.getStride()/4;
v0[i] = tri.v[0] * int_stride;
v1[i] = tri.v[1] * int_stride;
v2[i] = tri.v[2] * int_stride;
}

for (size_t i=items; i<4; i++)
{
vgeomID[i] = vgeomID[0];
vprimID[i] = -1;
v0[i] = 0;
v1[i] = 0;
v2[i] = 0;
}
Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Quad4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);

/* allocate leaf node */
Quad4v* accel = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);

vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero, v3 = zero;
const QuadMesh* __restrict__ mesh = this->mesh;

for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const QuadMesh::Quad& tri = mesh->quad(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
const Vec3fa& p3 = mesh->vertex(tri.v[3]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
}
Quad4v::store_nt(accel,Quad4v(v0,v1,v2,v3,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
QuadMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,Object>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();

/* allocate leaf node */
Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const UserGeometry* mesh = this->mesh;

BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int index = morton[start+i].index;
const unsigned int primID = index;
bounds.extend(mesh->bounds(primID));
new (&accel[i]) Object(geomID_,primID);
}

BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
UserGeometry* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,InstancePrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);

/* allocate leaf node */
InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const Instance* instance = this->mesh;

BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstancePrimitive(instance, geomID_);
}

BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
Instance* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<int N>
struct CreateMortonLeaf<N,InstanceArrayPrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

__forceinline CreateMortonLeaf (InstanceArray* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}

__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);

/* allocate leaf node */
InstanceArrayPrimitive* accel = (InstanceArrayPrimitive*) alloc.malloc1(items*sizeof(InstanceArrayPrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const InstanceArray* instance = this->mesh;

BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstanceArrayPrimitive(geomID_, primID);
}

BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
InstanceArray* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};

template<typename Mesh>
struct CalculateMeshBounds
{
__forceinline CalculateMeshBounds (Mesh* mesh)
: mesh(mesh) {}

__forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton) {
return mesh->bounds(morton.index);
}

private:
Mesh* mesh;
};

template<int N, typename Mesh, typename Primitive>
class BVHNMeshBuilderMorton : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;

public:

BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD)
: bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {}

/* build function */
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
morton.clear();
}
size_t numPrimitives = mesh->size();
numPreviousPrimitives = numPrimitives;

/* skip build for empty scene */
if (numPrimitives == 0) {
bvh->set(BVH::emptyNode,empty,0);
return;
}

/* preallocate arrays */
morton.resize(numPrimitives);
size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive));
size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim);
bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes
bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated);

/* create morton code array */
BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes);
size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface);

/* create BVH */
SetBVHNBounds<N> setBounds(bvh);
CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data());
CalculateMeshBounds<Mesh> calculateBounds(mesh);
auto root = BVHBuilderMorton::build<NodeRecord>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create(),
setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface,
morton.data(),dest,numPrimitivesGen,settings);

bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives);

#if ROTATE_TREE
if (N == 4)
{
for (int i=0; i<ROTATE_TREE; i++)
BVHNRotate<N>::rotate(bvh->root);
bvh->clearBarrier(bvh->root);
}
#endif

/* clear temporary data for static geometry */
if (bvh->scene->isStaticAccel()) {
morton.clear();
}
bvh->cleanup();
}

void clear() {
morton.clear();
}

private:
BVH* bvh;
Mesh* mesh;
mvector<BVHBuilderMorton::BuildPrim> morton;
BVHBuilderMorton::Settings settings;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
};

#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> ((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v>((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> ((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v>((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,QuadMesh,Quad4v>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,QuadMesh,Quad4v>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,UserGeometry,Object>((BVH4*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,UserGeometry,Object>((BVH8*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif

}
}
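// Editor's note: the Morton builder above sorts primitives by Morton (Z-order) code; the
// actual code generation lives in builders/bvh_builder_morton.h, which is not part of this
// excerpt. The snippet below is a generic, standalone illustration of how a 30-bit 3D Morton
// code interleaves 10 bits per axis; it is reference code, not Embree's SIMD implementation.
#include <cstdint>
#include <cstdio>

// Spread the lower 10 bits of v so there are two zero bits between consecutive input bits.
static uint32_t part1by2(uint32_t v) {
  v &= 0x000003ffu;
  v = (v ^ (v << 16)) & 0xff0000ffu;
  v = (v ^ (v <<  8)) & 0x0300f00fu;
  v = (v ^ (v <<  4)) & 0x030c30c3u;
  v = (v ^ (v <<  2)) & 0x09249249u;
  return v;
}

// 30-bit Morton code from 10-bit quantized x/y/z grid coordinates.
static uint32_t mortonCode(uint32_t x, uint32_t y, uint32_t z) {
  return (part1by2(z) << 2) | (part1by2(y) << 1) | part1by2(x);
}

int main() {
  // Spatially close cells tend to get nearby codes, which is what makes sorting by
  // Morton code a useful primitive ordering for fast BVH construction.
  std::printf("%u %u\n", mortonCode(1, 2, 3), mortonCode(2, 2, 3));
}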
565
engine/thirdparty/embree/kernels/bvh/bvh_builder_sah.cpp
vendored
Normal file
@ -0,0 +1,565 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh.h"
|
||||
#include "bvh_builder.h"
|
||||
#include "../builders/primrefgen.h"
|
||||
#include "../builders/splitter.h"
|
||||
|
||||
#include "../geometry/linei.h"
|
||||
#include "../geometry/triangle.h"
|
||||
#include "../geometry/trianglev.h"
|
||||
#include "../geometry/trianglev_mb.h"
|
||||
#include "../geometry/trianglei.h"
|
||||
#include "../geometry/quadv.h"
|
||||
#include "../geometry/quadi.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
#include "../geometry/subgrid.h"
|
||||
|
||||
#include "../common/state.h"
|
||||
#include "../../common/algorithms/parallel_for_for.h"
|
||||
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
|
||||
|
||||
#define PROFILE 0
|
||||
#define PROFILE_RUNS 20
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, typename Primitive>
|
||||
struct CreateLeaf
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
__forceinline CreateLeaf (BVH* bvh) : bvh(bvh) {}
|
||||
|
||||
__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
size_t n = set.size();
|
||||
size_t items = Primitive::blocks(n);
|
||||
size_t start = set.begin();
|
||||
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
|
||||
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
|
||||
for (size_t i=0; i<items; i++) {
|
||||
accel[i].fill(prims,start,set.end(),bvh->scene);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
BVH* bvh;
|
||||
};
|
||||
|
||||
|
||||
template<int N, typename Primitive>
|
||||
struct CreateLeafQuantized
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
__forceinline CreateLeafQuantized (BVH* bvh) : bvh(bvh) {}
|
||||
|
||||
__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
size_t n = set.size();
|
||||
size_t items = Primitive::blocks(n);
|
||||
size_t start = set.begin();
|
||||
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
|
||||
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
|
||||
for (size_t i=0; i<items; i++) {
|
||||
accel[i].fill(prims,start,set.end(),bvh->scene);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
BVH* bvh;
|
||||
};
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
template<int N, typename Primitive>
|
||||
struct BVHNBuilderSAH : public Builder
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVHN<N>::NodeRef NodeRef;
|
||||
|
||||
BVH* bvh;
|
||||
Scene* scene;
|
||||
Geometry* mesh;
|
||||
mvector<PrimRef> prims;
|
||||
GeneralBVHBuilder::Settings settings;
|
||||
Geometry::GTypeMask gtype_;
|
||||
unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
|
||||
bool primrefarrayalloc;
|
||||
unsigned int numPreviousPrimitives = 0;
|
||||
|
||||
BVHNBuilderSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize,
|
||||
const Geometry::GTypeMask gtype, bool primrefarrayalloc = false)
|
||||
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0),
|
||||
settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), primrefarrayalloc(primrefarrayalloc) {}
|
||||
|
||||
BVHNBuilderSAH (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
|
||||
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID), primrefarrayalloc(false) {}
|
||||
|
||||
// FIXME: shrink bvh->alloc in destructor here and in other builders too
|
||||
|
||||
void build()
|
||||
{
|
||||
/* we reset the allocator when the mesh size changed */
|
||||
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
|
||||
bvh->alloc.clear();
|
||||
}
|
||||
|
||||
/* if we use the primrefarray for allocations we have to take it back from the BVH */
|
||||
if (settings.primrefarrayalloc != size_t(inf))
|
||||
bvh->alloc.unshare(prims);
|
||||
|
||||
/* skip build for empty scene */
|
||||
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
|
||||
numPreviousPrimitives = numPrimitives;
|
||||
if (numPrimitives == 0) {
|
||||
bvh->clear();
|
||||
prims.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
|
||||
|
||||
#if PROFILE
|
||||
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
|
||||
#endif
|
||||
|
||||
/* create primref array */
|
||||
if (primrefarrayalloc) {
|
||||
settings.primrefarrayalloc = numPrimitives/1000;
|
||||
if (settings.primrefarrayalloc < 1000)
|
||||
settings.primrefarrayalloc = inf;
|
||||
}
|
||||
|
||||
/* enable os_malloc for two level build */
|
||||
if (mesh)
|
||||
bvh->alloc.setOSallocation(true);
|
||||
|
||||
/* initialize allocator */
|
||||
const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
|
||||
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
|
||||
prims.resize(numPrimitives);
|
||||
|
||||
PrimInfo pinfo = mesh ?
|
||||
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
|
||||
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);
|
||||
|
||||
/* pinfo might has zero size due to invalid geometry */
|
||||
if (unlikely(pinfo.size() == 0))
|
||||
{
|
||||
bvh->clear();
|
||||
prims.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
/* call BVH builder */
|
||||
NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeaf<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
|
||||
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
|
||||
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
|
||||
|
||||
#if PROFILE
|
||||
});
|
||||
#endif
|
||||
|
||||
/* if we allocated using the primrefarray we have to keep it alive */
|
||||
if (settings.primrefarrayalloc != size_t(inf))
|
||||
bvh->alloc.share(prims);
|
||||
|
||||
/* for static geometries we can do some cleanups */
|
||||
else if (scene && scene->isStaticAccel()) {
|
||||
prims.clear();
|
||||
}
|
||||
bvh->cleanup();
|
||||
bvh->postBuild(t0);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
prims.clear();
|
||||
}
|
||||
};
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
template<int N, typename Primitive>
|
||||
struct BVHNBuilderSAHQuantized : public Builder
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVHN<N>::NodeRef NodeRef;
|
||||
|
||||
BVH* bvh;
|
||||
Scene* scene;
|
||||
Geometry* mesh;
|
||||
mvector<PrimRef> prims;
|
||||
GeneralBVHBuilder::Settings settings;
|
||||
Geometry::GTypeMask gtype_;
|
||||
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
|
||||
unsigned int numPreviousPrimitives = 0;
|
||||
|
||||
BVHNBuilderSAHQuantized (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
|
||||
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype) {}
|
||||
|
||||
BVHNBuilderSAHQuantized (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
|
||||
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID) {}
|
||||
|
||||
// FIXME: shrink bvh->alloc in destructor here and in other builders too
|
||||
|
||||
void build()
|
||||
{
|
||||
/* we reset the allocator when the mesh size changed */
|
||||
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
|
||||
bvh->alloc.clear();
|
||||
}
|
||||
|
||||
/* skip build for empty scene */
|
||||
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
|
||||
numPreviousPrimitives = numPrimitives;
|
||||
if (numPrimitives == 0) {
|
||||
prims.clear();
|
||||
bvh->clear();
|
||||
return;
|
||||
}
|
||||
|
||||
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::QBVH" + toString(N) + "BuilderSAH");
|
||||
|
||||
#if PROFILE
|
||||
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
|
||||
#endif
|
||||
/* create primref array */
|
||||
prims.resize(numPrimitives);
|
||||
PrimInfo pinfo = mesh ?
|
||||
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
|
||||
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);
|
||||
|
||||
/* enable os_malloc for two level build */
|
||||
if (mesh)
|
||||
bvh->alloc.setOSallocation(true);
|
||||
|
||||
/* call BVH builder */
|
||||
const size_t node_bytes = numPrimitives*sizeof(typename BVH::QuantizedNode)/(4*N);
|
||||
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
|
||||
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
|
||||
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
|
||||
NodeRef root = BVHNBuilderQuantizedVirtual<N>::build(&bvh->alloc,CreateLeafQuantized<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
|
||||
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
|
||||
//bvh->layoutLargeNodes(pinfo.size()*0.005f); // FIXME: COPY LAYOUT FOR LARGE NODES !!!
|
||||
#if PROFILE
|
||||
});
|
||||
#endif
|
||||
|
||||
/* clear temporary data for static geometry */
|
||||
if (scene && scene->isStaticAccel()) {
|
||||
prims.clear();
|
||||
}
|
||||
bvh->cleanup();
|
||||
bvh->postBuild(t0);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
prims.clear();
|
||||
}
|
||||
};
|
||||
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
|
||||
template<int N, typename Primitive>
|
||||
struct CreateLeafGrid
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
__forceinline CreateLeafGrid (BVH* bvh, const SubGridBuildData * const sgrids) : bvh(bvh),sgrids(sgrids) {}
|
||||
|
||||
__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
const size_t items = set.size(); //Primitive::blocks(n);
|
||||
const size_t start = set.begin();
|
||||
|
||||
/* collect all subsets with unique geomIDs */
|
||||
assert(items <= N);
|
||||
unsigned int geomIDs[N];
|
||||
unsigned int num_geomIDs = 1;
|
||||
geomIDs[0] = prims[start].geomID();
|
||||
|
||||
for (size_t i=1;i<items;i++)
|
||||
{
|
||||
bool found = false;
|
||||
const unsigned int new_geomID = prims[start+i].geomID();
|
||||
for (size_t j=0;j<num_geomIDs;j++)
|
||||
if (new_geomID == geomIDs[j])
|
||||
{ found = true; break; }
|
||||
if (!found)
|
||||
geomIDs[num_geomIDs++] = new_geomID;
|
||||
}

/* allocate all leaf memory in one single block */
SubGridQBVHN<N>* accel = (SubGridQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridQBVHN<N>),BVH::byteAlignment);
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,num_geomIDs);

for (size_t g=0;g<num_geomIDs;g++)
{
unsigned int x[N];
unsigned int y[N];
unsigned int primID[N];
BBox3fa bounds[N];
unsigned int pos = 0;
for (size_t i=0;i<items;i++)
{
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;

const SubGridBuildData& sgrid_bd = sgrids[prims[start+i].primID()];
x[pos] = sgrid_bd.sx;
y[pos] = sgrid_bd.sy;
primID[pos] = sgrid_bd.primID;
bounds[pos] = prims[start+i].bounds();
pos++;
}
assert(pos <= N);
new (&accel[g]) SubGridQBVHN<N>(x,y,primID,bounds,geomIDs[g],pos);
}

return node;
}

BVH* bvh;
const SubGridBuildData * const sgrids;
};


template<int N>
struct BVHNBuilderSAHGrid : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;

BVH* bvh;
Scene* scene;
GridMesh* mesh;
mvector<PrimRef> prims;
mvector<SubGridBuildData> sgrids;
GeneralBVHBuilder::Settings settings;
const unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;

BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD) {}

BVHNBuilderSAHGrid (BVH* bvh, GridMesh* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), geomID_(geomID) {}

void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}

/* if we use the primrefarray for allocations we have to take it back from the BVH */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.unshare(prims);

const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false);
numPreviousPrimitives = numGridPrimitives;

PrimInfo pinfo = mesh ? createPrimRefArrayGrids(mesh,prims,sgrids) : createPrimRefArrayGrids(scene,prims,sgrids);
const size_t numPrimitives = pinfo.size();
/* no primitives */
if (numPrimitives == 0) {
bvh->clear();
prims.clear();
sgrids.clear();
return;
}

double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");

/* create primref array */
settings.primrefarrayalloc = numPrimitives/1000;
if (settings.primrefarrayalloc < 1000)
settings.primrefarrayalloc = inf;

/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);

/* initialize allocator */
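/* Rough pre-build size estimate for the fast allocator (not a hard limit): roughly one
   node per 4*N primitives and about numPrimitives/N subgrid leaf blocks with a 20%
   safety margin. Using sizeof(AABBNodeMB) here appears to be a conservative choice for
   a static grid BVH; the allocator grows on demand if the estimate is too small. */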
const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));

bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);

/* pinfo might have zero size due to invalid geometry */
if (unlikely(pinfo.size() == 0))
{
bvh->clear();
sgrids.clear();
prims.clear();
return;
}

/* call BVH builder */
NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafGrid<N,SubGridQBVHN<N>>(bvh,sgrids.data()),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));

/* clear temporary array */
sgrids.clear();

/* if we allocated using the primrefarray we have to keep it alive */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.share(prims);

/* for static geometries we can do some cleanups */
else if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}

void clear() {
prims.clear();
}
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
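
/* The functions below are the factory entry points that hand out the static (non
   motion-blur) SAH builders for the individual primitive layouts; they are presumably
   referenced from the per-ISA BVH factory tables elsewhere in the kernels (assumption:
   the registration site is outside this file). The numeric arguments correspond to
   sahBlockSize, intCost, minLeafSize and maxLeafSize of the builder constructors above. */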

#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }

Builder* BVH4Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }

Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }

Builder* BVH8Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }

#endif
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4iMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
Builder* BVH4QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }

#if defined(__AVX__)
Builder* BVH8Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
Builder* BVH8QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }

#endif
#endif

#if defined(EMBREE_GEOMETRY_USER)

Builder* BVH4VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_min_leaf_size;
int maxLeafSize = scene->device->object_accel_max_leaf_size;
return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
}

Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,UserGeometry::geom_type);
}
#if defined(__AVX__)

Builder* BVH8VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_min_leaf_size;
int maxLeafSize = scene->device->object_accel_max_leaf_size;
return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
}

Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,UserGeometry::geom_type);
}
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif

#if defined(EMBREE_GEOMETRY_GRID)
Builder* BVH4GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,4,mode); }
Builder* BVH4GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4,mode); } // FIXME: check whether cost factors are correct

#if defined(__AVX__)
Builder* BVH8GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,mesh,geomID,8,1.0f,8,8,mode); }
Builder* BVH8GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8,mode); } // FIXME: check whether cost factors are correct
#endif
#endif
}
}
713
engine/thirdparty/embree/kernels/bvh/bvh_builder_sah_mb.cpp
vendored
Normal file
@ -0,0 +1,713 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/bvh_builder_msmblur.h"

#include "../builders/primrefgen.h"
#include "../builders/splitter.h"

#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"

#include "../common/state.h"

// FIXME: remove after removing BVHNBuilderMBlurRootTimeSplitsSAH
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"

namespace embree
{
namespace isa
{

#if 0
template<int N, typename Primitive>
struct CreateMBlurLeaf
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;

__forceinline CreateMBlurLeaf (BVH* bvh, PrimRef* prims, size_t time) : bvh(bvh), prims(prims), time(time) {}

__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
size_t items = Primitive::blocks(set.size());
size_t start = set.begin();
for (size_t i=start; i<set.end(); i++) assert(prims[start].geomID() == prims[i].geomID()); // assert that all geomIDs are identical
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
NodeRef node = bvh->encodeLeaf((char*)accel,items);

LBBox3fa allBounds = empty;
for (size_t i=0; i<items; i++)
allBounds.extend(accel[i].fillMB(prims, start, set.end(), bvh->scene, time));

return NodeRecordMB(node,allBounds);
}

BVH* bvh;
PrimRef* prims;
size_t time;
};
#endif

template<int N, typename Mesh, typename Primitive>
struct CreateMSMBlurLeaf
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;

__forceinline CreateMSMBlurLeaf (BVH* bvh) : bvh(bvh) {}
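
/* Creates one leaf for the multi-segment motion-blur builder. The build record carries
   the primitive range together with the time range of the current temporal split, and
   the returned NodeRecordMB4D stores the linear bounds plus that time range so the
   parent can emit a 4D (time-bounded) node. */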

__forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
{
size_t items = Primitive::blocks(current.prims.size());
size_t start = current.prims.begin();
size_t end = current.prims.end();
for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteNodeAlignment);
NodeRef node = bvh->encodeLeaf((char*)accel,items);
LBBox3fa allBounds = empty;
for (size_t i=0; i<items; i++)
allBounds.extend(accel[i].fillMB(current.prims.prims->data(), start, current.prims.end(), bvh->scene, current.prims.time_range));
return NodeRecordMB4D(node,allBounds,current.prims.time_range);
}

BVH* bvh;
};

/* Motion blur BVH with 4D nodes and internal time splits */
template<int N, typename Mesh, typename Primitive>
struct BVHNBuilderMBlurSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;

BVH* bvh;
Scene* scene;
const size_t sahBlockSize;
const float intCost;
const size_t minLeafSize;
const size_t maxLeafSize;
const Geometry::GTypeMask gtype_;

BVHNBuilderMBlurSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks)), gtype_(gtype) {}

void build()
{
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(gtype_,true);
if (numPrimitives == 0) { bvh->clear(); return; }

double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAH");

#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif

//const size_t numTimeSteps = scene->getNumTimeSteps<typename Mesh::type_t,true>();
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);

/*if (numTimeSegments == 1)
buildSingleSegment(numPrimitives);
else*/
buildMultiSegment(numPrimitives);

#if PROFILE
});
#endif

/* clear temporary data for static geometry */
bvh->cleanup();
bvh->postBuild(t0);
}

#if 0 // No longer compatible when time_ranges are present for geometries. Would have to create temporal nodes sometimes, and put only a single geometry into leaf.
void buildSingleSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRef> prims(scene->device,numPrimitives);
const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface,0);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);

/* settings for BVH build */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);

/* build hierarchy */
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
(typename BVH::CreateAlloc(bvh),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::Set(),
CreateMBlurLeaf<N,Primitive>(bvh,prims.data(),0),bvh->scene->progressInterface,
prims.data(),pinfo,settings);

bvh->set(root.ref,root.lbounds,pinfo.size());
}
#endif

void buildMultiSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRefMB> prims(scene->device,numPrimitives);
PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface);

/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }

/* estimate acceleration structure size */
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);

/* settings for BVH build */
BVHBuilderMSMBlur::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxDepth;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleLeafTimeSegment = Primitive::singleTimeSegment;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
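
/* The MSMBlur builder below performs the usual object splits and, where a primitive's
   motion spans several time segments, temporal splits as well: each build record
   carries a time range, and CreateMSMBlurLeaf returns that range so interior
   AABBNodeMB4D nodes can bound both space and time. */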

/* build hierarchy */
auto root =
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
RecalculatePrimRef<Mesh>(scene),
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB4D::Create(),
typename BVH::AABBNodeMB4D::Set(),
CreateMSMBlurLeaf<N,Mesh,Primitive>(bvh),
bvh->scene->progressInterface,
settings);

bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
}

void clear() {
}
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

struct GridRecalculatePrimRef
{
Scene* scene;
const SubGridBuildData * const sgrids;

__forceinline GridRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids)
: scene(scene), sgrids(sgrids) {}
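
/* For grid geometry the primID stored in a PrimRefMB is an index into the sgrids
   build array (the "buildID"), not the grid primitive itself. When the temporal
   builder narrows a time range it calls back into this functor to look up the subgrid
   and recompute its linear bounds over the new range. */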

__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
{
const unsigned int geomID = prim.geomID();
const GridMesh* mesh = scene->get<GridMesh>(geomID);
const unsigned int buildID = prim.primID();
const SubGridBuildData &subgrid = sgrids[buildID];
const unsigned int primID = subgrid.primID;
const size_t x = subgrid.x();
const size_t y = subgrid.y();
const LBBox3fa lbounds = mesh->linearBounds(mesh->grid(primID),x,y,time_range);
const unsigned num_time_segments = mesh->numTimeSegments();
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, num_time_segments, geomID, buildID);
}

__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
const unsigned int geomID = prim.geomID();
const GridMesh* mesh = scene->get<GridMesh>(geomID);
const unsigned int buildID = prim.primID();
const SubGridBuildData &subgrid = sgrids[buildID];
const unsigned int primID = subgrid.primID;
const size_t x = subgrid.x();
const size_t y = subgrid.y();
return mesh->linearBounds(mesh->grid(primID),x,y,time_range);
}

};

template<int N>
struct CreateMSMBlurLeafGrid
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;

__forceinline CreateMSMBlurLeafGrid (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids) : scene(scene), bvh(bvh), sgrids(sgrids) {}

__forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
{
const size_t items = current.prims.size();
const size_t start = current.prims.begin();

const PrimRefMB* prims = current.prims.prims->data();
/* collect all subsets with unique geomIDs */
assert(items <= N);
unsigned int geomIDs[N];
unsigned int num_geomIDs = 1;
geomIDs[0] = prims[start].geomID();

for (size_t i=1;i<items;i++)
{
bool found = false;
const unsigned int new_geomID = prims[start+i].geomID();
for (size_t j=0;j<num_geomIDs;j++)
if (new_geomID == geomIDs[j])
{ found = true; break; }
if (!found)
geomIDs[num_geomIDs++] = new_geomID;
}

/* allocate all leaf memory in one single block */
SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);

LBBox3fa allBounds = empty;

for (size_t g=0;g<num_geomIDs;g++)
{
const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
unsigned int x[N];
unsigned int y[N];
unsigned int primID[N];
BBox3fa bounds0[N];
BBox3fa bounds1[N];
unsigned int pos = 0;
for (size_t i=0;i<items;i++)
{
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;

const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
x[pos] = sgrid_bd.sx;
y[pos] = sgrid_bd.sy;
primID[pos] = sgrid_bd.primID;
const size_t x = sgrid_bd.x();
const size_t y = sgrid_bd.y();
LBBox3fa newBounds = mesh->linearBounds(mesh->grid(sgrid_bd.primID),x,y,current.prims.time_range);
allBounds.extend(newBounds);
bounds0[pos] = newBounds.bounds0;
bounds1[pos] = newBounds.bounds1;
pos++;
}
assert(pos <= N);
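
/* The two constructor arguments after the geomID (time_range.lower and
   1.0f/time_range.size()) presumably let SubGridMBQBVHN remap a global query time into
   this leaf's local [0,1] range, so bounds0/bounds1 are interpolated over just the
   current temporal segment (assumption: SubGridMBQBVHN internals are not shown here). */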
new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],current.prims.time_range.lower,1.0f/current.prims.time_range.size(),pos);
}
return NodeRecordMB4D(node,allBounds,current.prims.time_range);
}

Scene *scene;
BVH* bvh;
const SubGridBuildData * const sgrids;
};

#if 0
template<int N>
struct CreateLeafGridMB
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;

__forceinline CreateLeafGridMB (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
: scene(scene), bvh(bvh), sgrids(sgrids) {}

__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
const size_t items = set.size();
const size_t start = set.begin();

/* collect all subsets with unique geomIDs */
assert(items <= N);
unsigned int geomIDs[N];
unsigned int num_geomIDs = 1;
geomIDs[0] = prims[start].geomID();

for (size_t i=1;i<items;i++)
{
bool found = false;
const unsigned int new_geomID = prims[start+i].geomID();
for (size_t j=0;j<num_geomIDs;j++)
if (new_geomID == geomIDs[j])
{ found = true; break; }
if (!found)
geomIDs[num_geomIDs++] = new_geomID;
}

/* allocate all leaf memory in one single block */
SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);

LBBox3fa allBounds = empty;

for (size_t g=0;g<num_geomIDs;g++)
{
const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);

unsigned int x[N];
unsigned int y[N];
unsigned int primID[N];
BBox3fa bounds0[N];
BBox3fa bounds1[N];
unsigned int pos = 0;
for (size_t i=0;i<items;i++)
{
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;

const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
x[pos] = sgrid_bd.sx;
y[pos] = sgrid_bd.sy;
primID[pos] = sgrid_bd.primID;
const size_t x = sgrid_bd.x();
const size_t y = sgrid_bd.y();
bool MAYBE_UNUSED valid0 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,0,bounds0[pos]);
bool MAYBE_UNUSED valid1 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,1,bounds1[pos]);
assert(valid0);
assert(valid1);
allBounds.extend(LBBox3fa(bounds0[pos],bounds1[pos]));
pos++;
}
new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],0.0f,1.0f,pos);
}
return NodeRecordMB(node,allBounds);
}

Scene *scene;
BVH* bvh;
const SubGridBuildData * const sgrids;
};
#endif

/* Motion blur BVH with 4D nodes and internal time splits */
template<int N>
struct BVHNBuilderMBlurSAHGrid : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;

BVH* bvh;
Scene* scene;
const size_t sahBlockSize;
const float intCost;
const size_t minLeafSize;
const size_t maxLeafSize;
mvector<SubGridBuildData> sgrids;

BVHNBuilderMBlurSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize)
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,BVH::maxLeafBlocks)), sgrids(scene->device,0) {}

PrimInfo createPrimRefArrayMBlurGrid(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
{
/* first run to get #primitives */
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator<GridMesh,true> iter(scene);

pstate.init(iter,size_t(1024));

/* iterate over all meshes in the scene */
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {

PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j,range<size_t>(0,1))) continue;
BBox3fa bounds = empty;
const PrimRef prim(bounds,unsigned(geomID),unsigned(j));
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
}
return pinfo;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

size_t numPrimitives = pinfo.size();
if (numPrimitives == 0) return pinfo;

/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);

/* second run to fill primrefs and SubGridBuildData arrays */
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {

k = base.size();
size_t p_index = k;
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
const GridMesh::Grid &g = mesh->grid(j);
if (!mesh->valid(j,range<size_t>(0,1))) continue;

for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
BBox3fa bounds = empty;
if (!mesh->buildBounds(g,x,y,itime,bounds)) continue; // get bounds of subgrid
const PrimRef prim(bounds,unsigned(geomID),unsigned(p_index));
pinfo.add_center2(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfo;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

assert(pinfo.size() == numPrimitives);
return pinfo;
}

PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f))
{
/* first run to get #primitives */
ParallelForForPrefixSumState<PrimInfoMB> pstate;
Scene::Iterator<GridMesh,true> iter(scene);

pstate.init(iter,size_t(1024));
/* iterate over all meshes in the scene */
PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {

PrimInfoMB pinfoMB(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
LBBox3fa bounds(empty);
PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
pinfoMB.merge(gridMB);
}
return pinfoMB;
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });

size_t numPrimitives = pinfoMB.size();
if (numPrimitives == 0) return pinfoMB;

/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);
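
/* Two-pass primref generation: the first parallel_for_for_prefix_sum pass only counts
   subgrids per grid, the prefix sum then gives every mesh range its starting offset
   (base.size(), assigned to k below), and the second pass writes the PrimRefMBs and
   the matching SubGridBuildData entries at those offsets without further
   synchronization. */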
/* second run to fill primrefs and SubGridBuildData arrays */
pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {

k = base.size();
size_t p_index = k;
PrimInfoMB pinfoMB(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
const GridMesh::Grid &g = mesh->grid(j);

for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
const PrimRefMB prim(mesh->linearBounds(g,x,y,t0t1),mesh->numTimeSegments(),mesh->time_range,mesh->numTimeSegments(),unsigned(geomID),unsigned(p_index));
pinfoMB.add_primref(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfoMB;
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });

assert(pinfoMB.size() == numPrimitives);
pinfoMB.time_range = t0t1;
return pinfoMB;
}

void build()
{
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(GridMesh::geom_type,true);
if (numPrimitives == 0) { bvh->clear(); return; }

double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAHGrid");

//const size_t numTimeSteps = scene->getNumTimeSteps<GridMesh,true>();
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
//if (numTimeSegments == 1)
// buildSingleSegment(numPrimitives);
//else
buildMultiSegment(numPrimitives);

/* clear temporary data for static geometry */
bvh->cleanup();
bvh->postBuild(t0);
}

#if 0
void buildSingleSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRef> prims(scene->device,numPrimitives);
const PrimInfo pinfo = createPrimRefArrayMBlurGrid(scene,prims,bvh->scene->progressInterface,0);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }

/* estimate acceleration structure size */
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
//TODO: check leaf_bytes
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);

/* settings for BVH build */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);

/* build hierarchy */
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
(typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB::Create(),
typename BVH::AABBNodeMB::Set(),
CreateLeafGridMB<N>(scene,bvh,sgrids.data()),
bvh->scene->progressInterface,
prims.data(),pinfo,settings);

bvh->set(root.ref,root.lbounds,pinfo.size());
}
#endif

void buildMultiSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRefMB> prims(scene->device,numPrimitives);
PrimInfoMB pinfo = createPrimRefArrayMSMBlurGrid(scene,prims,bvh->scene->progressInterface);

/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }

GridRecalculatePrimRef recalculatePrimRef(scene,sgrids.data());

/* estimate acceleration structure size */
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
//FIXME: check leaf_bytes
//const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(SubGridQBVHN<N>));
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));

bvh->alloc.init_estimate(node_bytes+leaf_bytes);

/* settings for BVH build */
BVHBuilderMSMBlur::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxDepth;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleLeafTimeSegment = false;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);

/* build hierarchy */
auto root =
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
recalculatePrimRef,
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB4D::Create(),
typename BVH::AABBNodeMB4D::Set(),
CreateMSMBlurLeafGrid<N>(scene,bvh,sgrids.data()),
bvh->scene->progressInterface,
settings);
bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
}

void clear() {
}
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
Builder* BVH4Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4vMB>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
#if defined(__AVX__)
Builder* BVH8Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
Builder* BVH8Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4vMB>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,QuadMesh,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
#if defined(__AVX__)
Builder* BVH8Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,QuadMesh,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
return new BVHNBuilderMBlurSAH<4,UserGeometry,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
}
#if defined(__AVX__)
Builder* BVH8VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
return new BVHNBuilderMBlurSAH<8,UserGeometry,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
}
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
#if defined(__AVX__)
Builder* BVH8InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_GRID)
Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4); }
#if defined(__AVX__)
Builder* BVH8GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8); }
#endif
#endif
}
}
201
engine/thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp
vendored
Normal file
@ -0,0 +1,201 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "bvh.h"
#include "bvh_builder.h"

#include "../builders/primrefgen.h"
#include "../builders/primrefgen_presplit.h"
#include "../builders/splitter.h"

#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/subgrid.h"

#include "../common/state.h"

namespace embree
{
namespace isa
{
template<int N, typename Primitive>
struct CreateLeafSpatial
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;

__forceinline CreateLeafSpatial (BVH* bvh) : bvh(bvh) {}

__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
size_t n = set.size();
size_t items = Primitive::blocks(n);
size_t start = set.begin();
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
for (size_t i=0; i<items; i++) {
accel[i].fill(prims,start,set.end(),bvh->scene);
}
return node;
}

BVH* bvh;
};

template<int N, typename Mesh, typename Primitive, typename Splitter>
struct BVHNBuilderFastSpatialSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
BVH* bvh;
Scene* scene;
Mesh* mesh;
mvector<PrimRef> prims0;
GeneralBVHBuilder::Settings settings;
const float splitFactor;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;

BVHNBuilderFastSpatialSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(scene), mesh(nullptr), prims0(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
splitFactor(scene->device->max_spatial_split_replications) {}

BVHNBuilderFastSpatialSAH (BVH* bvh, Mesh* mesh, const unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(nullptr), mesh(mesh), prims0(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
splitFactor(scene->device->max_spatial_split_replications), geomID_(geomID) {}

// FIXME: shrink bvh->alloc in destructor here and in other builders too

void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}

/* skip build for empty scene */
const size_t numOriginalPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(Mesh::geom_type,false);
numPreviousPrimitives = numOriginalPrimitives;
if (numOriginalPrimitives == 0) {
prims0.clear();
bvh->clear();
return;
}

const unsigned int maxGeomID = mesh ? geomID_ : scene->getMaxGeomID<Mesh,false>();
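
/* Spatial splits duplicate primitive references, presumably packing the extra split
   bookkeeping into the upper RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS of the geomID
   field. If a geomID is large enough to collide with those bits, or the device forces
   it, the builder falls back to the pre-split path below instead. */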
const bool usePreSplits = scene->device->useSpatialPreSplits || (maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)));
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + (usePreSplits ? "BuilderFastSpatialPresplitSAH" : "BuilderFastSpatialSAH"));

/* create primref array */
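/* prims0 is sized for roughly numOriginalPrimitives*splitFactor references so that
   spatial splits performed during the build have room for the duplicated primrefs
   without reallocating; splitFactor comes from the device's
   max_spatial_split_replications setting. */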
const size_t numSplitPrimitives = max(numOriginalPrimitives,size_t(splitFactor*numOriginalPrimitives));
prims0.resize(numSplitPrimitives);

/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);

NodeRef root(0);
PrimInfo pinfo;

if (likely(usePreSplits))
{
/* spatial presplit SAH BVH builder */
pinfo = mesh ?
createPrimRefArray_presplit<Mesh,Splitter>(mesh,maxGeomID,numOriginalPrimitives,prims0,bvh->scene->progressInterface) :
createPrimRefArray_presplit<Mesh,Splitter>(scene,Mesh::geom_type,false,numOriginalPrimitives,prims0,bvh->scene->progressInterface);

const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);

settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;

/* call BVH builder */
root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafSpatial<N,Primitive>(bvh),bvh->scene->progressInterface,prims0.data(),pinfo,settings);
}
else
{
/* standard spatial split SAH BVH builder */
pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numSplitPrimitives,prims0,bvh->scene->progressInterface) :
createPrimRefArray(scene,Mesh::geom_type,false,numSplitPrimitives,prims0,bvh->scene->progressInterface);

Splitter splitter(scene);

const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);

settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;

/* call BVH builder */
root = BVHBuilderBinnedFastSpatialSAH::build<NodeRef>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create2(),
typename BVH::AABBNode::Set2(),
CreateLeafSpatial<N,Primitive>(bvh),
splitter,
bvh->scene->progressInterface,
prims0.data(),
numSplitPrimitives,
pinfo,settings);

/* ==================== */
}

bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));

/* clear temporary data for static geometry */
if (scene && scene->isStaticAccel()) {
prims0.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}

void clear() {
prims0.clear();
}
};

/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

#if defined(EMBREE_GEOMETRY_TRIANGLE)

Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }

#if defined(__AVX__)
Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH8Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
#endif
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,QuadMesh,Quad4v,QuadSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }

#if defined(__AVX__)
Builder* BVH8Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,QuadMesh,Quad4v,QuadSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
#endif

#endif
}
}
385
engine/thirdparty/embree/kernels/bvh/bvh_builder_twolevel.cpp
vendored
Normal file
@ -0,0 +1,385 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif

#include "bvh_builder_twolevel.h"
#include "bvh_statistics.h"
#include "../builders/bvh_builder_sah.h"
#include "../common/scene_line_segments.h"
#include "../common/scene_triangle_mesh.h"
#include "../common/scene_quad_mesh.h"

#define PROFILE 0

namespace embree
{
namespace isa
{
template<int N, typename Mesh, typename Primitive>
BVHNBuilderTwoLevel<N,Mesh,Primitive>::BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder, const size_t singleThreadThreshold)
: bvh(bvh), scene(scene), refs(scene->device,0), prims(scene->device,0), singleThreadThreshold(singleThreadThreshold), gtype(gtype), useMortonBuilder_(useMortonBuilder) {}

template<int N, typename Mesh, typename Primitive>
BVHNBuilderTwoLevel<N,Mesh,Primitive>::~BVHNBuilderTwoLevel () {
}

// ===========================================================================
// ===========================================================================
// ===========================================================================

template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::build()
{
/* delete some objects */
size_t num = scene->size();
if (num < bvh->objects.size()) {
parallel_for(num, bvh->objects.size(), [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
builders[i].reset();
delete bvh->objects[i]; bvh->objects[i] = nullptr;
}
});
}

#if PROFILE
while(1)
#endif
{
/* reset memory allocator */
bvh->alloc.reset();

/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(gtype,false);

if (numPrimitives == 0) {
prims.resize(0);
bvh->set(BVH::emptyNode,empty,0);
return;
}

/* calculate the size of the entire BVH */
const size_t numLeafBlocks = Primitive::blocks(numPrimitives);
const size_t node_bytes = 2*numLeafBlocks*sizeof(typename BVH::AABBNode)/N;
const size_t leaf_bytes = size_t(1.2*numLeafBlocks*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);

double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderTwoLevel");

/* resize object array if scene got larger */
if (bvh->objects.size() < num) bvh->objects.resize(num);
if (builders.size() < num) builders.resize(num);
resizeRefsList ();
nextRef.store(0);
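
/* Two-level build: every enabled single-timestep geometry first gets its own builder.
   Judging from the helpers used below, small geometries appear to contribute their
   primitives as build refs directly, while larger ones get a separate per-object BVH
   whose root is referenced by a single BuildRef; the top-level SAH build then runs
   over those refs (assumption: the helper internals live in bvh_builder_twolevel.h,
   which is not shown here). */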

/* create acceleration structures */
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
{
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
{
Mesh* mesh = scene->getSafe<Mesh>(objectID);

/* ignore meshes we do not support */
if (mesh == nullptr || mesh->numTimeSteps != 1)
continue;

if (isSmallGeometry(mesh)) {
setupSmallBuildRefBuilder (objectID, mesh);
} else {
setupLargeBuildRefBuilder (objectID, mesh);
}
}
});

/* parallel build of acceleration structures */
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
{
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
{
/* ignore if no triangle mesh or not enabled */
Mesh* mesh = scene->getSafe<Mesh>(objectID);
if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1)
continue;

builders[objectID]->attachBuildRefs (this);
}
});

#if PROFILE
double d0 = getSeconds();
#endif
/* fast path for single geometry scenes */
if (nextRef == 1) {
bvh->set(refs[0].node,LBBox3fa(refs[0].bounds()),numPrimitives);
}

else
{
/* open all large nodes */
refs.resize(nextRef);

/* this probably needs some more tuning */
const size_t extSize = max(max((size_t)SPLIT_MIN_EXT_SPACE,refs.size()*SPLIT_MEMORY_RESERVE_SCALE),size_t((float)numPrimitives / SPLIT_MEMORY_RESERVE_FACTOR));
|
||||
|
||||
#if !ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
|
||||
#if ENABLE_OPEN_SEQUENTIAL
|
||||
open_sequential(extSize);
|
||||
#endif
|
||||
/* compute PrimRefs */
|
||||
prims.resize(refs.size());
|
||||
#endif
|
||||
|
||||
{
|
||||
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
|
||||
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
|
||||
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
pinfo.add_center2(refs[i]);
|
||||
}
|
||||
return pinfo;
|
||||
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
|
||||
|
||||
#else
|
||||
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
|
||||
|
||||
PrimInfo pinfo(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
pinfo.add_center2(refs[i]);
|
||||
prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
|
||||
}
|
||||
return pinfo;
|
||||
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
|
||||
#endif
|
||||
|
||||
/* skip if all objects where empty */
|
||||
if (pinfo.size() == 0)
|
||||
bvh->set(BVH::emptyNode,empty,0);
|
||||
|
||||
/* otherwise build toplevel hierarchy */
|
||||
else
|
||||
{
|
||||
/* settings for BVH build */
|
||||
GeneralBVHBuilder::Settings settings;
|
||||
settings.branchingFactor = N;
|
||||
settings.maxDepth = BVH::maxBuildDepthLeaf;
|
||||
settings.logBlockSize = bsr(N);
|
||||
settings.minLeafSize = 1;
|
||||
settings.maxLeafSize = 1;
|
||||
settings.travCost = 1.0f;
|
||||
settings.intCost = 1.0f;
|
||||
settings.singleThreadThreshold = singleThreadThreshold;
|
||||
|
||||
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
|
||||
refs.resize(extSize);
|
||||
|
||||
NodeRef root = BVHBuilderBinnedOpenMergeSAH::build<NodeRef,BuildRef>(
|
||||
typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNode::Create2(),
|
||||
typename BVH::AABBNode::Set2(),
|
||||
|
||||
[&] (const BuildRef* refs, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
|
||||
assert(range.size() == 1);
|
||||
return (NodeRef) refs[range.begin()].node;
|
||||
},
|
||||
[&] (BuildRef &bref, BuildRef *refs) -> size_t {
|
||||
return openBuildRef(bref,refs);
|
||||
},
|
||||
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
|
||||
refs.data(),extSize,pinfo,settings);
|
||||
#else
|
||||
NodeRef root = BVHBuilderBinnedSAH::build<NodeRef>(
|
||||
typename BVH::CreateAlloc(bvh),
|
||||
typename BVH::AABBNode::Create2(),
|
||||
typename BVH::AABBNode::Set2(),
|
||||
|
||||
[&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
|
||||
assert(range.size() == 1);
|
||||
return (NodeRef) prims[range.begin()].ID();
|
||||
},
|
||||
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
|
||||
prims.data(),pinfo,settings);
|
||||
#endif
|
||||
|
||||
|
||||
bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bvh->alloc.cleanup();
|
||||
bvh->postBuild(t0);
|
||||
#if PROFILE
|
||||
double d1 = getSeconds();
|
||||
std::cout << "TOP_LEVEL OPENING/REBUILD TIME " << 1000.0*(d1-d0) << " ms" << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::deleteGeometry(size_t geomID)
|
||||
{
|
||||
if (geomID >= bvh->objects.size()) return;
|
||||
if (builders[geomID]) builders[geomID].reset();
|
||||
delete bvh->objects [geomID]; bvh->objects [geomID] = nullptr;
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::clear()
|
||||
{
|
||||
for (size_t i=0; i<bvh->objects.size(); i++)
|
||||
if (bvh->objects[i]) bvh->objects[i]->clear();
|
||||
|
||||
for (size_t i=0; i<builders.size(); i++)
|
||||
if (builders[i]) builders[i].reset();
|
||||
|
||||
refs.clear();
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::open_sequential(const size_t extSize)
|
||||
{
|
||||
if (refs.size() == 0)
|
||||
return;
|
||||
|
||||
refs.reserve(extSize);
|
||||
|
||||
#if 1
|
||||
for (size_t i=0;i<refs.size();i++)
|
||||
{
|
||||
NodeRef ref = refs[i].node;
|
||||
if (ref.isAABBNode())
|
||||
BVH::prefetch(ref);
|
||||
}
|
||||
#endif
|
||||
|
||||
std::make_heap(refs.begin(),refs.end());
|
||||
while (refs.size()+N-1 <= extSize)
|
||||
{
|
||||
std::pop_heap (refs.begin(),refs.end());
|
||||
NodeRef ref = refs.back().node;
|
||||
if (ref.isLeaf()) break;
|
||||
refs.pop_back();
|
||||
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (node->child(i) == BVH::emptyNode) continue;
|
||||
refs.push_back(BuildRef(node->bounds(i),node->child(i)));
|
||||
|
||||
#if 1
|
||||
NodeRef ref_pre = node->child(i);
|
||||
if (ref_pre.isAABBNode())
|
||||
ref_pre.prefetch();
|
||||
#endif
|
||||
std::push_heap (refs.begin(),refs.end());
|
||||
}
|
||||
}
|
||||
}
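// Illustrative sketch, not part of the upstream Embree sources: open_sequential
// above keeps a max-heap of build references keyed on surface area (leaves carry
// bounds_area == 0, so they sink to the bottom) and repeatedly replaces the
// largest inner node by its children until the reference list would exceed
// extSize. The standalone helper below shows the same pattern with a plain
// std::vector and hypothetical BoxSketch/NodeSketch types; it assumes <vector>
// and <algorithm> are visible here, as they are for the std::make_heap and
// std::pop_heap calls used just above.
namespace open_largest_first_sketch
{
  struct BoxSketch { float lx, ly, lz, ux, uy, uz; };

  inline float surface_area(const BoxSketch& b) {
    const float dx = b.ux - b.lx, dy = b.uy - b.ly, dz = b.uz - b.lz;
    return 2.0f * (dx*dy + dx*dz + dy*dz);
  }

  struct NodeSketch { BoxSketch bounds; std::vector<NodeSketch*> children; }; // empty children == leaf

  struct RefSketch {
    NodeSketch* node; float key; // key == 0 for leaves, surface area otherwise
    bool operator< (const RefSketch& other) const { return key < other.key; }
  };

  inline void open_largest_first(std::vector<RefSketch>& refs, size_t maxRefs, size_t branching)
  {
    std::make_heap(refs.begin(), refs.end());
    while (refs.size() + branching - 1 <= maxRefs)
    {
      std::pop_heap(refs.begin(), refs.end());   // move the largest key to the back
      RefSketch largest = refs.back();
      if (largest.node->children.empty()) break; // only leaves left, nothing to open
      refs.pop_back();
      for (NodeSketch* child : largest.node->children) {
        const float key = child->children.empty() ? 0.0f : surface_area(child->bounds);
        refs.push_back(RefSketch{child, key});
        std::push_heap(refs.begin(), refs.end());
      }
    }
  }
}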
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupSmallBuildRefBuilder (size_t objectID, Mesh const * const /*mesh*/)
|
||||
{
|
||||
if (builders[objectID] == nullptr || // new mesh
|
||||
dynamic_cast<RefBuilderSmall*>(builders[objectID].get()) == nullptr) // size change resulted in large->small change
|
||||
{
|
||||
builders[objectID].reset (new RefBuilderSmall(objectID));
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh)
|
||||
{
|
||||
if (bvh->objects[objectID] == nullptr || // new mesh
|
||||
builders[objectID]->meshQualityChanged (mesh->quality) || // changed build quality
|
||||
dynamic_cast<RefBuilderLarge*>(builders[objectID].get()) == nullptr) // size change resulted in small->large change
|
||||
{
|
||||
Builder* builder = nullptr;
|
||||
delete bvh->objects[objectID];
|
||||
createMeshAccel(objectID, builder);
|
||||
builders[objectID].reset (new RefBuilderLarge(objectID, builder, mesh->quality));
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
Builder* BVH4BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH4BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4v>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH4BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH4BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,QuadMesh,Quad4v>((BVH4*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
Builder* BVH4BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,UserGeometry,Object>((BVH4*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
Builder* BVH4BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
Builder* BVH4BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__AVX__)
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH8BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4v>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
Builder* BVH8BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH8BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,QuadMesh,Quad4v>((BVH8*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
Builder* BVH8BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,UserGeometry,Object>((BVH8*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
Builder* BVH8BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
Builder* BVH8BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
|
||||
return new BVHNBuilderTwoLevel<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
}
262
engine/thirdparty/embree/kernels/bvh/bvh_builder_twolevel.h
vendored
Normal file
@@ -0,0 +1,262 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "bvh_builder_twolevel_internal.h"
|
||||
#include "bvh.h"
|
||||
#include "../builders/priminfo.h"
|
||||
#include "../builders/primrefgen.h"
|
||||
|
||||
/* new open/merge builder */
|
||||
#define ENABLE_DIRECT_SAH_MERGE_BUILDER 1
|
||||
#define ENABLE_OPEN_SEQUENTIAL 0
|
||||
#define SPLIT_MEMORY_RESERVE_FACTOR 1000
|
||||
#define SPLIT_MEMORY_RESERVE_SCALE 2
|
||||
#define SPLIT_MIN_EXT_SPACE 1000
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
class BVHNBuilderTwoLevel : public Builder
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
__forceinline static bool isSmallGeometry(Mesh* mesh) {
|
||||
return mesh->size() <= 4;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
typedef void (*createMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
|
||||
|
||||
struct BuildRef : public PrimRef
|
||||
{
|
||||
public:
|
||||
__forceinline BuildRef () {}
|
||||
|
||||
__forceinline BuildRef (const BBox3fa& bounds, NodeRef node)
|
||||
: PrimRef(bounds,(size_t)node), node(node)
|
||||
{
|
||||
if (node.isLeaf())
|
||||
bounds_area = 0.0f;
|
||||
else
|
||||
bounds_area = area(this->bounds());
|
||||
}
|
||||
|
||||
/* used by the open/merge bvh builder */
|
||||
__forceinline BuildRef (const BBox3fa& bounds, NodeRef node, const unsigned int geomID, const unsigned int numPrimitives)
|
||||
: PrimRef(bounds,geomID,numPrimitives), node(node)
|
||||
{
|
||||
/* important for relative buildref ordering */
|
||||
if (node.isLeaf())
|
||||
bounds_area = 0.0f;
|
||||
else
|
||||
bounds_area = area(this->bounds());
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return primID();
|
||||
}
|
||||
|
||||
friend bool operator< (const BuildRef& a, const BuildRef& b) {
|
||||
return a.bounds_area < b.bounds_area;
|
||||
}
|
||||
|
||||
friend __forceinline embree_ostream operator<<(embree_ostream cout, const BuildRef& ref) {
|
||||
return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", center2 = " << ref.center2() << ", geomID = " << ref.geomID() << ", numPrimitives = " << ref.numPrimitives() << ", bounds_area = " << ref.bounds_area << " }";
|
||||
}
|
||||
|
||||
__forceinline unsigned int numPrimitives() const { return primID(); }
|
||||
|
||||
public:
|
||||
NodeRef node;
|
||||
float bounds_area;
|
||||
};
|
||||
|
||||
|
||||
__forceinline size_t openBuildRef(BuildRef &bref, BuildRef *const refs) {
|
||||
if (bref.node.isLeaf())
|
||||
{
|
||||
refs[0] = bref;
|
||||
return 1;
|
||||
}
|
||||
NodeRef ref = bref.node;
|
||||
unsigned int geomID = bref.geomID();
|
||||
unsigned int numPrims = max((unsigned int)bref.numPrimitives() / N,(unsigned int)1);
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
size_t n = 0;
|
||||
for (size_t i=0; i<N; i++) {
|
||||
if (node->child(i) == BVH::emptyNode) continue;
|
||||
refs[i] = BuildRef(node->bounds(i),node->child(i),geomID,numPrims);
|
||||
n++;
|
||||
}
|
||||
assert(n > 1);
|
||||
return n;
|
||||
}
|
||||
|
||||
/*! Constructor. */
|
||||
BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype = Mesh::geom_type, bool useMortonBuilder = false, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD);
|
||||
|
||||
/*! Destructor */
|
||||
~BVHNBuilderTwoLevel ();
|
||||
|
||||
/*! builder entry point */
|
||||
void build();
|
||||
void deleteGeometry(size_t geomID);
|
||||
void clear();
|
||||
|
||||
void open_sequential(const size_t extSize);
|
||||
|
||||
private:
|
||||
|
||||
class RefBuilderBase {
|
||||
public:
|
||||
virtual ~RefBuilderBase () {}
|
||||
virtual void attachBuildRefs (BVHNBuilderTwoLevel* builder) = 0;
|
||||
virtual bool meshQualityChanged (RTCBuildQuality currQuality) = 0;
|
||||
};
|
||||
|
||||
class RefBuilderSmall : public RefBuilderBase {
|
||||
public:
|
||||
|
||||
RefBuilderSmall (size_t objectID)
|
||||
: objectID_ (objectID) {}
|
||||
|
||||
void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) {
|
||||
|
||||
Mesh* mesh = topBuilder->scene->template getSafe<Mesh>(objectID_);
|
||||
size_t meshSize = mesh->size();
|
||||
assert(isSmallGeometry(mesh));
|
||||
|
||||
mvector<PrimRef> prefs(topBuilder->scene->device, meshSize);
|
||||
auto pinfo = createPrimRefArray(mesh,objectID_,meshSize,prefs,topBuilder->bvh->scene->progressInterface);
|
||||
|
||||
size_t begin=0;
|
||||
while (begin < pinfo.size())
|
||||
{
|
||||
Primitive* accel = (Primitive*) topBuilder->bvh->alloc.getCachedAllocator().malloc1(sizeof(Primitive),BVH::byteAlignment);
|
||||
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,1);
|
||||
accel->fill(prefs.data(),begin,pinfo.size(),topBuilder->bvh->scene);
|
||||
|
||||
/* create build primitive */
|
||||
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node,(unsigned int)objectID_,1);
|
||||
#else
|
||||
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node);
|
||||
#endif
|
||||
}
|
||||
assert(begin == pinfo.size());
|
||||
}
|
||||
|
||||
bool meshQualityChanged (RTCBuildQuality /*currQuality*/) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t objectID_;
|
||||
};
|
||||
|
||||
class RefBuilderLarge : public RefBuilderBase {
|
||||
public:
|
||||
|
||||
RefBuilderLarge (size_t objectID, const Ref<Builder>& builder, RTCBuildQuality quality)
|
||||
: objectID_ (objectID), builder_ (builder), quality_ (quality) {}
|
||||
|
||||
void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder)
|
||||
{
|
||||
BVH* object = topBuilder->getBVH(objectID_); assert(object);
|
||||
|
||||
/* build object if it got modified */
|
||||
if (topBuilder->isGeometryModified(objectID_))
|
||||
builder_->build();
|
||||
|
||||
/* create build primitive */
|
||||
if (!object->getBounds().empty())
|
||||
{
|
||||
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
|
||||
Mesh* mesh = topBuilder->getMesh(objectID_);
|
||||
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root,(unsigned int)objectID_,(unsigned int)mesh->size());
|
||||
#else
|
||||
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
bool meshQualityChanged (RTCBuildQuality currQuality) {
|
||||
return currQuality != quality_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t objectID_;
|
||||
Ref<Builder> builder_;
|
||||
RTCBuildQuality quality_;
|
||||
};
|
||||
|
||||
void setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh);
|
||||
void setupSmallBuildRefBuilder (size_t objectID, Mesh const * const mesh);
|
||||
|
||||
BVH* getBVH (size_t objectID) {
|
||||
return this->bvh->objects[objectID];
|
||||
}
|
||||
Mesh* getMesh (size_t objectID) {
|
||||
return this->scene->template getSafe<Mesh>(objectID);
|
||||
}
|
||||
bool isGeometryModified (size_t objectID) {
|
||||
return this->scene->isGeometryModified(objectID);
|
||||
}
|
||||
|
||||
void resizeRefsList ()
|
||||
{
|
||||
size_t num = parallel_reduce (size_t(0), scene->size(), size_t(0),
|
||||
[this](const range<size_t>& r)->size_t {
|
||||
size_t c = 0;
|
||||
for (auto i=r.begin(); i<r.end(); ++i) {
|
||||
Mesh* mesh = scene->getSafe<Mesh>(i);
|
||||
if (mesh == nullptr || mesh->numTimeSteps != 1)
|
||||
continue;
|
||||
size_t meshSize = mesh->size();
|
||||
c += isSmallGeometry(mesh) ? Primitive::blocks(meshSize) : 1;
|
||||
}
|
||||
return c;
|
||||
},
|
||||
std::plus<size_t>()
|
||||
);
|
||||
|
||||
if (refs.size() < num) {
|
||||
refs.resize(num);
|
||||
}
|
||||
}
|
||||
|
||||
void createMeshAccel (size_t geomID, Builder*& builder)
|
||||
{
|
||||
bvh->objects[geomID] = new BVH(Primitive::type,scene);
|
||||
BVH* accel = bvh->objects[geomID];
|
||||
auto mesh = scene->getSafe<Mesh>(geomID);
|
||||
if (nullptr == mesh) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"geomID does not return correct type");
|
||||
return;
|
||||
}
|
||||
|
||||
__internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(accel, mesh, geomID, this->gtype, this->useMortonBuilder_, builder);
|
||||
}
|
||||
|
||||
using BuilderList = std::vector<std::unique_ptr<RefBuilderBase>>;
|
||||
|
||||
BuilderList builders;
|
||||
BVH* bvh;
|
||||
Scene* scene;
|
||||
mvector<BuildRef> refs;
|
||||
mvector<PrimRef> prims;
|
||||
std::atomic<int> nextRef;
|
||||
const size_t singleThreadThreshold;
|
||||
Geometry::GTypeMask gtype;
|
||||
bool useMortonBuilder_ = false;
|
||||
};
|
||||
}
|
||||
}
304
engine/thirdparty/embree/kernels/bvh/bvh_builder_twolevel_internal.h
vendored
Normal file
@@ -0,0 +1,304 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include "../geometry/triangle.h"
|
||||
#include "../geometry/trianglev.h"
|
||||
#include "../geometry/trianglei.h"
|
||||
#include "../geometry/quadv.h"
|
||||
#include "../geometry/quadi.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
|
||||
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
|
||||
|
||||
namespace isa
|
||||
{
|
||||
|
||||
namespace __internal_two_level_builder__ {
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct MortonBuilder {};
|
||||
template<>
|
||||
struct MortonBuilder<4,TriangleMesh,Triangle4> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,TriangleMesh,Triangle4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,TriangleMesh,Triangle4i> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,QuadMesh,Quad4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,UserGeometry,Object> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,Instance,InstancePrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<4,InstanceArray,InstanceArrayPrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,TriangleMesh,Triangle4> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,TriangleMesh,Triangle4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,TriangleMesh,Triangle4i> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,QuadMesh,Quad4v> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,UserGeometry,Object> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,Instance,InstancePrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct MortonBuilder<8,InstanceArray,InstanceArrayPrimitive> {
|
||||
MortonBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct SAHBuilder {};
|
||||
template<>
|
||||
struct SAHBuilder<4,TriangleMesh,Triangle4> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,TriangleMesh,Triangle4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,TriangleMesh,Triangle4i> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,QuadMesh,Quad4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,UserGeometry,Object> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,Instance,InstancePrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<4,InstanceArray,InstanceArrayPrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,TriangleMesh,Triangle4> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,TriangleMesh,Triangle4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,TriangleMesh,Triangle4i> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,QuadMesh,Quad4v> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,UserGeometry,Object> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,Instance,InstancePrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct SAHBuilder<8,InstanceArray,InstanceArrayPrimitive> {
|
||||
SAHBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct RefitBuilder {};
|
||||
template<>
|
||||
struct RefitBuilder<4,TriangleMesh,Triangle4> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,TriangleMesh,Triangle4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,TriangleMesh,Triangle4i> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,QuadMesh,Quad4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,UserGeometry,Object> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,Instance,InstancePrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<4,InstanceArray,InstanceArrayPrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,TriangleMesh,Triangle4> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,TriangleMesh,Triangle4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,TriangleMesh,Triangle4i> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,QuadMesh,Quad4v> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,UserGeometry,Object> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,Instance,InstancePrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
template<>
|
||||
struct RefitBuilder<8,InstanceArray,InstanceArrayPrimitive> {
|
||||
RefitBuilder () {}
|
||||
Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
struct MeshBuilder {
|
||||
MeshBuilder () {}
|
||||
void operator () (void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype, bool useMortonBuilder, Builder*& builder) {
|
||||
if(useMortonBuilder) {
|
||||
builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
|
||||
return;
|
||||
}
|
||||
switch (mesh->quality) {
|
||||
case RTC_BUILD_QUALITY_LOW: builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
|
||||
case RTC_BUILD_QUALITY_MEDIUM:
|
||||
case RTC_BUILD_QUALITY_HIGH: builder = SAHBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
|
||||
case RTC_BUILD_QUALITY_REFIT: builder = RefitBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype); break;
|
||||
default: throw_RTCError(RTC_ERROR_UNKNOWN,"invalid build quality");
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
377
engine/thirdparty/embree/kernels/bvh/bvh_collider.cpp
vendored
Normal file
@@ -0,0 +1,377 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_collider.h"
|
||||
|
||||
#include "../geometry/triangle_triangle_intersector.h"
|
||||
#include "../../common/algorithms/parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
#define CSTAT(x)
|
||||
|
||||
size_t parallel_depth_threshold = 3;
|
||||
CSTAT(std::atomic<size_t> bvh_collide_traversal_steps(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_leaf_pairs(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_leaf_iterations(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections1(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections2(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections3(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections4(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections5(0));
|
||||
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections(0));
|
||||
|
||||
struct Collision
|
||||
{
|
||||
__forceinline Collision() {}
|
||||
|
||||
__forceinline Collision (unsigned geomID0, unsigned primID0, unsigned geomID1, unsigned primID1)
|
||||
: geomID0(geomID0), primID0(primID0), geomID1(geomID1), primID1(primID1) {}
|
||||
|
||||
unsigned geomID0;
|
||||
unsigned primID0;
|
||||
unsigned geomID1;
|
||||
unsigned primID1;
|
||||
};
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t overlap(const BBox3fa& box0, const typename BVHN<N>::AABBNode& node1)
|
||||
{
|
||||
const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),node1.lower_x);
|
||||
const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),node1.lower_y);
|
||||
const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),node1.lower_z);
|
||||
const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),node1.upper_x);
|
||||
const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),node1.upper_y);
|
||||
const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),node1.upper_z);
|
||||
return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t overlap(const BBox3fa& box0, const BBox<Vec3<vfloat<N>>>& box1)
|
||||
{
|
||||
const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x),box1.lower.x);
|
||||
const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y),box1.lower.y);
|
||||
const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z),box1.lower.z);
|
||||
const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x),box1.upper.x);
|
||||
const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y),box1.upper.y);
|
||||
const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z),box1.upper.z);
|
||||
return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t overlap(const BBox<Vec3<vfloat<N>>>& box0, size_t i, const BBox<Vec3<vfloat<N>>>& box1)
|
||||
{
|
||||
const vfloat<N> lower_x = max(vfloat<N>(box0.lower.x[i]),box1.lower.x);
|
||||
const vfloat<N> lower_y = max(vfloat<N>(box0.lower.y[i]),box1.lower.y);
|
||||
const vfloat<N> lower_z = max(vfloat<N>(box0.lower.z[i]),box1.lower.z);
|
||||
const vfloat<N> upper_x = min(vfloat<N>(box0.upper.x[i]),box1.upper.x);
|
||||
const vfloat<N> upper_y = min(vfloat<N>(box0.upper.y[i]),box1.upper.y);
|
||||
const vfloat<N> upper_z = min(vfloat<N>(box0.upper.z[i]),box1.upper.z);
|
||||
return movemask((lower_x <= upper_x) & (lower_y <= upper_y) & (lower_z <= upper_z));
|
||||
}
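// Illustrative sketch, not part of the upstream Embree sources: the overlap()
// helpers above test one box against the N child boxes of a node at once and
// pack the per-child results into a bit mask via movemask(). The scalar
// equivalent below, written with a hypothetical BoxSketch type, shows what each
// bit of that mask means: two boxes overlap iff their ranges intersect on every
// axis.
namespace overlap_mask_sketch
{
  struct BoxSketch { float lo[3]; float hi[3]; };

  inline bool overlaps(const BoxSketch& a, const BoxSketch& b) {
    for (int axis = 0; axis < 3; ++axis)
      if (a.lo[axis] > b.hi[axis] || b.lo[axis] > a.hi[axis])
        return false; // separated on this axis
    return true;
  }

  // bit i of the result is set iff box0 overlaps children[i]
  inline unsigned overlap_mask(const BoxSketch& box0, const BoxSketch* children, int numChildren) {
    unsigned mask = 0;
    for (int i = 0; i < numChildren; ++i)
      if (overlaps(box0, children[i])) mask |= 1u << i;
    return mask;
  }
}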
|
||||
|
||||
bool intersect_triangle_triangle (Scene* scene0, unsigned geomID0, unsigned primID0, Scene* scene1, unsigned geomID1, unsigned primID1)
|
||||
{
|
||||
CSTAT(bvh_collide_prim_intersections1++);
|
||||
const TriangleMesh* mesh0 = scene0->get<TriangleMesh>(geomID0);
|
||||
const TriangleMesh* mesh1 = scene1->get<TriangleMesh>(geomID1);
|
||||
const TriangleMesh::Triangle& tri0 = mesh0->triangle(primID0);
|
||||
const TriangleMesh::Triangle& tri1 = mesh1->triangle(primID1);
|
||||
|
||||
/* special culling for scene intersection with itself */
|
||||
if (scene0 == scene1 && geomID0 == geomID1)
|
||||
{
|
||||
/* ignore self intersections */
|
||||
if (primID0 == primID1)
|
||||
return false;
|
||||
}
|
||||
CSTAT(bvh_collide_prim_intersections2++);
|
||||
|
||||
if (scene0 == scene1 && geomID0 == geomID1)
|
||||
{
|
||||
/* ignore intersection with topological neighbors */
|
||||
const vint4 t0(tri0.v[0],tri0.v[1],tri0.v[2],tri0.v[2]);
|
||||
if (any(vint4(tri1.v[0]) == t0)) return false;
|
||||
if (any(vint4(tri1.v[1]) == t0)) return false;
|
||||
if (any(vint4(tri1.v[2]) == t0)) return false;
|
||||
}
|
||||
CSTAT(bvh_collide_prim_intersections3++);
|
||||
|
||||
const Vec3fa a0 = mesh0->vertex(tri0.v[0]);
|
||||
const Vec3fa a1 = mesh0->vertex(tri0.v[1]);
|
||||
const Vec3fa a2 = mesh0->vertex(tri0.v[2]);
|
||||
const Vec3fa b0 = mesh1->vertex(tri1.v[0]);
|
||||
const Vec3fa b1 = mesh1->vertex(tri1.v[1]);
|
||||
const Vec3fa b2 = mesh1->vertex(tri1.v[2]);
|
||||
|
||||
return TriangleTriangleIntersector::intersect_triangle_triangle(a0,a1,a2,b0,b1,b2);
|
||||
}
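// Illustrative sketch, not part of the upstream Embree sources: the vint4
// comparisons above cull topological neighbours, i.e. triangles of the same
// mesh that share at least one vertex index. The scalar version of that test
// is simply:
namespace neighbour_cull_sketch
{
  inline bool share_vertex(const unsigned a[3], const unsigned b[3]) {
    for (int i = 0; i < 3; ++i)
      for (int j = 0; j < 3; ++j)
        if (a[i] == b[j]) return true; // shared index => adjacent triangles
    return false;
  }
}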
|
||||
|
||||
template<int N>
|
||||
__forceinline void BVHNColliderUserGeom<N>::processLeaf(NodeRef node0, NodeRef node1)
|
||||
{
|
||||
Collision collisions[16];
|
||||
size_t num_collisions = 0;
|
||||
|
||||
size_t N0; Object* leaf0 = (Object*) node0.leaf(N0);
|
||||
size_t N1; Object* leaf1 = (Object*) node1.leaf(N1);
|
||||
for (size_t i=0; i<N0; i++) {
|
||||
for (size_t j=0; j<N1; j++) {
|
||||
const unsigned geomID0 = leaf0[i].geomID();
|
||||
const unsigned primID0 = leaf0[i].primID();
|
||||
const unsigned geomID1 = leaf1[j].geomID();
|
||||
const unsigned primID1 = leaf1[j].primID();
|
||||
if (this->scene0 == this->scene1 && geomID0 == geomID1 && primID0 == primID1) continue;
|
||||
collisions[num_collisions++] = Collision(geomID0,primID0,geomID1,primID1);
|
||||
if (num_collisions == 16) {
|
||||
this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
|
||||
num_collisions = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (num_collisions)
|
||||
this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
|
||||
}
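// Illustrative sketch, not part of the upstream Embree sources: processLeaf
// above pairs every primitive of one leaf with every primitive of the other and
// reports them through the user callback in batches of at most 16 collisions.
// The batching pattern, reduced to a hypothetical PairSketch type and callback
// signature, looks like this:
namespace batched_callback_sketch
{
  struct PairSketch { unsigned geomID0, primID0, geomID1, primID1; };
  typedef void (*PairCallback)(void* userPtr, const PairSketch* pairs, size_t count);

  inline void emit_pairs(const PairSketch* candidates, size_t numCandidates,
                         PairCallback callback, void* userPtr)
  {
    PairSketch batch[16];
    size_t count = 0;
    for (size_t i = 0; i < numCandidates; ++i) {
      batch[count++] = candidates[i];
      if (count == 16) { callback(userPtr, batch, count); count = 0; } // flush a full batch
    }
    if (count) callback(userPtr, batch, count); // flush the remainder
  }
}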
|
||||
|
||||
template<int N>
|
||||
void BVHNCollider<N>::collide_recurse(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1, size_t depth0, size_t depth1)
|
||||
{
|
||||
CSTAT(bvh_collide_traversal_steps++);
|
||||
if (unlikely(ref0.isLeaf())) {
|
||||
if (unlikely(ref1.isLeaf())) {
|
||||
CSTAT(bvh_collide_leaf_pairs++);
|
||||
processLeaf(ref0,ref1);
|
||||
return;
|
||||
} else goto recurse_node1;
|
||||
|
||||
} else {
|
||||
if (unlikely(ref1.isLeaf())) {
|
||||
goto recurse_node0;
|
||||
} else {
|
||||
if (area(bounds0) > area(bounds1)) {
|
||||
goto recurse_node0;
|
||||
}
|
||||
else {
|
||||
goto recurse_node1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node0:
|
||||
AABBNode* node0 = ref0.getAABBNode();
|
||||
size_t mask = overlap<N>(bounds1,*node0);
|
||||
//for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
//for (size_t i=0; i<N; i++) {
|
||||
#if 0
|
||||
if (depth0 < parallel_depth_threshold)
|
||||
{
|
||||
parallel_for(size_t(N), [&] ( size_t i ) {
|
||||
if (mask & ( 1 << i)) {
|
||||
BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node1:
|
||||
AABBNode* node1 = ref1.getAABBNode();
|
||||
size_t mask = overlap<N>(bounds0,*node1);
|
||||
//for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
//for (size_t i=0; i<N; i++) {
|
||||
#if 0
|
||||
if (depth1 < parallel_depth_threshold)
|
||||
{
|
||||
parallel_for(size_t(N), [&] ( size_t i ) {
|
||||
if (mask & ( 1 << i)) {
|
||||
BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
|
||||
collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNCollider<N>::split(const CollideJob& job, jobvector& jobs)
|
||||
{
|
||||
if (unlikely(job.ref0.isLeaf())) {
|
||||
if (unlikely(job.ref1.isLeaf())) {
|
||||
jobs.push_back(job);
|
||||
return;
|
||||
} else goto recurse_node1;
|
||||
} else {
|
||||
if (unlikely(job.ref1.isLeaf())) {
|
||||
goto recurse_node0;
|
||||
} else {
|
||||
if (area(job.bounds0) > area(job.bounds1)) {
|
||||
goto recurse_node0;
|
||||
}
|
||||
else {
|
||||
goto recurse_node1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node0:
|
||||
const AABBNode* node0 = job.ref0.getAABBNode();
|
||||
size_t mask = overlap<N>(job.bounds1,*node0);
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
jobs.push_back(CollideJob(node0->child(i),node0->bounds(i),job.depth0+1,job.ref1,job.bounds1,job.depth1));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
recurse_node1:
|
||||
const AABBNode* node1 = job.ref1.getAABBNode();
|
||||
size_t mask = overlap<N>(job.bounds0,*node1);
|
||||
for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
|
||||
jobs.push_back(CollideJob(job.ref0,job.bounds0,job.depth0,node1->child(i),node1->bounds(i),job.depth1+1));
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNCollider<N>::collide_recurse_entry(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1)
|
||||
{
|
||||
CSTAT(bvh_collide_traversal_steps = 0);
|
||||
CSTAT(bvh_collide_leaf_pairs = 0);
|
||||
CSTAT(bvh_collide_leaf_iterations = 0);
|
||||
CSTAT(bvh_collide_prim_intersections1 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections2 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections3 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections4 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections5 = 0);
|
||||
CSTAT(bvh_collide_prim_intersections = 0);
|
||||
#if 0
|
||||
collide_recurse(ref0,bounds0,ref1,bounds1,0,0);
|
||||
#else
|
||||
const int M = 2048;
|
||||
jobvector jobs[2];
|
||||
jobs[0].reserve(M);
|
||||
jobs[1].reserve(M);
|
||||
jobs[0].push_back(CollideJob(ref0,bounds0,0,ref1,bounds1,0));
|
||||
int source = 0;
|
||||
int target = 1;
|
||||
|
||||
/* try to split job until job list is full */
|
||||
while (jobs[source].size()+8 <= M)
|
||||
{
|
||||
for (size_t i=0; i<jobs[source].size(); i++)
|
||||
{
|
||||
const CollideJob& job = jobs[source][i];
|
||||
size_t remaining = jobs[source].size()-i;
|
||||
if (jobs[target].size()+remaining+8 > M) {
|
||||
jobs[target].push_back(job);
|
||||
} else {
|
||||
split(job,jobs[target]);
|
||||
}
|
||||
}
|
||||
|
||||
/* stop splitting jobs if we reached only leaves and cannot make progress anymore */
|
||||
if (jobs[target].size() == jobs[source].size())
|
||||
break;
|
||||
|
||||
jobs[source].resize(0);
|
||||
std::swap(source,target);
|
||||
}
|
||||
|
||||
/* parallel processing of all jobs */
|
||||
parallel_for(size_t(jobs[source].size()), [&] ( size_t i ) {
|
||||
CollideJob& j = jobs[source][i];
|
||||
collide_recurse(j.ref0,j.bounds0,j.ref1,j.bounds1,j.depth0,j.depth1);
|
||||
});
|
||||
|
||||
|
||||
#endif
|
||||
CSTAT(PRINT(bvh_collide_traversal_steps));
|
||||
CSTAT(PRINT(bvh_collide_leaf_pairs));
|
||||
CSTAT(PRINT(bvh_collide_leaf_iterations));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections1));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections2));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections3));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections4));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections5));
|
||||
CSTAT(PRINT(bvh_collide_prim_intersections));
|
||||
}
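// Illustrative sketch, not part of the upstream Embree sources:
// collide_recurse_entry above first expands the root pair breadth-first into at
// most M jobs, ping-ponging between two job lists, and only then hands the jobs
// to parallel_for. The double-buffered expansion, written against a
// hypothetical JobSketch type and a caller-supplied split functor (and assuming
// <vector> and std::swap are visible here, as they are for the code above), is:
namespace job_expansion_sketch
{
  struct JobSketch { int payload; };

  // 'split' appends the children of a job to 'out' (or copies the job itself
  // if it cannot be split any further).
  template<typename SplitFn>
  inline std::vector<JobSketch> expand_jobs(const JobSketch& root, size_t M, const SplitFn& split)
  {
    std::vector<JobSketch> buffers[2];
    buffers[0].reserve(M); buffers[1].reserve(M);
    buffers[0].push_back(root);
    int source = 0, target = 1;

    while (buffers[source].size() + 8 <= M)
    {
      for (size_t i = 0; i < buffers[source].size(); i++)
      {
        const size_t remaining = buffers[source].size() - i;
        if (buffers[target].size() + remaining + 8 > M)
          buffers[target].push_back(buffers[source][i]); // no room left: keep the job as-is
        else
          split(buffers[source][i], buffers[target]);    // otherwise split it further
      }
      if (buffers[target].size() == buffers[source].size())
        break;                                           // only unsplittable jobs remain
      buffers[source].clear();
      std::swap(source, target);
    }
    return buffers[source];
  }
}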
|
||||
|
||||
template<int N>
|
||||
void BVHNColliderUserGeom<N>::collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr)
|
||||
{
|
||||
BVHNColliderUserGeom<N>(bvh0->scene,bvh1->scene,callback,userPtr).
|
||||
collide_recurse_entry(bvh0->root,bvh0->bounds.bounds(),bvh1->root,bvh1->bounds.bounds());
|
||||
}
|
||||
|
||||
#if defined (EMBREE_LOWEST_ISA)
|
||||
struct collision_regression_test : public RegressionTest
|
||||
{
|
||||
collision_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(-0.008815f, 0.041848f, -2.49875e-06f), Vec3fa(-0.008276f, 0.053318f, -2.49875e-06f), Vec3fa(0.003023f, 0.048969f, -2.49875e-06f),
|
||||
Vec3fa(0.00245f, 0.037612f, -2.49875e-06f), Vec3fa(0.01434f, 0.042634f, -2.49875e-06f), Vec3fa(0.013499f, 0.031309f, -2.49875e-06f)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,1),Vec3fa(0,1,1)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,-0.1f),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,-0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(-0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
|
||||
Vec3fa(-1,1,0) + Vec3fa(0,0,0),Vec3fa(-1,1,0) + Vec3fa(0.1f,0,0),Vec3fa(-1,1,0) + Vec3fa(0,0.1f,0)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
|
||||
Vec3fa( 2,0.5f,0) + Vec3fa(0,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0.1f,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0,0.1f,0)) == false;
|
||||
passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
|
||||
Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0.1f,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0.1f,0)) == false;
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
collision_regression_test collision_regression("collision_regression_test");
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Collider Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
DEFINE_COLLIDER(BVH4ColliderUserGeom,BVHNColliderUserGeom<4>);
|
||||
|
||||
#if defined(__AVX__)
|
||||
DEFINE_COLLIDER(BVH8ColliderUserGeom,BVHNColliderUserGeom<8>);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
72
engine/thirdparty/embree/kernels/bvh/bvh_collider.h
vendored
Normal file
72
engine/thirdparty/embree/kernels/bvh/bvh_collider.h
vendored
Normal file
|
|
@ -0,0 +1,72 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh.h"
#include "../geometry/trianglev.h"
#include "../geometry/object.h"

namespace embree
{
  namespace isa
  {
    template<int N>
    class BVHNCollider
    {
      typedef BVHN<N> BVH;
      typedef typename BVH::NodeRef NodeRef;
      typedef typename BVH::AABBNode AABBNode;

      struct CollideJob
      {
        CollideJob () {}

        CollideJob (NodeRef ref0, const BBox3fa& bounds0, size_t depth0,
                    NodeRef ref1, const BBox3fa& bounds1, size_t depth1)
          : ref0(ref0), bounds0(bounds0), depth0(depth0), ref1(ref1), bounds1(bounds1), depth1(depth1) {}

        NodeRef ref0;
        BBox3fa bounds0;
        size_t depth0;
        NodeRef ref1;
        BBox3fa bounds1;
        size_t depth1;
      };

      typedef vector_t<CollideJob, aligned_allocator<CollideJob,16>> jobvector;

      void split(const CollideJob& job, jobvector& jobs);

    public:
      __forceinline BVHNCollider (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
        : scene0(scene0), scene1(scene1), callback(callback), userPtr(userPtr) {}

    public:
      virtual void processLeaf(NodeRef leaf0, NodeRef leaf1) = 0;
      void collide_recurse(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1, size_t depth0, size_t depth1);
      void collide_recurse_entry(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1);

    protected:
      Scene* scene0;
      Scene* scene1;
      RTCCollideFunc callback;
      void* userPtr;
    };

    template<int N>
    class BVHNColliderUserGeom : public BVHNCollider<N>
    {
      typedef BVHN<N> BVH;
      typedef typename BVH::NodeRef NodeRef;
      typedef typename BVH::AABBNode AABBNode;

      __forceinline BVHNColliderUserGeom (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
        : BVHNCollider<N>(scene0,scene1,callback,userPtr) {}

      virtual void processLeaf(NodeRef leaf0, NodeRef leaf1);
    public:
      static void collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr);
    };
  }
}
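The header above only declares the collider; the simultaneous descent itself lives in bvh_collider.cpp. As a rough orientation, the pattern behind collide_recurse() and CollideJob is the classic pairwise BVH walk sketched below, with hypothetical stand-in types, no SIMD, and no job splitting (the real code walks BVHN<N>::NodeRef pairs, balances descent by depth, and batches deep pairs into CollideJobs for parallel processing):

#include <vector>

struct Box  { float lo[3], hi[3]; };
struct Node { Box bounds; std::vector<Node*> children; /* empty => leaf */ };

static bool overlaps(const Box& a, const Box& b)
{
  for (int i = 0; i < 3; i++)
    if (a.hi[i] < b.lo[i] || b.hi[i] < a.lo[i]) return false;
  return true;
}

template<typename LeafPairFn>
static void collidePair(const Node* n0, const Node* n1, LeafPairFn&& processLeaf)
{
  if (!overlaps(n0->bounds, n1->bounds)) return;        // prune disjoint subtree pairs
  const bool leaf0 = n0->children.empty();
  const bool leaf1 = n1->children.empty();
  if (leaf0 && leaf1) { processLeaf(n0, n1); return; }  // report a candidate leaf pair
  if (leaf1 || !leaf0) {                                // descend n0 if it still has children
    for (const Node* c : n0->children) collidePair(c, n1, processLeaf);
  } else {                                              // otherwise descend n1
    for (const Node* c : n1->children) collidePair(n0, c, processLeaf);
  }
}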
21
engine/thirdparty/embree/kernels/bvh/bvh_factory.h
vendored
Normal file
21
engine/thirdparty/embree/kernels/bvh/bvh_factory.h
vendored
Normal file
|
|
@ -0,0 +1,21 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../bvh/bvh.h"
#include "../common/isa.h"
#include "../common/accel.h"
#include "../common/scene.h"
#include "../geometry/curve_intersector_virtual.h"

namespace embree
{
  /*! BVH instantiations */
  class BVHFactory
  {
  public:
    enum class BuildVariant { STATIC, DYNAMIC, HIGH_QUALITY };
    enum class IntersectVariant { FAST, ROBUST };
  };
}
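BVHFactory itself only fixes this vocabulary; concrete factories elsewhere in the kernels map the variants to builders and traversal kernels. Roughly, BuildVariant mirrors the public RTCBuildQuality setting. The mapping below is a hypothetical illustration only, not the actual selection logic:

#include <embree4/rtcore.h>

enum class BuildVariant { STATIC, DYNAMIC, HIGH_QUALITY };

static BuildVariant variantFor(RTCBuildQuality q)
{
  switch (q) {
    case RTC_BUILD_QUALITY_LOW:  return BuildVariant::DYNAMIC;      // favors fast (re)builds
    case RTC_BUILD_QUALITY_HIGH: return BuildVariant::HIGH_QUALITY; // higher-quality SAH build
    default:                     return BuildVariant::STATIC;       // default binned SAH
  }
}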
322
engine/thirdparty/embree/kernels/bvh/bvh_intersector1.cpp
vendored
Normal file
322
engine/thirdparty/embree/kernels/bvh/bvh_intersector1.cpp
vendored
Normal file
|
|
@ -0,0 +1,322 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector1.h"
|
||||
#include "node_intersector1.h"
|
||||
#include "bvh_traverser1.h"
|
||||
|
||||
#include "../geometry/intersector_iterators.h"
|
||||
#include "../geometry/triangle_intersector.h"
|
||||
#include "../geometry/trianglev_intersector.h"
|
||||
#include "../geometry/trianglev_mb_intersector.h"
|
||||
#include "../geometry/trianglei_intersector.h"
|
||||
#include "../geometry/quadv_intersector.h"
|
||||
#include "../geometry/quadi_intersector.h"
|
||||
#include "../geometry/curveNv_intersector.h"
|
||||
#include "../geometry/curveNi_intersector.h"
|
||||
#include "../geometry/curveNi_mb_intersector.h"
|
||||
#include "../geometry/linei_intersector.h"
|
||||
#include "../geometry/subdivpatch1_intersector.h"
|
||||
#include "../geometry/object_intersector.h"
|
||||
#include "../geometry/instance_intersector.h"
|
||||
#include "../geometry/instance_array_intersector.h"
|
||||
#include "../geometry/subgrid_intersector.h"
|
||||
#include "../geometry/subgrid_mb_intersector.h"
|
||||
#include "../geometry/curve_intersector_virtual.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector1>
|
||||
void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::intersect(const Accel::Intersectors* __restrict__ This,
|
||||
RayHit& __restrict__ ray,
|
||||
RayQueryContext* __restrict__ context)
|
||||
{
|
||||
const BVH* __restrict__ bvh = (const BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
/* perform per ray precalculations required by the primitive intersector */
|
||||
Precalculations pre(ray, bvh);
|
||||
|
||||
/* stack state */
|
||||
StackItemT<NodeRef> stack[stackSize]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
|
||||
StackItemT<NodeRef>* stackEnd = stack+stackSize;
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
/* filter out invalid rays */
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
if (!ray.valid()) return;
|
||||
#endif
|
||||
/* verify correct input */
|
||||
assert(ray.valid());
|
||||
assert(ray.tnear() >= 0.0f);
|
||||
assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));
|
||||
|
||||
/* initialize the node traverser */
|
||||
BVHNNodeTraverser1Hit<N, types> nodeTraverser;
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* if popped node is too far, pop next one */
|
||||
if (unlikely(*(float*)&stackPtr->dist > ray.tfar))
|
||||
continue;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(normal.trav_nodes,1,1,1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(normal.trav_leaves,1,1,1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersector1::intersect(This, pre, ray, context, prim, num, tray, lazy_node);
|
||||
tray.tfar = ray.tfar;
|
||||
|
||||
/* push lazy node onto stack */
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector1>
|
||||
void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::occluded(const Accel::Intersectors* __restrict__ This,
|
||||
Ray& __restrict__ ray,
|
||||
RayQueryContext* __restrict__ context)
|
||||
{
|
||||
const BVH* __restrict__ bvh = (const BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
/* early out for already occluded rays */
|
||||
if (unlikely(ray.tfar < 0.0f))
|
||||
return;
|
||||
|
||||
/* perform per ray precalculations required by the primitive intersector */
|
||||
Precalculations pre(ray, bvh);
|
||||
|
||||
/* stack state */
|
||||
NodeRef stack[stackSize]; // stack of nodes that still need to get traversed
|
||||
NodeRef* stackPtr = stack+1; // current stack pointer
|
||||
NodeRef* stackEnd = stack+stackSize;
|
||||
stack[0] = bvh->root;
|
||||
|
||||
/* filter out invalid rays */
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
if (!ray.valid()) return;
|
||||
#endif
|
||||
|
||||
/* verify correct input */
|
||||
assert(ray.valid());
|
||||
assert(ray.tnear() >= 0.0f);
|
||||
assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));
|
||||
|
||||
/* initialize the node traverser */
|
||||
BVHNNodeTraverser1Hit<N, types> nodeTraverser;
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = (NodeRef)*stackPtr;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(shadow.trav_nodes,1,1,1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
nodeTraverser.traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(shadow.trav_leaves,1,1,1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
size_t lazy_node = 0;
|
||||
if (PrimitiveIntersector1::occluded(This, pre, ray, context, prim, num, tray, lazy_node)) {
|
||||
ray.tfar = neg_inf;
|
||||
break;
|
||||
}
|
||||
|
||||
/* push lazy node onto stack */
|
||||
if (unlikely(lazy_node)) {
|
||||
*stackPtr = (NodeRef)lazy_node;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
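    /* Note (added here, not an upstream comment): PointQueryDispatch below drives rtcPointQuery
       traversal. Instead of a ray it descends with a query sphere (or an axis-aligned box for
       non-sphere query types, via TravPointQuery), culls nodes whose entry distance exceeds the
       squared cull radius, and shrinks that radius whenever a leaf callback reports a closer
       result. Geometry types without point-query support are specialized further down to simply
       return false. */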
template<int N, int types, bool robust, typename PrimitiveIntersector1>
|
||||
struct PointQueryDispatch
|
||||
{
|
||||
typedef typename PrimitiveIntersector1::Precalculations Precalculations;
|
||||
typedef typename PrimitiveIntersector1::Primitive Primitive;
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
|
||||
|
||||
static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
|
||||
|
||||
static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
|
||||
{
|
||||
const BVH* __restrict__ bvh = (const BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return false;
|
||||
|
||||
/* stack state */
|
||||
StackItemT<NodeRef> stack[stackSize]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
|
||||
StackItemT<NodeRef>* stackEnd = stack+stackSize;
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
/* verify correct input */
|
||||
assert(!(types & BVH_MB) || (query->time >= 0.0f && query->time <= 1.0f));
|
||||
|
||||
/* load the point query into SIMD registers */
|
||||
TravPointQuery<N> tquery(query->p, context->query_radius);
|
||||
|
||||
/* initialize the node traverser */
|
||||
BVHNNodeTraverser1Hit<N,types> nodeTraverser;
|
||||
|
||||
bool changed = false;
|
||||
float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
|
||||
? query->radius * query->radius
|
||||
: dot(context->query_radius, context->query_radius);
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* if popped node is too far, pop next one */
|
||||
if (unlikely(*(float*)&stackPtr->dist > cull_radius))
|
||||
continue;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(point_query.trav_nodes,1,1,1);
|
||||
bool nodeIntersected;
|
||||
if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
|
||||
nodeIntersected = BVHNNodePointQuerySphere1<N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
|
||||
} else {
|
||||
nodeIntersected = BVHNNodePointQueryAABB1 <N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
|
||||
}
|
||||
if (unlikely(!nodeIntersected)) { STAT3(point_query.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(point_query.trav_leaves,1,1,1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
size_t lazy_node = 0;
|
||||
if (PrimitiveIntersector1::pointQuery(This, query, context, prim, num, tquery, lazy_node))
|
||||
{
|
||||
changed = true;
|
||||
tquery.rad = context->query_radius;
|
||||
cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
|
||||
? query->radius * query->radius
|
||||
: dot(context->query_radius, context->query_radius);
|
||||
}
|
||||
|
||||
/* push lazy node onto stack */
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
};
|
||||
|
||||
/* disable point queries for not yet supported geometry types */
|
||||
template<int N, int types, bool robust>
|
||||
struct PointQueryDispatch<N, types, robust, VirtualCurveIntersector1> {
|
||||
static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
|
||||
};
|
||||
|
||||
template<int N, int types, bool robust>
|
||||
struct PointQueryDispatch<N, types, robust, SubdivPatch1Intersector1> {
|
||||
static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
|
||||
};
|
||||
|
||||
template<int N, int types, bool robust>
|
||||
struct PointQueryDispatch<N, types, robust, SubdivPatch1MBIntersector1> {
|
||||
static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
|
||||
};
|
||||
|
||||
template<int N, int types, bool robust, typename PrimitiveIntersector1>
|
||||
bool BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::pointQuery(
|
||||
const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
|
||||
{
|
||||
return PointQueryDispatch<N, types, robust, PrimitiveIntersector1>::pointQuery(This, query, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
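The three entry points above (intersect, occluded, pointQuery) share the same stack-driven skeleton. Below is a compact, self-contained restatement of that skeleton for the closest-hit case, using hypothetical plain-C++ types instead of Embree's NodeRef/TravRay and leaving the node and primitive tests as assumed helpers:

#include <cstddef>
#include <cmath>

struct Ray  { float org[3], dir[3], tnear, tfar; int primID = -1; };
struct Node { bool leaf; const Node* child[4]; float bounds[4][6]; int firstPrim, numPrims; };

float intersectBox(const Ray& ray, const float bounds[6]);  // assumed: entry distance or +inf
void  intersectPrimitive(Ray& ray, int primID);             // assumed: shrinks ray.tfar on a hit

void intersect1(const Node* root, Ray& ray)
{
  struct StackItem { const Node* node; float dist; };
  StackItem stack[64];
  std::size_t sp = 0;
  stack[sp++] = { root, -INFINITY };

  while (sp) {                                     // pop loop
    StackItem top = stack[--sp];
    if (top.dist > ray.tfar) continue;             // a closer hit was found since this was pushed
    const Node* cur = top.node;

    while (!cur->leaf) {                           // downtraversal loop
      const Node* next = nullptr; float nextDist = INFINITY;
      for (int i = 0; i < 4 && cur->child[i]; i++) {
        const float d = intersectBox(ray, cur->bounds[i]);
        if (d > ray.tfar) continue;                // child box missed or behind the current hit
        if (d < nextDist) {                        // follow the closest child, push the others
          if (next) stack[sp++] = { next, nextDist };
          next = cur->child[i]; nextDist = d;
        } else {
          stack[sp++] = { cur->child[i], d };
        }
      }
      if (!next) break;                            // no child hit: pop the next stack entry
      cur = next;
    }

    if (!cur->leaf) continue;
    for (int i = 0; i < cur->numPrims; i++)        // leaf: intersect primitives, shrinking tfar
      intersectPrimitive(ray, cur->firstPrim + i);
  }
}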
34
engine/thirdparty/embree/kernels/bvh/bvh_intersector1.h
vendored
Normal file
34
engine/thirdparty/embree/kernels/bvh/bvh_intersector1.h
vendored
Normal file
|
|
@ -0,0 +1,34 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh.h"
#include "../common/ray.h"
#include "../common/point_query.h"

namespace embree
{
  namespace isa
  {
    /*! BVH single ray intersector. */
    template<int N, int types, bool robust, typename PrimitiveIntersector1>
    class BVHNIntersector1
    {
      /* shortcuts for frequently used types */
      typedef typename PrimitiveIntersector1::Precalculations Precalculations;
      typedef typename PrimitiveIntersector1::Primitive Primitive;
      typedef BVHN<N> BVH;
      typedef typename BVH::NodeRef NodeRef;
      typedef typename BVH::AABBNode AABBNode;
      typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;

      static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store

    public:
      static void intersect (const Accel::Intersectors* This, RayHit& ray, RayQueryContext* context);
      static void occluded (const Accel::Intersectors* This, Ray& ray, RayQueryContext* context);
      static bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
    };
  }
}
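PrimitiveIntersector1 is a compile-time policy; nothing in this header spells out its required shape, so the outline below is inferred from the calls made in bvh_intersector1.cpp above and is only a reading aid, not a real Embree type (signatures abbreviated):

struct ExamplePrimitiveIntersector1
{
  // per-ray setup constructed once before traversal (may do nothing);
  // the real types take (const RayHit&, const BVH*) in their constructor
  struct Precalculations { };

  // layout of one leaf entry; cur.leaf(num) yields 'num' of these
  struct Primitive;

  // static void intersect (This, pre, ray, context, prim, num, tray, lazy_node);   closest hit
  // static bool occluded  (This, pre, ray, context, prim, num, tray, lazy_node);   any hit
  // static bool pointQuery(This, query, context, prim, num, tquery, lazy_node);    rtcPointQuery
};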
64
engine/thirdparty/embree/kernels/bvh/bvh_intersector1_bvh4.cpp
vendored
Normal file
64
engine/thirdparty/embree/kernels/bvh/bvh_intersector1_bvh4.cpp
vendored
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector1.cpp"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
int getISA() {
|
||||
return VerifyMultiTargetLinking::getISA();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// BVH4Intersector1 Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersector1 >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersector1 >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMvIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<QuadMiMBIntersector1Moeller <4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<QuadMiMBIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1Intersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector1>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1MBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubdivPatch1MBIntersector1>));
|
||||
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<ObjectIntersector1<false>> >));
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<ObjectIntersector1<true>> >));
|
||||
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >));
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1> >));
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1MB> >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >));
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersector1Pluecker<4 COMMA true> >));
|
||||
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
|
||||
|
||||
}
|
||||
}
|
||||
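One preprocessor detail worth calling out in the registration table above: the template argument lists contain commas, and a bare comma would be split into separate macro arguments by DEFINE_INTERSECTOR1 and the IF_ENABLED_* wrappers. Embree's COMMA macro is the standard workaround; a sketch of the idea, with SOME_MACRO as a hypothetical stand-in:

#define COMMA ,
// SOME_MACRO(Name, Wrapper<4 COMMA true>) reaches the macro body as two arguments,
// whereas SOME_MACRO(Name, Wrapper<4, true>) would be parsed as three.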
918
engine/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
vendored
Normal file
918
engine/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
vendored
Normal file
|
|
@ -0,0 +1,918 @@
|
|||
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_hybrid.h"
|
||||
#include "bvh_traverser1.h"
|
||||
#include "node_intersector1.h"
|
||||
#include "node_intersector_packet.h"
|
||||
|
||||
#include "../geometry/intersector_iterators.h"
|
||||
#include "../geometry/triangle_intersector.h"
|
||||
#include "../geometry/trianglev_intersector.h"
|
||||
#include "../geometry/trianglev_mb_intersector.h"
|
||||
#include "../geometry/trianglei_intersector.h"
|
||||
#include "../geometry/quadv_intersector.h"
|
||||
#include "../geometry/quadi_intersector.h"
|
||||
#include "../geometry/curveNv_intersector.h"
|
||||
#include "../geometry/curveNi_intersector.h"
|
||||
#include "../geometry/curveNi_mb_intersector.h"
|
||||
#include "../geometry/linei_intersector.h"
|
||||
#include "../geometry/subdivpatch1_intersector.h"
|
||||
#include "../geometry/object_intersector.h"
|
||||
#include "../geometry/instance_intersector.h"
|
||||
#include "../geometry/instance_array_intersector.h"
|
||||
#include "../geometry/subgrid_intersector.h"
|
||||
#include "../geometry/subgrid_mb_intersector.h"
|
||||
#include "../geometry/curve_intersector_virtual.h"
|
||||
|
||||
#define SWITCH_DURING_DOWN_TRAVERSAL 1
|
||||
#define FORCE_SINGLE_MODE 0
|
||||
|
||||
#define ENABLE_FAST_COHERENT_CODEPATHS 1
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
|
||||
const BVH* bvh,
|
||||
NodeRef root,
|
||||
size_t k,
|
||||
Precalculations& pre,
|
||||
RayHitK<K>& ray,
|
||||
const TravRayK<K, robust>& tray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
/* stack state */
|
||||
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
|
||||
stack[0].ptr = root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray1;
|
||||
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* if popped node is too far, pop next one */
|
||||
if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
|
||||
continue;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(normal.trav_leaves, 1, 1, 1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
|
||||
|
||||
tray1.tfar = ray.tfar[k];
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
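    /* Note (added here, not an upstream comment): intersect1() above is the scalar fallback of the
       hybrid scheme. The packet intersect() below re-traverses individual rays through it whenever
       too few SIMD lanes remain active for packet traversal to pay off (see switchThreshold). */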
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>& __restrict__ ray,
|
||||
RayQueryContext* __restrict__ context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
|
||||
assert(context);
|
||||
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
|
||||
{
|
||||
intersectCoherent(valid_i, This, ray, context);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
|
||||
#if defined(__AVX__)
|
||||
STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
|
||||
#endif
|
||||
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
if (single)
|
||||
{
|
||||
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
for (; valid_bits!=0; ) {
|
||||
const size_t i = bscf(valid_bits);
|
||||
intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* determine switch threshold based on flags */
|
||||
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
/* test whether we have ray with opposing direction signs in the packet */
|
||||
bool split = false;
|
||||
{
|
||||
size_t bits = valid_bits;
|
||||
vbool<K> vsplit( false );
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(bits);
|
||||
vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
bits &= ~(size_t)movemask(octant_valid);
|
||||
vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
|
||||
} while (bits);
|
||||
if (any(vsplit)) split = true;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
|
||||
const vint<K> count_diff_octant = \
|
||||
((diff_octant >> 2) & 1) +
|
||||
((diff_octant >> 1) & 1) +
|
||||
((diff_octant >> 0) & 1);
|
||||
|
||||
vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
|
||||
if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
|
||||
|
||||
|
||||
octant = select(octant_valid,vint<K>(0xffffffff),octant);
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
/* allocate stack and push root node */
|
||||
vfloat<K> stack_near[stackSizeChunk];
|
||||
NodeRef stack_node[stackSizeChunk];
|
||||
stack_node[0] = BVH::invalidNode;
|
||||
stack_near[0] = inf;
|
||||
stack_node[1] = bvh->root;
|
||||
stack_near[1] = tray.tnear;
|
||||
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
|
||||
NodeRef* __restrict__ sptr_node = stack_node + 2;
|
||||
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
assert(sptr_node > stack_node);
|
||||
sptr_node--;
|
||||
sptr_near--;
|
||||
NodeRef cur = *sptr_node;
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *sptr_near;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active)))
|
||||
continue;
|
||||
|
||||
/* switch to single ray traversal */
|
||||
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (single)
|
||||
#endif
|
||||
{
|
||||
size_t bits = movemask(active);
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (unlikely(popcnt(bits) <= switchThreshold))
|
||||
#endif
|
||||
{
|
||||
for (; bits!=0; ) {
|
||||
const size_t i = bscf(bits);
|
||||
intersect1(This, bvh, cur, i, pre, ray, tray, context);
|
||||
}
|
||||
tray.tfar = min(tray.tfar, ray.tfar);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
const vbool<K> valid_node = tray.tfar > curDist;
|
||||
STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const BaseNode* __restrict__ const node = nodeRef.baseNode();
|
||||
|
||||
/* set cur to invalid */
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
size_t num_child_hits = 0;
|
||||
|
||||
for (unsigned i = 0; i < N; i++)
|
||||
{
|
||||
const NodeRef child = node->children[i];
|
||||
if (unlikely(child == BVH::emptyNode)) break;
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = valid_node;
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
/* if we hit the child we choose to continue with that child if it
|
||||
is closer than the current next child, or we push it onto the stack */
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
assert(sptr_node < stackEnd);
|
||||
assert(child != BVH::emptyNode);
|
||||
const vfloat<K> childDist = select(lhit, lnearP, inf);
|
||||
/* push cur node onto stack and continue with hit child */
|
||||
if (any(childDist < curDist))
|
||||
{
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
num_child_hits++;
|
||||
*sptr_node = cur; sptr_node++;
|
||||
*sptr_near = curDist; sptr_near++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
|
||||
/* push hit child onto stack */
|
||||
else {
|
||||
num_child_hits++;
|
||||
*sptr_node = child; sptr_node++;
|
||||
*sptr_near = childDist; sptr_near++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
|
||||
#endif
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode))
|
||||
goto pop;
|
||||
|
||||
/* improved distance sorting for 3 or more hits */
|
||||
if (unlikely(num_child_hits >= 2))
|
||||
{
|
||||
if (any(sptr_near[-2] < sptr_near[-1]))
|
||||
{
|
||||
std::swap(sptr_near[-2],sptr_near[-1]);
|
||||
std::swap(sptr_node[-2],sptr_node[-1]);
|
||||
}
|
||||
if (unlikely(num_child_hits >= 3))
|
||||
{
|
||||
if (any(sptr_near[-3] < sptr_near[-1]))
|
||||
{
|
||||
std::swap(sptr_near[-3],sptr_near[-1]);
|
||||
std::swap(sptr_node[-3],sptr_node[-1]);
|
||||
}
|
||||
if (any(sptr_near[-3] < sptr_near[-2]))
|
||||
{
|
||||
std::swap(sptr_near[-3],sptr_near[-2]);
|
||||
std::swap(sptr_node[-3],sptr_node[-2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
|
||||
if (single)
|
||||
{
|
||||
// seems to be the best place for testing utilization
|
||||
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
|
||||
{
|
||||
*sptr_node++ = cur;
|
||||
*sptr_near++ = curDist;
|
||||
goto pop;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return if stack is empty */
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*sptr_node = lazy_node; sptr_node++;
|
||||
*sptr_near = neg_inf; sptr_near++;
|
||||
}
|
||||
}
|
||||
} while(valid_bits);
|
||||
}
|
||||
|
||||
|
||||
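    /* Note (added here, not an upstream comment): intersectCoherent() below is the fast path for
       packets whose rays share a direction octant (e.g. primary camera rays). It builds a Frustum
       around the packet and rejects whole child boxes with a single frustum test per node
       (intersectNodeFrustum), running the per-lane node test only for children that survive. */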
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayHitK<K>& __restrict__ ray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
const vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
Frustum<robust> frustum;
|
||||
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
|
||||
|
||||
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].dist = neg_inf;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *(float*)&stackPtr->dist;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active))) continue;
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
|
||||
|
||||
vfloat<N> fmin;
|
||||
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
|
||||
|
||||
if (unlikely(!m_frustum_node)) goto pop;
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
|
||||
#endif
|
||||
size_t num_child_hits = 0;
|
||||
do {
|
||||
const size_t i = bscf(m_frustum_node);
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
const vfloat<K> childDist = fmin[i];
|
||||
const NodeRef child = node->child(i);
|
||||
BVHN<N>::prefetch(child);
|
||||
if (any(childDist < curDist))
|
||||
{
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
num_child_hits++;
|
||||
stackPtr->ptr = cur;
|
||||
*(float*)&stackPtr->dist = toScalar(curDist);
|
||||
stackPtr++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
/* push hit child onto stack */
|
||||
else {
|
||||
num_child_hits++;
|
||||
stackPtr->ptr = child;
|
||||
*(float*)&stackPtr->dist = toScalar(childDist);
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
} while(m_frustum_node);
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode)) goto pop;
|
||||
|
||||
/* improved distance sorting for 3 or more hits */
|
||||
if (unlikely(num_child_hits >= 2))
|
||||
{
|
||||
if (stackPtr[-2].dist < stackPtr[-1].dist)
|
||||
std::swap(stackPtr[-2],stackPtr[-1]);
|
||||
if (unlikely(num_child_hits >= 3))
|
||||
{
|
||||
if (stackPtr[-3].dist < stackPtr[-1].dist)
|
||||
std::swap(stackPtr[-3],stackPtr[-1]);
|
||||
if (stackPtr[-3].dist < stackPtr[-2].dist)
|
||||
std::swap(stackPtr[-3],stackPtr[-2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::invalidNode);
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
|
||||
/* reduce max distance interval on successful intersection */
|
||||
if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
|
||||
{
|
||||
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
|
||||
frustum.template updateMaxDist<K>(tray.tfar);
|
||||
}
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->dist = neg_inf;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
|
||||
} while(valid_bits);
|
||||
}
|
||||
|
||||
// ===================================================================================================================================================================
|
||||
// ===================================================================================================================================================================
|
||||
// ===================================================================================================================================================================
|
||||
|
||||
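    /* Note (added here, not an upstream comment): the occlusion (shadow-ray) variants that follow
       differ from the closest-hit code above in two ways: traversal order is irrelevant, so nodes
       are pushed without the distance sorting used in the closest-hit paths, and the first
       primitive hit terminates a ray, recorded by writing -inf into tfar instead of hit data. */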
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
|
||||
const BVH* bvh,
|
||||
NodeRef root,
|
||||
size_t k,
|
||||
Precalculations& pre,
|
||||
RayK<K>& ray,
|
||||
const TravRayK<K, robust>& tray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
/* stack state */
|
||||
NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
|
||||
NodeRef* stackPtr = stack+1; // current stack pointer
|
||||
NodeRef* stackEnd = stack+stackSizeSingle;
|
||||
stack[0] = root;
|
||||
|
||||
/* load the ray into SIMD registers */
|
||||
TravRay<N,robust> tray1;
|
||||
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
|
||||
|
||||
/* pop loop */
|
||||
while (true) pop:
|
||||
{
|
||||
/* pop next node */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
stackPtr--;
|
||||
NodeRef cur = (NodeRef)*stackPtr;
|
||||
|
||||
/* downtraversal loop */
|
||||
while (true)
|
||||
{
|
||||
/* intersect node */
|
||||
size_t mask; vfloat<N> tNear;
|
||||
STAT3(shadow.trav_nodes, 1, 1, 1);
|
||||
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
|
||||
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
|
||||
|
||||
/* if no child is hit, pop next node */
|
||||
if (unlikely(mask == 0))
|
||||
goto pop;
|
||||
|
||||
/* select next child and push other children */
|
||||
BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
|
||||
}
|
||||
|
||||
/* this is a leaf node */
|
||||
assert(cur != BVH::emptyNode);
|
||||
STAT3(shadow.trav_leaves, 1, 1, 1);
|
||||
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
|
||||
ray.tfar[k] = neg_inf;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*stackPtr = lazy_node;
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>& __restrict__ ray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* we may traverse an empty BVH in case all geometry was invalid */
|
||||
if (bvh->root == BVH::emptyNode)
|
||||
return;
|
||||
|
||||
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
|
||||
assert(context);
|
||||
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
|
||||
{
|
||||
occludedCoherent(valid_i, This, ray, context);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* filter out already occluded and invalid rays */
|
||||
vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
const size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid, ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
|
||||
|
||||
vbool<K> terminated = !valid;
|
||||
const vfloat<K> inf = vfloat<K>(pos_inf);
|
||||
|
||||
/* determine switch threshold based on flags */
|
||||
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
|
||||
|
||||
/* allocate stack and push root node */
|
||||
vfloat<K> stack_near[stackSizeChunk];
|
||||
NodeRef stack_node[stackSizeChunk];
|
||||
stack_node[0] = BVH::invalidNode;
|
||||
stack_near[0] = inf;
|
||||
stack_node[1] = bvh->root;
|
||||
stack_near[1] = tray.tnear;
|
||||
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
|
||||
NodeRef* __restrict__ sptr_node = stack_node + 2;
|
||||
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
assert(sptr_node > stack_node);
|
||||
sptr_node--;
|
||||
sptr_near--;
|
||||
NodeRef cur = *sptr_node;
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
/* cull node if behind closest hit point */
|
||||
vfloat<K> curDist = *sptr_near;
|
||||
const vbool<K> active = curDist < tray.tfar;
|
||||
if (unlikely(none(active)))
|
||||
continue;
|
||||
|
||||
/* switch to single ray traversal */
|
||||
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (single)
|
||||
#endif
|
||||
{
|
||||
size_t bits = movemask(active);
|
||||
#if FORCE_SINGLE_MODE == 0
|
||||
if (unlikely(popcnt(bits) <= switchThreshold))
|
||||
#endif
|
||||
{
|
||||
for (; bits!=0; ) {
|
||||
const size_t i = bscf(bits);
|
||||
if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
|
||||
set(terminated, i);
|
||||
}
|
||||
if (all(terminated)) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
const vbool<K> valid_node = tray.tfar > curDist;
|
||||
STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const BaseNode* __restrict__ const node = nodeRef.baseNode();
|
||||
|
||||
/* set cur to invalid */
|
||||
cur = BVH::emptyNode;
|
||||
curDist = pos_inf;
|
||||
|
||||
for (unsigned i = 0; i < N; i++)
|
||||
{
|
||||
const NodeRef child = node->children[i];
|
||||
if (unlikely(child == BVH::emptyNode)) break;
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = valid_node;
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
assert(sptr_node < stackEnd);
|
||||
assert(child != BVH::emptyNode);
|
||||
const vfloat<K> childDist = select(lhit, lnearP, inf);
|
||||
|
||||
/* push 'cur' node onto stack and continue with hit child */
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
*sptr_node = cur; sptr_node++;
|
||||
*sptr_near = curDist; sptr_near++;
|
||||
}
|
||||
curDist = childDist;
|
||||
cur = child;
|
||||
}
|
||||
}
|
||||
if (unlikely(cur == BVH::emptyNode))
|
||||
goto pop;
|
||||
|
||||
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
|
||||
if (single)
|
||||
{
|
||||
// seems to be the best place for testing utilization
|
||||
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
|
||||
{
|
||||
*sptr_node++ = cur;
|
||||
*sptr_near++ = curDist;
|
||||
goto pop;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return if stack is empty */
|
||||
if (unlikely(cur == BVH::invalidNode)) {
|
||||
assert(sptr_node == stack_node);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::emptyNode);
|
||||
const vbool<K> valid_leaf = tray.tfar > curDist;
|
||||
STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
|
||||
if (unlikely(none(valid_leaf))) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
if (all(terminated)) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
*sptr_node = lazy_node; sptr_node++;
|
||||
*sptr_near = neg_inf; sptr_near++;
|
||||
}
|
||||
}
|
||||
|
||||
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
|
||||
}
|
||||
|
||||
|
||||
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
|
||||
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
|
||||
Accel::Intersectors* __restrict__ This,
|
||||
RayK<K>& __restrict__ ray,
|
||||
RayQueryContext* context)
|
||||
{
|
||||
BVH* __restrict__ bvh = (BVH*)This->ptr;
|
||||
|
||||
/* filter out invalid rays */
|
||||
vbool<K> valid = *valid_i == -1;
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
valid &= ray.valid();
|
||||
#endif
|
||||
|
||||
/* return if there are no valid rays */
|
||||
size_t valid_bits = movemask(valid);
|
||||
if (unlikely(valid_bits == 0)) return;
|
||||
|
||||
/* verify correct input */
|
||||
assert(all(valid, ray.valid()));
|
||||
assert(all(valid, ray.tnear() >= 0.0f));
|
||||
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
|
||||
Precalculations pre(valid,ray);
|
||||
|
||||
/* load ray */
|
||||
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
|
||||
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
|
||||
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
|
||||
|
||||
vbool<K> terminated = !valid;
|
||||
|
||||
vint<K> octant = ray.octant();
|
||||
octant = select(valid, octant, vint<K>(0xffffffff));
|
||||
|
||||
do
|
||||
{
|
||||
const size_t valid_index = bsf(valid_bits);
|
||||
vbool<K> octant_valid = octant[valid_index] == octant;
|
||||
valid_bits &= ~(size_t)movemask(octant_valid);
|
||||
|
||||
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
|
||||
tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
|
||||
|
||||
Frustum<robust> frustum;
|
||||
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
|
||||
|
||||
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
|
||||
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
|
||||
stack[0].ptr = bvh->root;
|
||||
stack[0].mask = movemask(octant_valid);
|
||||
|
||||
while (1) pop:
|
||||
{
|
||||
/* pop next node from stack */
|
||||
if (unlikely(stackPtr == stack)) break;
|
||||
|
||||
stackPtr--;
|
||||
NodeRef cur = NodeRef(stackPtr->ptr);
|
||||
|
||||
/* cull node if its active rays have already been terminated */
|
||||
size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
|
||||
|
||||
if (unlikely(m_active == 0)) continue;
|
||||
|
||||
while (likely(!cur.isLeaf()))
|
||||
{
|
||||
/* process nodes */
|
||||
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
|
||||
const NodeRef nodeRef = cur;
|
||||
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
|
||||
|
||||
vfloat<N> fmin;
|
||||
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
|
||||
|
||||
if (unlikely(!m_frustum_node)) goto pop;
|
||||
cur = BVH::emptyNode;
|
||||
m_active = 0;
|
||||
|
||||
#if defined(__AVX__)
|
||||
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
|
||||
#endif
|
||||
//size_t num_child_hits = 0;
|
||||
do {
|
||||
const size_t i = bscf(m_frustum_node);
|
||||
vfloat<K> lnearP;
|
||||
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
|
||||
STAT3(normal.trav_nodes, 1, 1, 1);
|
||||
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
|
||||
|
||||
if (likely(any(lhit)))
|
||||
{
|
||||
const NodeRef child = node->child(i);
|
||||
assert(child != BVH::emptyNode);
|
||||
BVHN<N>::prefetch(child);
|
||||
if (likely(cur != BVH::emptyNode)) {
|
||||
//num_child_hits++;
|
||||
stackPtr->ptr = cur;
|
||||
stackPtr->mask = m_active;
|
||||
stackPtr++;
|
||||
}
|
||||
cur = child;
|
||||
m_active = movemask(lhit);
|
||||
}
|
||||
} while(m_frustum_node);
|
||||
|
||||
if (unlikely(cur == BVH::emptyNode)) goto pop;
|
||||
}
|
||||
|
||||
/* intersect leaf */
|
||||
assert(cur != BVH::invalidNode);
|
||||
assert(cur != BVH::emptyNode);
|
||||
#if defined(__AVX__)
|
||||
STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
|
||||
#endif
|
||||
if (unlikely(!m_active)) continue;
|
||||
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
|
||||
|
||||
size_t lazy_node = 0;
|
||||
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
|
||||
octant_valid &= !terminated;
|
||||
if (unlikely(none(octant_valid))) break;
|
||||
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
|
||||
|
||||
if (unlikely(lazy_node)) {
|
||||
stackPtr->ptr = lazy_node;
|
||||
stackPtr->mask = movemask(octant_valid);
|
||||
stackPtr++;
|
||||
}
|
||||
}
|
||||
} while(valid_bits);
|
||||
|
||||
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
|
||||
}
|
||||
}
|
||||
}
|
||||
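The defining trick of this file is the packet-to-single switch that both intersect() and occluded() perform after culling a popped subtree. Stripped of SIMD types, the decision looks roughly like the hypothetical helper below, with K and the threshold chosen to mirror the K==8, N==4 case above:

#include <bitset>
#include <cstddef>

constexpr std::size_t K = 8;               // packet width assumed for this sketch
constexpr std::size_t switchThreshold = 5; // cf. switchThresholdIncoherent for K==8, N==4

template<typename SingleRayFn>
bool maybeSwitchToSingle(std::bitset<K> activeLanes, SingleRayFn&& traverseSingle)
{
  if (activeLanes.count() > switchThreshold)
    return false;                          // enough coherent work left: keep packet traversal
  for (std::size_t k = 0; k < K; k++)      // few survivors: finish them one ray at a time
    if (activeLanes[k])
      traverseSingle(k);                   // e.g. intersect1()/occluded1() for lane k
  return true;                             // caller skips packet traversal of this subtree
}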
58
engine/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.h
vendored
Normal file
@@ -0,0 +1,58 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh.h"
#include "../common/ray.h"
#include "../common/stack_item.h"
#include "node_intersector_frustum.h"

namespace embree
{
namespace isa
{
template<int K, bool robust>
struct TravRayK;

/*! BVH hybrid packet intersector. Switches between packet and single ray traversal (optional). */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true>
class BVHNIntersectorKHybrid
{
/* shortcuts for frequently used types */
typedef typename PrimitiveIntersectorK::Precalculations Precalculations;
typedef typename PrimitiveIntersectorK::Primitive Primitive;
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::BaseNode BaseNode;
typedef typename BVH::AABBNode AABBNode;

static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
static const size_t stackSizeChunk = 1+(N-1)*BVH::maxDepth;

static const size_t switchThresholdIncoherent = \
(K==4) ? 3 :
(K==8) ? ((N==4) ? 5 : 7) :
(K==16) ? 14 : // 14 seems to work best for KNL due to better ordered chunk traversal
0;

private:
static void intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
RayHitK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);
static bool occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
RayK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);

public:
static void intersect(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
static void occluded (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);

static void intersectCoherent(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
static void occludedCoherent (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);

};

/*! BVH packet intersector. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK>
class BVHNIntersectorKChunk : public BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, false> {};
}
}
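The `single` template parameter and the switchThreshold* constants drive the hybrid strategy: packet traversal while enough rays stay active, per-ray traversal once occupancy drops. A sketch of that decision under the assumption that occupancy is measured by a popcount of the active-lane mask (the helper names below are made up, not embree API):

    #include <bitset>
    #include <cstddef>
    #include <cstdio>

    // Stand-ins for the packet and single-ray traversal paths (hypothetical).
    static void traversePacket(std::size_t activeMask)  { std::printf("packet  %zx\n", activeMask); }
    static void traverseSingleRay(std::size_t rayIndex) { std::printf("single  ray %zu\n", rayIndex); }

    // Fall back to per-ray traversal once too few rays of the packet remain active.
    static void traverseHybrid(std::size_t activeMask, std::size_t switchThreshold)
    {
      const std::size_t numActive = std::bitset<64>(activeMask).count();
      if (numActive <= switchThreshold) {
        for (std::size_t i = 0; i < 64; i++)
          if (activeMask & (std::size_t(1) << i))
            traverseSingleRay(i);
      } else {
        traversePacket(activeMask);
      }
    }

    int main()
    {
      traverseHybrid(0xFFu, 3);  // 8 active rays: stay on the packet path
      traverseHybrid(0x05u, 3);  // 2 active rays: switch to per-ray traversal
      return 0;
    }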
62
engine/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp
vendored
Normal file
@@ -0,0 +1,62 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_intersector_hybrid.cpp"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// BVH4Intersector4 Definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoellerNoFilter, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoellerNoFilter,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
|
||||
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridPluecker,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersectorK<4> >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersectorK<4> >));
|
||||
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersectorK<4> >));
|
||||
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersectorK<4> >));
|
||||
|
||||
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
|
||||
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
|
||||
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
|
||||
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4> >));
|
||||
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4MB> >));
|
||||
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorK<4>> >));
|
||||
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorKMB<4>> >));
|
||||
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorK<4>> >));
|
||||
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorKMB<4>> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
|
||||
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
|
||||
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersectorKPluecker <4 COMMA 4 COMMA true> >));
|
||||
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersectorKPluecker <4 COMMA 4 COMMA true> >));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
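The registrations above pass full template instantiations through the DEFINE_INTERSECTOR4 macro, which is why every comma inside a type is spelled COMMA: a bare comma would be read as a macro-argument separator. A minimal illustration of that preprocessor constraint (the DECLARE_GLOBAL macro here is made up, not embree's):

    #include <map>

    #define COMMA ,
    #define DECLARE_GLOBAL(type, name) type name;

    // Without COMMA the comma inside the template argument list would split
    // the type into two macro arguments and the expansion would not compile.
    DECLARE_GLOBAL(std::map<int COMMA float>, lookupTable)

    int main() { lookupTable[1] = 2.0f; return 0; }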
229
engine/thirdparty/embree/kernels/bvh/bvh_node_aabb.h
vendored
Normal file
@@ -0,0 +1,229 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! BVHN AABBNode */
|
||||
template<typename NodeRef, int N>
|
||||
struct AABBNode_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
|
||||
struct Create
|
||||
{
|
||||
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc, size_t numChildren = 0) const
|
||||
{
|
||||
AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const BBox3fa& bounds) const {
|
||||
node.getAABBNode()->setRef(i,child);
|
||||
node.getAABBNode()->setBounds(i,bounds);
|
||||
}
|
||||
};
|
||||
|
||||
struct Create2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t), NodeRef::byteNodeAlignment); node->clear();
|
||||
for (size_t i=0; i<num; i++) node->setBounds(i,children[i].bounds());
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i] == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i] == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
AABBNode_t* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
|
||||
return ref;
|
||||
}
|
||||
};
|
||||
|
||||
struct Set3
|
||||
{
|
||||
Set3 (FastAllocator* allocator, PrimRef* prims)
|
||||
: allocator(allocator), prims(prims) {}
|
||||
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i] == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i] == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
AABBNode_t* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
|
||||
|
||||
if (unlikely(precord.alloc_barrier))
|
||||
{
|
||||
PrimRef* begin = &prims[precord.prims.begin()];
|
||||
PrimRef* end = &prims[precord.prims.end()]; // FIXME: extended end for spatial split builder!!!!!
|
||||
size_t bytes = (size_t)end - (size_t)begin;
|
||||
allocator->addBlock(begin,bytes);
|
||||
}
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
FastAllocator* const allocator;
|
||||
PrimRef* const prims;
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
lower_x = lower_y = lower_z = pos_inf;
|
||||
upper_x = upper_y = upper_z = neg_inf;
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const BBox3fa& bounds)
|
||||
{
|
||||
assert(i < N);
|
||||
lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
|
||||
upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, const NodeRef& ref, const BBox3fa& bounds) {
|
||||
setBounds(i,bounds);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Returns bounds of node. */
|
||||
__forceinline BBox3fa bounds() const {
|
||||
const Vec3fa lower(reduce_min(lower_x),reduce_min(lower_y),reduce_min(lower_z));
|
||||
const Vec3fa upper(reduce_max(upper_x),reduce_max(upper_y),reduce_max(upper_z));
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! Returns bounds of specified child. */
|
||||
__forceinline BBox3fa bounds(size_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
const Vec3fa lower(lower_x[i],lower_y[i],lower_z[i]);
|
||||
const Vec3fa upper(upper_x[i],upper_y[i],upper_z[i]);
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! Returns extent of bounds of specified child. */
|
||||
__forceinline Vec3fa extend(size_t i) const {
|
||||
return bounds(i).size();
|
||||
}
|
||||
|
||||
/*! Returns bounds of all children (implemented later as specializations) */
|
||||
__forceinline void bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const;
|
||||
|
||||
/*! swap two children of the node */
|
||||
__forceinline void swap(size_t i, size_t j)
|
||||
{
|
||||
assert(i<N && j<N);
|
||||
std::swap(children[i],children[j]);
|
||||
std::swap(lower_x[i],lower_x[j]);
|
||||
std::swap(lower_y[i],lower_y[j]);
|
||||
std::swap(lower_z[i],lower_z[j]);
|
||||
std::swap(upper_x[i],upper_x[j]);
|
||||
std::swap(upper_y[i],upper_y[j]);
|
||||
std::swap(upper_z[i],upper_z[j]);
|
||||
}
|
||||
|
||||
/*! swap the children of two nodes */
|
||||
__forceinline static void swap(AABBNode_t* a, size_t i, AABBNode_t* b, size_t j)
|
||||
{
|
||||
assert(i<N && j<N);
|
||||
std::swap(a->children[i],b->children[j]);
|
||||
std::swap(a->lower_x[i],b->lower_x[j]);
|
||||
std::swap(a->lower_y[i],b->lower_y[j]);
|
||||
std::swap(a->lower_z[i],b->lower_z[j]);
|
||||
std::swap(a->upper_x[i],b->upper_x[j]);
|
||||
std::swap(a->upper_y[i],b->upper_y[j]);
|
||||
std::swap(a->upper_z[i],b->upper_z[j]);
|
||||
}
|
||||
|
||||
/*! compacts a node (moves empty children to the end) */
|
||||
__forceinline static void compact(AABBNode_t* a)
|
||||
{
|
||||
/* find right most filled node */
|
||||
ssize_t j=N;
|
||||
for (j=j-1; j>=0; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
|
||||
/* replace empty nodes with filled nodes */
|
||||
for (ssize_t i=0; i<j; i++) {
|
||||
if (a->child(i) == NodeRef::emptyNode) {
|
||||
a->swap(i,j);
|
||||
for (j=j-1; j>i; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! Returns reference to specified child */
|
||||
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
|
||||
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
|
||||
|
||||
/*! output operator */
|
||||
friend embree_ostream operator<<(embree_ostream o, const AABBNode_t& n)
|
||||
{
|
||||
o << "AABBNode { " << embree_endl;
|
||||
o << " lower_x " << n.lower_x << embree_endl;
|
||||
o << " upper_x " << n.upper_x << embree_endl;
|
||||
o << " lower_y " << n.lower_y << embree_endl;
|
||||
o << " upper_y " << n.upper_y << embree_endl;
|
||||
o << " lower_z " << n.lower_z << embree_endl;
|
||||
o << " upper_z " << n.upper_z << embree_endl;
|
||||
o << " children = ";
|
||||
for (size_t i=0; i<N; i++) o << n.children[i] << " ";
|
||||
o << embree_endl;
|
||||
o << "}" << embree_endl;
|
||||
return o;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
|
||||
};
|
||||
|
||||
template<>
|
||||
__forceinline void AABBNode_t<NodeRefPtr<4>,4>::bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const {
|
||||
transpose(lower_x,lower_y,lower_z,vfloat4(zero),bounds0.lower,bounds1.lower,bounds2.lower,bounds3.lower);
|
||||
transpose(upper_x,upper_y,upper_z,vfloat4(zero),bounds0.upper,bounds1.upper,bounds2.upper,bounds3.upper);
|
||||
}
|
||||
}
|
||||
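AABBNode_t keeps its child bounds in struct-of-arrays form (lower_x holds the X lower bound of all N children, and so on), so a single ray can be slab-tested against every child from contiguous loads. A scalar sketch of that layout and test, with plain float arrays in place of vfloat<N> (illustrative only):

    #include <algorithm>
    #include <cstdio>

    constexpr int N = 4;

    // Struct-of-arrays child bounds, mirroring lower_x/upper_x/... per axis.
    struct SoANode {
      float lower[3][N];
      float upper[3][N];
    };

    // Standard slab test of one ray against child i of an SoA node.
    static bool hitChild(const SoANode& n, int i,
                         const float org[3], const float invDir[3], float tnear, float tfar)
    {
      for (int a = 0; a < 3; a++) {
        float t0 = (n.lower[a][i] - org[a]) * invDir[a];
        float t1 = (n.upper[a][i] - org[a]) * invDir[a];
        if (t0 > t1) std::swap(t0, t1);
        tnear = std::max(tnear, t0);
        tfar  = std::min(tfar,  t1);
      }
      return tnear <= tfar;
    }

    int main()
    {
      SoANode node = {};
      for (int i = 0; i < N; i++)
        for (int a = 0; a < 3; a++) { node.lower[a][i] = -1.0f - i; node.upper[a][i] = 1.0f + i; }

      const float org[3] = {0, 0, -5}, invDir[3] = {1e30f, 1e30f, 1.0f}; // ray along +Z
      for (int i = 0; i < N; i++)
        std::printf("child %d hit: %d\n", i, hitChild(node, i, org, invDir, 0.0f, 100.0f));
      return 0;
    }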
255
engine/thirdparty/embree/kernels/bvh/bvh_node_aabb_mb.h
vendored
Normal file
@@ -0,0 +1,255 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Motion Blur AABBNode */
|
||||
template<typename NodeRef, int N>
|
||||
struct AABBNodeMB_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
typedef BVHNodeRecord<NodeRef> NodeRecord;
|
||||
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
|
||||
struct Create
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
AABBNodeMB_t* node = (AABBNodeMB_t*) alloc.malloc0(sizeof(AABBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i].ref == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i].ref == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
AABBNodeMB_t* node = ref.getAABBNodeMB();
|
||||
|
||||
LBBox3fa bounds = empty;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
node->setRef(i,children[i].ref);
|
||||
node->setBounds(i,children[i].lbounds);
|
||||
bounds.extend(children[i].lbounds);
|
||||
}
|
||||
return NodeRecordMB(ref,bounds);
|
||||
}
|
||||
};
|
||||
|
||||
struct SetTimeRange
|
||||
{
|
||||
__forceinline SetTimeRange(BBox1f tbounds) : tbounds(tbounds) {}
|
||||
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
|
||||
{
|
||||
AABBNodeMB_t* node = ref.getAABBNodeMB();
|
||||
|
||||
LBBox3fa bounds = empty;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
node->setRef(i, children[i].ref);
|
||||
node->setBounds(i, children[i].lbounds, tbounds);
|
||||
bounds.extend(children[i].lbounds);
|
||||
}
|
||||
return NodeRecordMB(ref,bounds);
|
||||
}
|
||||
|
||||
BBox1f tbounds;
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
lower_x = lower_y = lower_z = vfloat<N>(pos_inf);
|
||||
upper_x = upper_y = upper_z = vfloat<N>(neg_inf);
|
||||
lower_dx = lower_dy = lower_dz = vfloat<N>(0.0f);
|
||||
upper_dx = upper_dy = upper_dz = vfloat<N>(0.0f);
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets ID of child. */
|
||||
__forceinline void setRef(size_t i, NodeRef ref) {
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const BBox3fa& bounds0_i, const BBox3fa& bounds1_i)
|
||||
{
|
||||
/*! for empty bounds we have to avoid inf-inf=nan */
|
||||
BBox3fa bounds0(min(bounds0_i.lower,Vec3fa(+FLT_MAX)),max(bounds0_i.upper,Vec3fa(-FLT_MAX)));
|
||||
BBox3fa bounds1(min(bounds1_i.lower,Vec3fa(+FLT_MAX)),max(bounds1_i.upper,Vec3fa(-FLT_MAX)));
|
||||
bounds0 = bounds0.enlarge_by(4.0f*float(ulp));
|
||||
bounds1 = bounds1.enlarge_by(4.0f*float(ulp));
|
||||
Vec3fa dlower = bounds1.lower-bounds0.lower;
|
||||
Vec3fa dupper = bounds1.upper-bounds0.upper;
|
||||
|
||||
lower_x[i] = bounds0.lower.x; lower_y[i] = bounds0.lower.y; lower_z[i] = bounds0.lower.z;
|
||||
upper_x[i] = bounds0.upper.x; upper_y[i] = bounds0.upper.y; upper_z[i] = bounds0.upper.z;
|
||||
|
||||
lower_dx[i] = dlower.x; lower_dy[i] = dlower.y; lower_dz[i] = dlower.z;
|
||||
upper_dx[i] = dupper.x; upper_dy[i] = dupper.y; upper_dz[i] = dupper.z;
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const LBBox3fa& bounds) {
|
||||
setBounds(i, bounds.bounds0, bounds.bounds1);
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) {
|
||||
setBounds(i, bounds.global(tbounds));
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, NodeRef ref, const BBox3fa& bounds) {
|
||||
lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
|
||||
upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, const NodeRecordMB4D& child)
|
||||
{
|
||||
setRef(i, child.ref);
|
||||
setBounds(i, child.lbounds, child.dt);
|
||||
}
|
||||
|
||||
/*! Return bounding box for time 0 */
|
||||
__forceinline BBox3fa bounds0(size_t i) const {
|
||||
return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]),
|
||||
Vec3fa(upper_x[i],upper_y[i],upper_z[i]));
|
||||
}
|
||||
|
||||
/*! Return bounding box for time 1 */
|
||||
__forceinline BBox3fa bounds1(size_t i) const {
|
||||
return BBox3fa(Vec3fa(lower_x[i]+lower_dx[i],lower_y[i]+lower_dy[i],lower_z[i]+lower_dz[i]),
|
||||
Vec3fa(upper_x[i]+upper_dx[i],upper_y[i]+upper_dy[i],upper_z[i]+upper_dz[i]));
|
||||
}
|
||||
|
||||
/*! Returns bounds of node. */
|
||||
__forceinline BBox3fa bounds() const {
|
||||
return BBox3fa(Vec3fa(reduce_min(min(lower_x,lower_x+lower_dx)),
|
||||
reduce_min(min(lower_y,lower_y+lower_dy)),
|
||||
reduce_min(min(lower_z,lower_z+lower_dz))),
|
||||
Vec3fa(reduce_max(max(upper_x,upper_x+upper_dx)),
|
||||
reduce_max(max(upper_y,upper_y+upper_dy)),
|
||||
reduce_max(max(upper_z,upper_z+upper_dz))));
|
||||
}
|
||||
|
||||
/*! Return bounding box of child i */
|
||||
__forceinline BBox3fa bounds(size_t i) const {
|
||||
return merge(bounds0(i),bounds1(i));
|
||||
}
|
||||
|
||||
/*! Return linear bounding box of child i */
|
||||
__forceinline LBBox3fa lbounds(size_t i) const {
|
||||
return LBBox3fa(bounds0(i),bounds1(i));
|
||||
}
|
||||
|
||||
/*! Return bounding box of child i at specified time */
|
||||
__forceinline BBox3fa bounds(size_t i, float time) const {
|
||||
return lerp(bounds0(i),bounds1(i),time);
|
||||
}
|
||||
|
||||
/*! Returns the expected surface area when randomly sampling the time. */
|
||||
__forceinline float expectedHalfArea(size_t i) const {
|
||||
return lbounds(i).expectedHalfArea();
|
||||
}
|
||||
|
||||
/*! Returns the expected surface area when randomly sampling the time. */
|
||||
__forceinline float expectedHalfArea(size_t i, const BBox1f& t0t1) const {
|
||||
return lbounds(i).expectedHalfArea(t0t1);
|
||||
}
|
||||
|
||||
/*! swap two children of the node */
|
||||
__forceinline void swap(size_t i, size_t j)
|
||||
{
|
||||
assert(i<N && j<N);
|
||||
std::swap(children[i],children[j]);
|
||||
|
||||
std::swap(lower_x[i],lower_x[j]);
|
||||
std::swap(upper_x[i],upper_x[j]);
|
||||
std::swap(lower_y[i],lower_y[j]);
|
||||
std::swap(upper_y[i],upper_y[j]);
|
||||
std::swap(lower_z[i],lower_z[j]);
|
||||
std::swap(upper_z[i],upper_z[j]);
|
||||
|
||||
std::swap(lower_dx[i],lower_dx[j]);
|
||||
std::swap(upper_dx[i],upper_dx[j]);
|
||||
std::swap(lower_dy[i],lower_dy[j]);
|
||||
std::swap(upper_dy[i],upper_dy[j]);
|
||||
std::swap(lower_dz[i],lower_dz[j]);
|
||||
std::swap(upper_dz[i],upper_dz[j]);
|
||||
}
|
||||
|
||||
/*! compacts a node (moves empty children to the end) */
|
||||
__forceinline static void compact(AABBNodeMB_t* a)
|
||||
{
|
||||
/* find right most filled node */
|
||||
ssize_t j=N;
|
||||
for (j=j-1; j>=0; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
|
||||
/* replace empty nodes with filled nodes */
|
||||
for (ssize_t i=0; i<j; i++) {
|
||||
if (a->child(i) == NodeRef::emptyNode) {
|
||||
a->swap(i,j);
|
||||
for (j=j-1; j>i; j--)
|
||||
if (a->child(j) != NodeRef::emptyNode)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! Returns reference to specified child */
|
||||
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
|
||||
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
|
||||
|
||||
/*! stream output operator */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB_t& n)
|
||||
{
|
||||
cout << "AABBNodeMB {" << embree_endl;
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const BBox3fa b0 = n.bounds0(i);
|
||||
const BBox3fa b1 = n.bounds1(i);
|
||||
cout << " child" << i << " { " << embree_endl;
|
||||
cout << " bounds0 = " << b0 << ", " << embree_endl;
|
||||
cout << " bounds1 = " << b1 << ", " << embree_endl;
|
||||
cout << " }";
|
||||
}
|
||||
cout << "}";
|
||||
return cout;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
|
||||
|
||||
vfloat<N> lower_dx; //!< X dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_dx; //!< X dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_dy; //!< Y dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_dy; //!< Y dimension of upper bounds of all N children.
|
||||
vfloat<N> lower_dz; //!< Z dimension of lower bounds of all N children.
|
||||
vfloat<N> upper_dz; //!< Z dimension of upper bounds of all N children.
|
||||
};
|
||||
}
|
||||
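AABBNodeMB_t stores the time-0 bounds plus per-component deltas (lower_dx and friends), so bounds1 is bounds0 plus the delta and bounds(i, time) is simply a linear interpolation between the two. A scalar sketch of that encoding for one axis (illustrative names):

    #include <cstdio>
    #include <initializer_list>

    struct Interval { float lo, hi; };

    // Time-0 interval plus delta to time-1, as in lower_x/lower_dx per child and axis.
    struct MBInterval {
      float lo0, hi0;   // bounds at t = 0
      float dlo, dhi;   // bounds at t = 1 are (lo0 + dlo, hi0 + dhi)
    };

    static Interval boundsAt(const MBInterval& b, float t)
    {
      // Linear interpolation between the two key frames, matching lerp(bounds0, bounds1, t).
      return { b.lo0 + t * b.dlo, b.hi0 + t * b.dhi };
    }

    int main()
    {
      MBInterval x = { -1.0f, 1.0f, 3.0f, 3.0f };   // box slides by +3 over the frame
      for (float t : { 0.0f, 0.5f, 1.0f }) {
        Interval i = boundsAt(x, t);
        std::printf("t=%.1f -> [%.1f, %.1f]\n", t, i.lo, i.hi);
      }
      return 0;
    }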
115
engine/thirdparty/embree/kernels/bvh/bvh_node_aabb_mb4d.h
vendored
Normal file
@@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_aabb_mb.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Aligned 4D Motion Blur Node */
|
||||
template<typename NodeRef, int N>
|
||||
struct AABBNodeMB4D_t : public AABBNodeMB_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
using AABBNodeMB_t<NodeRef,N>::set;
|
||||
|
||||
typedef BVHNodeRecord<NodeRef> NodeRecord;
|
||||
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
|
||||
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
||||
|
||||
struct Create
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord*, const size_t, const FastAllocator::CachedAllocator& alloc, bool hasTimeSplits = true) const
|
||||
{
|
||||
if (hasTimeSplits)
|
||||
{
|
||||
AABBNodeMB4D_t* node = (AABBNodeMB4D_t*) alloc.malloc0(sizeof(AABBNodeMB4D_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
else
|
||||
{
|
||||
AABBNodeMB_t<NodeRef,N>* node = (AABBNodeMB_t<NodeRef,N>*) alloc.malloc0(sizeof(AABBNodeMB_t<NodeRef,N>),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline void operator() (const BuildRecord&, const BuildRecord*, NodeRef ref, NodeRecordMB4D* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i].ref == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i].ref == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
if (likely(ref.isAABBNodeMB())) {
|
||||
for (size_t i=0; i<num; i++)
|
||||
ref.getAABBNodeMB()->set(i, children[i]);
|
||||
} else {
|
||||
for (size_t i=0; i<num; i++)
|
||||
ref.getAABBNodeMB4D()->set(i, children[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
lower_t = vfloat<N>(pos_inf);
|
||||
upper_t = vfloat<N>(neg_inf);
|
||||
AABBNodeMB_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets bounding box of child. */
|
||||
__forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds)
|
||||
{
|
||||
AABBNodeMB_t<NodeRef,N>::setBounds(i, bounds.global(tbounds));
|
||||
lower_t[i] = tbounds.lower;
|
||||
upper_t[i] = tbounds.upper == 1.0f ? 1.0f+float(ulp) : tbounds.upper;
|
||||
}
|
||||
|
||||
/*! Sets bounding box and ID of child. */
|
||||
__forceinline void set(size_t i, const NodeRecordMB4D& child) {
|
||||
AABBNodeMB_t<NodeRef,N>::setRef(i,child.ref);
|
||||
setBounds(i, child.lbounds, child.dt);
|
||||
}
|
||||
|
||||
/*! Returns the expected surface area when randomly sampling the time. */
|
||||
__forceinline float expectedHalfArea(size_t i) const {
|
||||
return AABBNodeMB_t<NodeRef,N>::lbounds(i).expectedHalfArea(timeRange(i));
|
||||
}
|
||||
|
||||
/*! returns time range for specified child */
|
||||
__forceinline BBox1f timeRange(size_t i) const {
|
||||
return BBox1f(lower_t[i],upper_t[i]);
|
||||
}
|
||||
|
||||
/*! stream output operator */
|
||||
friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB4D_t& n)
|
||||
{
|
||||
cout << "AABBNodeMB4D {" << embree_endl;
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
const BBox3fa b0 = n.bounds0(i);
|
||||
const BBox3fa b1 = n.bounds1(i);
|
||||
cout << " child" << i << " { " << embree_endl;
|
||||
cout << " bounds0 = " << lerp(b0,b1,n.lower_t[i]) << ", " << embree_endl;
|
||||
cout << " bounds1 = " << lerp(b0,b1,n.upper_t[i]) << ", " << embree_endl;
|
||||
cout << " time_bounds = " << n.lower_t[i] << ", " << n.upper_t[i] << embree_endl;
|
||||
cout << " }";
|
||||
}
|
||||
cout << "}";
|
||||
return cout;
|
||||
}
|
||||
|
||||
public:
|
||||
vfloat<N> lower_t; //!< time dimension of lower bounds of all N children
|
||||
vfloat<N> upper_t; //!< time dimension of upper bounds of all N children
|
||||
};
|
||||
}
|
||||
43
engine/thirdparty/embree/kernels/bvh/bvh_node_base.h
vendored
Normal file
@@ -0,0 +1,43 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh_node_ref.h"

namespace embree
{

/*! BVHN Base Node */
template<typename NodeRef, int N>
struct BaseNode_t
{
/*! Clears the node. */
__forceinline void clear()
{
for (size_t i=0; i<N; i++)
children[i] = NodeRef::emptyNode;
}

/*! Returns reference to specified child */
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }

/*! verifies the node */
__forceinline bool verify() const
{
for (size_t i=0; i<N; i++) {
if (child(i) == NodeRef::emptyNode) {
for (; i<N; i++) {
if (child(i) != NodeRef::emptyNode)
return false;
}
break;
}
}
return true;
}

NodeRef children[N]; //!< Pointer to the N children (can be a node or leaf)
};
}
98
engine/thirdparty/embree/kernels/bvh/bvh_node_obb.h
vendored
Normal file
@@ -0,0 +1,98 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Node with unaligned bounds */
|
||||
template<typename NodeRef, int N>
|
||||
struct OBBNode_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
|
||||
struct Create
|
||||
{
|
||||
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
OBBNode_t* node = (OBBNode_t*) alloc.malloc0(sizeof(OBBNode_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const OBBox3fa& bounds) const {
|
||||
node.ungetAABBNode()->setRef(i,child);
|
||||
node.ungetAABBNode()->setBounds(i,bounds);
|
||||
}
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear()
|
||||
{
|
||||
naabb.l.vx = Vec3fa(nan);
|
||||
naabb.l.vy = Vec3fa(nan);
|
||||
naabb.l.vz = Vec3fa(nan);
|
||||
naabb.p = Vec3fa(nan);
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets bounding box. */
|
||||
__forceinline void setBounds(size_t i, const OBBox3fa& b)
|
||||
{
|
||||
assert(i < N);
|
||||
|
||||
AffineSpace3fa space = b.space;
|
||||
space.p -= b.bounds.lower;
|
||||
space = AffineSpace3fa::scale(1.0f/max(Vec3fa(1E-19f),b.bounds.upper-b.bounds.lower))*space;
|
||||
|
||||
naabb.l.vx.x[i] = space.l.vx.x;
|
||||
naabb.l.vx.y[i] = space.l.vx.y;
|
||||
naabb.l.vx.z[i] = space.l.vx.z;
|
||||
|
||||
naabb.l.vy.x[i] = space.l.vy.x;
|
||||
naabb.l.vy.y[i] = space.l.vy.y;
|
||||
naabb.l.vy.z[i] = space.l.vy.z;
|
||||
|
||||
naabb.l.vz.x[i] = space.l.vz.x;
|
||||
naabb.l.vz.y[i] = space.l.vz.y;
|
||||
naabb.l.vz.z[i] = space.l.vz.z;
|
||||
|
||||
naabb.p.x[i] = space.p.x;
|
||||
naabb.p.y[i] = space.p.y;
|
||||
naabb.p.z[i] = space.p.z;
|
||||
}
|
||||
|
||||
/*! Sets ID of child. */
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Returns the extent of the bounds of the ith child */
|
||||
__forceinline Vec3fa extent(size_t i) const {
|
||||
assert(i<N);
|
||||
const Vec3fa vx(naabb.l.vx.x[i],naabb.l.vx.y[i],naabb.l.vx.z[i]);
|
||||
const Vec3fa vy(naabb.l.vy.x[i],naabb.l.vy.y[i],naabb.l.vy.z[i]);
|
||||
const Vec3fa vz(naabb.l.vz.x[i],naabb.l.vz.y[i],naabb.l.vz.z[i]);
|
||||
return rsqrt(vx*vx + vy*vy + vz*vz);
|
||||
}
|
||||
|
||||
/*! Returns reference to specified child */
|
||||
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
|
||||
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
|
||||
|
||||
/*! output operator */
|
||||
friend embree_ostream operator<<(embree_ostream o, const OBBNode_t& n)
|
||||
{
|
||||
o << "UnAABBNode { " << n.naabb << " } " << embree_endl;
|
||||
return o;
|
||||
}
|
||||
|
||||
public:
|
||||
AffineSpace3vf<N> naabb; //!< non-axis aligned bounding boxes (bounds are [0,1] in specified space)
|
||||
};
|
||||
}
|
||||
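OBBNode_t::setBounds folds each child's box into the stored space by translating by -lower and scaling by 1/extent, so the child occupies the unit cube [0,1]^3 in its own space and a ray needs only one transform per child before a standard unit-box test. A small sketch of that normalization, with the rotation part omitted (illustrative only):

    #include <cstdio>

    struct Vec3 { float x, y, z; };

    // Normalization used when storing a child's (here axis-aligned) box:
    // shift by -lower, then scale by 1/extent, so the box maps to [0,1]^3.
    struct UnitBoxSpace {
      Vec3 offset;  // -lower, already scaled
      Vec3 scale;   // 1 / (upper - lower), clamped away from zero
    };

    static UnitBoxSpace makeSpace(Vec3 lower, Vec3 upper)
    {
      auto inv = [](float d) { return 1.0f / ((d > 1e-19f) ? d : 1e-19f); };
      Vec3 s = { inv(upper.x - lower.x), inv(upper.y - lower.y), inv(upper.z - lower.z) };
      return { { -lower.x * s.x, -lower.y * s.y, -lower.z * s.z }, s };
    }

    static Vec3 toUnit(const UnitBoxSpace& sp, Vec3 p)
    {
      return { p.x * sp.scale.x + sp.offset.x,
               p.y * sp.scale.y + sp.offset.y,
               p.z * sp.scale.z + sp.offset.z };
    }

    int main()
    {
      UnitBoxSpace sp = makeSpace({2, 2, 2}, {6, 4, 10});
      Vec3 q = toUnit(sp, {4, 3, 6});                   // center of the box
      std::printf("%.2f %.2f %.2f\n", q.x, q.y, q.z);   // prints 0.50 0.50 0.50
      return 0;
    }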
90
engine/thirdparty/embree/kernels/bvh/bvh_node_obb_mb.h
vendored
Normal file
@@ -0,0 +1,90 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename NodeRef, int N>
|
||||
struct OBBNodeMB_t : public BaseNode_t<NodeRef, N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
|
||||
struct Create
|
||||
{
|
||||
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
OBBNodeMB_t* node = (OBBNodeMB_t*) alloc.malloc0(sizeof(OBBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
|
||||
return NodeRef::encodeNode(node);
|
||||
}
|
||||
};
|
||||
|
||||
struct Set
|
||||
{
|
||||
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const LinearSpace3fa& space, const LBBox3fa& lbounds, const BBox1f dt) const {
|
||||
node.ungetAABBNodeMB()->setRef(i,child);
|
||||
node.ungetAABBNodeMB()->setBounds(i,space,lbounds.global(dt));
|
||||
}
|
||||
};
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear()
|
||||
{
|
||||
space0 = one;
|
||||
//b0.lower = b0.upper = Vec3fa(nan);
|
||||
b1.lower = b1.upper = Vec3fa(nan);
|
||||
BaseNode_t<NodeRef,N>::clear();
|
||||
}
|
||||
|
||||
/*! Sets space and bounding boxes. */
|
||||
__forceinline void setBounds(size_t i, const AffineSpace3fa& space, const LBBox3fa& lbounds) {
|
||||
setBounds(i,space,lbounds.bounds0,lbounds.bounds1);
|
||||
}
|
||||
|
||||
/*! Sets space and bounding boxes. */
|
||||
__forceinline void setBounds(size_t i, const AffineSpace3fa& s0, const BBox3fa& a, const BBox3fa& c)
|
||||
{
|
||||
assert(i < N);
|
||||
|
||||
AffineSpace3fa space = s0;
|
||||
space.p -= a.lower;
|
||||
Vec3fa scale = 1.0f/max(Vec3fa(1E-19f),a.upper-a.lower);
|
||||
space = AffineSpace3fa::scale(scale)*space;
|
||||
BBox3fa a1((a.lower-a.lower)*scale,(a.upper-a.lower)*scale);
|
||||
BBox3fa c1((c.lower-a.lower)*scale,(c.upper-a.lower)*scale);
|
||||
|
||||
space0.l.vx.x[i] = space.l.vx.x; space0.l.vx.y[i] = space.l.vx.y; space0.l.vx.z[i] = space.l.vx.z;
|
||||
space0.l.vy.x[i] = space.l.vy.x; space0.l.vy.y[i] = space.l.vy.y; space0.l.vy.z[i] = space.l.vy.z;
|
||||
space0.l.vz.x[i] = space.l.vz.x; space0.l.vz.y[i] = space.l.vz.y; space0.l.vz.z[i] = space.l.vz.z;
|
||||
space0.p .x[i] = space.p .x; space0.p .y[i] = space.p .y; space0.p .z[i] = space.p .z;
|
||||
|
||||
/*b0.lower.x[i] = a1.lower.x; b0.lower.y[i] = a1.lower.y; b0.lower.z[i] = a1.lower.z;
|
||||
b0.upper.x[i] = a1.upper.x; b0.upper.y[i] = a1.upper.y; b0.upper.z[i] = a1.upper.z;*/
|
||||
|
||||
b1.lower.x[i] = c1.lower.x; b1.lower.y[i] = c1.lower.y; b1.lower.z[i] = c1.lower.z;
|
||||
b1.upper.x[i] = c1.upper.x; b1.upper.y[i] = c1.upper.y; b1.upper.z[i] = c1.upper.z;
|
||||
}
|
||||
|
||||
/*! Sets ID of child. */
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
/*! Returns the extent of the bounds of the ith child */
|
||||
__forceinline Vec3fa extent0(size_t i) const {
|
||||
assert(i < N);
|
||||
const Vec3fa vx(space0.l.vx.x[i],space0.l.vx.y[i],space0.l.vx.z[i]);
|
||||
const Vec3fa vy(space0.l.vy.x[i],space0.l.vy.y[i],space0.l.vy.z[i]);
|
||||
const Vec3fa vz(space0.l.vz.x[i],space0.l.vz.y[i],space0.l.vz.z[i]);
|
||||
return rsqrt(vx*vx + vy*vy + vz*vz);
|
||||
}
|
||||
|
||||
public:
|
||||
AffineSpace3vf<N> space0;
|
||||
//BBox3vf<N> b0; // these are the unit bounds
|
||||
BBox3vf<N> b1;
|
||||
};
|
||||
}
|
||||
273
engine/thirdparty/embree/kernels/bvh/bvh_node_qaabb.h
vendored
Normal file
@@ -0,0 +1,273 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh_node_base.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! BVHN Quantized Node */
|
||||
template<int N>
|
||||
struct __aligned(8) QuantizedBaseNode_t
|
||||
{
|
||||
typedef unsigned char T;
|
||||
static const T MIN_QUAN = 0;
|
||||
static const T MAX_QUAN = 255;
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;
|
||||
for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;
|
||||
}
|
||||
|
||||
/*! Returns bounds of specified child. */
|
||||
__forceinline BBox3fa bounds(size_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),
|
||||
madd(scale.y,(float)lower_y[i],start.y),
|
||||
madd(scale.z,(float)lower_z[i],start.z));
|
||||
const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),
|
||||
madd(scale.y,(float)upper_y[i],start.y),
|
||||
madd(scale.z,(float)upper_z[i],start.z));
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
/*! Returns extent of bounds of specified child. */
|
||||
__forceinline Vec3fa extent(size_t i) const {
|
||||
return bounds(i).size();
|
||||
}
|
||||
|
||||
static __forceinline void init_dim(const vfloat<N> &lower,
|
||||
const vfloat<N> &upper,
|
||||
T lower_quant[N],
|
||||
T upper_quant[N],
|
||||
float &start,
|
||||
float &scale)
|
||||
{
|
||||
/* quantize bounds */
|
||||
const vbool<N> m_valid = lower != vfloat<N>(pos_inf);
|
||||
const float minF = reduce_min(lower);
|
||||
const float maxF = reduce_max(upper);
|
||||
float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);
|
||||
float decode_scale = diff / float(MAX_QUAN);
|
||||
if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero
|
||||
assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);
|
||||
const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;
|
||||
vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);
|
||||
vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);
|
||||
|
||||
/* lower/upper correction */
|
||||
vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;
|
||||
vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;
|
||||
ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);
|
||||
iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);
|
||||
|
||||
/* disable invalid lanes */
|
||||
ilower = select(m_valid,ilower,MAX_QUAN);
|
||||
iupper = select(m_valid,iupper,MIN_QUAN);
|
||||
|
||||
/* store as uchar to memory */
|
||||
vint<N>::store(lower_quant,ilower);
|
||||
vint<N>::store(upper_quant,iupper);
|
||||
start = minF;
|
||||
scale = decode_scale;
|
||||
|
||||
#if defined(DEBUG)
|
||||
vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );
|
||||
vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );
|
||||
vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);
|
||||
vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);
|
||||
assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));
|
||||
assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)
|
||||
{
|
||||
init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);
|
||||
init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);
|
||||
init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);
|
||||
}
|
||||
|
||||
__forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }
|
||||
|
||||
#if defined(__AVX512F__) // KNL
|
||||
__forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }
|
||||
#endif
|
||||
__forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }
|
||||
|
||||
__forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }
|
||||
|
||||
template <int M>
|
||||
__forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
__forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }
|
||||
__forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }
|
||||
__forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }
|
||||
#endif
|
||||
|
||||
union {
|
||||
struct {
|
||||
T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children
|
||||
T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children
|
||||
T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children
|
||||
T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children
|
||||
T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children
|
||||
T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children
|
||||
};
|
||||
T all_planes[6*N];
|
||||
};
|
||||
|
||||
Vec3f start;
|
||||
Vec3f scale;
|
||||
|
||||
friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)
|
||||
{
|
||||
o << "QuantizedBaseNode { " << embree_endl;
|
||||
o << " start " << n.start << embree_endl;
|
||||
o << " scale " << n.scale << embree_endl;
|
||||
o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;
|
||||
o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;
|
||||
o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;
|
||||
o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;
|
||||
o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;
|
||||
o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;
|
||||
o << "}" << embree_endl;
|
||||
return o;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename NodeRef, int N>
|
||||
struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N>
|
||||
{
|
||||
using BaseNode_t<NodeRef,N>::children;
|
||||
using QuantizedBaseNode_t<N>::lower_x;
|
||||
using QuantizedBaseNode_t<N>::upper_x;
|
||||
using QuantizedBaseNode_t<N>::lower_y;
|
||||
using QuantizedBaseNode_t<N>::upper_y;
|
||||
using QuantizedBaseNode_t<N>::lower_z;
|
||||
using QuantizedBaseNode_t<N>::upper_z;
|
||||
using QuantizedBaseNode_t<N>::start;
|
||||
using QuantizedBaseNode_t<N>::scale;
|
||||
using QuantizedBaseNode_t<N>::init_dim;
|
||||
|
||||
__forceinline void setRef(size_t i, const NodeRef& ref) {
|
||||
assert(i < N);
|
||||
children[i] = ref;
|
||||
}
|
||||
|
||||
struct Create2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const
|
||||
{
|
||||
__aligned(64) AABBNode_t<NodeRef,N> node;
|
||||
node.clear();
|
||||
for (size_t i=0; i<n; i++) {
|
||||
node.setBounds(i,children[i].bounds());
|
||||
}
|
||||
QuantizedNode_t *qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment);
|
||||
qnode->init(node);
|
||||
|
||||
return (size_t)qnode | NodeRef::tyQuantizedNode;
|
||||
}
|
||||
};
|
||||
|
||||
struct Set2
|
||||
{
|
||||
template<typename BuildRecord>
|
||||
__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
// check that empty children are only at the end of the child list
|
||||
bool emptyChild = false;
|
||||
for (size_t i=0; i<num; i++) {
|
||||
emptyChild |= (children[i] == NodeRef::emptyNode);
|
||||
assert(emptyChild == (children[i] == NodeRef::emptyNode));
|
||||
}
|
||||
#endif
|
||||
QuantizedNode_t* node = ref.quantizedNode();
|
||||
for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
|
||||
return ref;
|
||||
}
|
||||
};
|
||||
|
||||
__forceinline void init(AABBNode_t<NodeRef,N>& node)
|
||||
{
|
||||
for (size_t i=0;i<N;i++) children[i] = NodeRef::emptyNode;
|
||||
init_dim(node);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/*! BVHN Quantized Motion Blur Node */
|
||||
template<int N>
|
||||
struct __aligned(8) QuantizedBaseNodeMB_t
|
||||
{
|
||||
QuantizedBaseNode_t<N> node0;
|
||||
QuantizedBaseNode_t<N> node1;
|
||||
|
||||
/*! Clears the node. */
|
||||
__forceinline void clear() {
|
||||
node0.clear();
|
||||
node1.clear();
|
||||
}
|
||||
|
||||
/*! Returns bounds of specified child. */
|
||||
__forceinline BBox3fa bounds(size_t i) const
|
||||
{
|
||||
assert(i < N);
|
||||
BBox3fa bounds0 = node0.bounds(i);
|
||||
BBox3fa bounds1 = node1.bounds(i);
|
||||
bounds0.extend(bounds1);
|
||||
return bounds0;
|
||||
}
|
||||
|
||||
/*! Returns extent of bounds of specified child. */
|
||||
__forceinline Vec3fa extent(size_t i) const {
|
||||
return bounds(i).size();
|
||||
}
|
||||
|
||||
__forceinline vbool<N> validMask() const { return node0.validMask(); }
|
||||
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); }
|
||||
template<typename T>
|
||||
__forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); }
|
||||
|
||||
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); }
|
||||
template<int M>
|
||||
__forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); }
|
||||
|
||||
};
|
||||
}
|
||||
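QuantizedBaseNode_t encodes every plane as an 8-bit index into a per-node grid, rounding lower planes down and upper planes up so the decoded box always contains the original, and decodes with start + scale * q exactly as dequantizeLowerX/dequantizeUpperX do. A minimal sketch of that conservative round trip for one axis (illustrative; the grid is deliberately inflated a little so the decoded upper plane cannot fall short):

    #include <algorithm>
    #include <cassert>
    #include <cmath>
    #include <cstdio>

    int main()
    {
      const int   MAX_QUAN = 255;
      const float lower = 1.37f, upper = 9.62f;   // true child bounds on one axis

      // Per-node grid: 'start' at the minimum, 'scale' spanning a slightly
      // inflated range so rounding never loses the upper plane.
      const float start = lower;
      const float scale = 1.0001f * (upper - lower) / float(MAX_QUAN);

      // Conservative encoding: round the lower plane down, the upper plane up.
      const int qlo = std::max(0,        (int)std::floor((lower - start) / scale));
      const int qhi = std::min(MAX_QUAN, (int)std::ceil ((upper - start) / scale));

      // Decoding as in dequantizeLowerX/dequantizeUpperX: start + scale * q.
      const float dlo = start + scale * float(qlo);
      const float dhi = start + scale * float(qhi);

      assert(dlo <= lower && dhi >= upper);       // decoded box contains the original
      std::printf("[%f, %f] -> q [%d, %d] -> [%f, %f]\n", lower, upper, qlo, qhi, dlo, dhi);
      return 0;
    }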
242
engine/thirdparty/embree/kernels/bvh/bvh_node_ref.h
vendored
Normal file
@@ -0,0 +1,242 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../common/default.h"
|
||||
#include "../common/alloc.h"
|
||||
#include "../common/accel.h"
|
||||
#include "../common/device.h"
|
||||
#include "../common/scene.h"
|
||||
#include "../geometry/primitive.h"
|
||||
#include "../common/ray.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* BVH node reference with bounds */
|
||||
template<typename NodeRef>
|
||||
struct BVHNodeRecord
|
||||
{
|
||||
__forceinline BVHNodeRecord() {}
|
||||
__forceinline BVHNodeRecord(NodeRef ref, const BBox3fa& bounds) : ref(ref), bounds((BBox3fx)bounds) {}
|
||||
__forceinline BVHNodeRecord(NodeRef ref, const BBox3fx& bounds) : ref(ref), bounds(bounds) {}
|
||||
|
||||
NodeRef ref;
|
||||
BBox3fx bounds;
|
||||
};
|
||||
|
||||
template<typename NodeRef>
|
||||
struct BVHNodeRecordMB
|
||||
{
|
||||
__forceinline BVHNodeRecordMB() {}
|
||||
__forceinline BVHNodeRecordMB(NodeRef ref, const LBBox3fa& lbounds) : ref(ref), lbounds(lbounds) {}
|
||||
|
||||
NodeRef ref;
|
||||
LBBox3fa lbounds;
|
||||
};
|
||||
|
||||
template<typename NodeRef>
|
||||
struct BVHNodeRecordMB4D
|
||||
{
|
||||
__forceinline BVHNodeRecordMB4D() {}
|
||||
__forceinline BVHNodeRecordMB4D(NodeRef ref, const LBBox3fa& lbounds, const BBox1f& dt) : ref(ref), lbounds(lbounds), dt(dt) {}
|
||||
|
||||
NodeRef ref;
|
||||
LBBox3fa lbounds;
|
||||
BBox1f dt;
|
||||
};
|
||||
|
||||
template<typename NodeRef, int N> struct BaseNode_t;
|
||||
template<typename NodeRef, int N> struct AABBNode_t;
|
||||
template<typename NodeRef, int N> struct AABBNodeMB_t;
|
||||
template<typename NodeRef, int N> struct AABBNodeMB4D_t;
|
||||
template<typename NodeRef, int N> struct OBBNode_t;
|
||||
template<typename NodeRef, int N> struct OBBNodeMB_t;
|
||||
template<typename NodeRef, int N> struct QuantizedNode_t;
|
||||
template<typename NodeRef, int N> struct QuantizedNodeMB_t;
|
||||
|
||||
/*! Pointer that points to a node or a list of primitives */
|
||||
template<int N>
|
||||
struct NodeRefPtr
|
||||
{
|
||||
//template<int NN> friend class BVHN;
|
||||
|
||||
/*! Number of bytes the nodes and primitives are minimally aligned to.*/
|
||||
static const size_t byteAlignment = 16;
|
||||
static const size_t byteNodeAlignment = 4*N;
|
||||
|
||||
/*! highest address bit is used as a barrier for some algorithms */
|
||||
static const size_t barrier_mask = (1LL << (8*sizeof(size_t)-1));
|
||||
|
||||
/*! Masks the bits that store the number of items per leaf. */
|
||||
static const size_t align_mask = byteAlignment-1;
|
||||
static const size_t items_mask = byteAlignment-1;
|
||||
|
||||
/*! different supported node types */
|
||||
static const size_t tyAABBNode = 0;
|
||||
static const size_t tyAABBNodeMB = 1;
|
||||
static const size_t tyAABBNodeMB4D = 6;
|
||||
static const size_t tyOBBNode = 2;
|
||||
static const size_t tyOBBNodeMB = 3;
|
||||
static const size_t tyQuantizedNode = 5;
|
||||
static const size_t tyLeaf = 8;
|
||||
|
||||
/*! Empty node */
|
||||
static const size_t emptyNode = tyLeaf;
|
||||
|
||||
/*! Invalid node, used as marker in traversal */
|
||||
static const size_t invalidNode = (((size_t)-1) & (~items_mask)) | (tyLeaf+0);
|
||||
static const size_t popRay = (((size_t)-1) & (~items_mask)) | (tyLeaf+1);
|
||||
|
||||
/*! Maximum number of primitive blocks in a leaf. */
|
||||
static const size_t maxLeafBlocks = items_mask-tyLeaf;
|
||||
|
||||
/*! Default constructor */
|
||||
__forceinline NodeRefPtr () {}
|
||||
|
||||
/*! Construction from integer */
|
||||
__forceinline NodeRefPtr (size_t ptr) : ptr(ptr) {}
|
||||
|
||||
/*! Cast to size_t */
|
||||
__forceinline operator size_t() const { return ptr; }
|
||||
|
||||
/*! Sets the barrier bit. */
|
||||
__forceinline void setBarrier() {
|
||||
#if defined(__64BIT__)
|
||||
assert(!isBarrier());
|
||||
ptr |= barrier_mask;
|
||||
#else
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Clears the barrier bit. */
|
||||
__forceinline void clearBarrier() {
|
||||
#if defined(__64BIT__)
|
||||
ptr &= ~barrier_mask;
|
||||
#else
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Checks if this is a barrier. A barrier tells the top level tree rotations how deep to enter the tree. */
|
||||
__forceinline bool isBarrier() const { return (ptr & barrier_mask) != 0; }
|
||||
|
||||
/*! checks if this is a leaf */
|
||||
__forceinline size_t isLeaf() const { return ptr & tyLeaf; }
|
||||
|
||||
/*! returns node type */
|
||||
__forceinline int type() const { return ptr & (size_t)align_mask; }
|
||||
|
||||
/*! checks if this is a node */
|
||||
__forceinline int isAABBNode() const { return (ptr & (size_t)align_mask) == tyAABBNode; }
|
||||
|
||||
/*! checks if this is a motion blur node */
|
||||
__forceinline int isAABBNodeMB() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB; }
|
||||
|
||||
/*! checks if this is a 4D motion blur node */
|
||||
__forceinline int isAABBNodeMB4D() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB4D; }
|
||||
|
||||
/*! checks if this is a node with unaligned bounding boxes */
|
||||
__forceinline int isOBBNode() const { return (ptr & (size_t)align_mask) == tyOBBNode; }
|
||||
|
||||
/*! checks if this is a motion blur node with unaligned bounding boxes */
|
||||
__forceinline int isOBBNodeMB() const { return (ptr & (size_t)align_mask) == tyOBBNodeMB; }
|
||||
|
||||
/*! checks if this is a quantized node */
|
||||
__forceinline int isQuantizedNode() const { return (ptr & (size_t)align_mask) == tyQuantizedNode; }
|
||||
|
||||
/*! Encodes a node */
|
||||
static __forceinline NodeRefPtr encodeNode(AABBNode_t<NodeRefPtr,N>* node) {
|
||||
assert(!((size_t)node & align_mask));
|
||||
return NodeRefPtr((size_t) node);
|
||||
}
|
||||
|
||||
static __forceinline NodeRefPtr encodeNode(AABBNodeMB_t<NodeRefPtr,N>* node) {
|
||||
assert(!((size_t)node & align_mask));
|
||||
return NodeRefPtr((size_t) node | tyAABBNodeMB);
|
||||
}
|
||||
|
||||
static __forceinline NodeRefPtr encodeNode(AABBNodeMB4D_t<NodeRefPtr,N>* node) {
|
||||
assert(!((size_t)node & align_mask));
|
||||
return NodeRefPtr((size_t) node | tyAABBNodeMB4D);
|
||||
}
|
||||
|
||||
/*! Encodes an unaligned node */
|
||||
static __forceinline NodeRefPtr encodeNode(OBBNode_t<NodeRefPtr,N>* node) {
|
||||
return NodeRefPtr((size_t) node | tyOBBNode);
|
||||
}
|
||||
|
||||
/*! Encodes an unaligned motion blur node */
|
||||
static __forceinline NodeRefPtr encodeNode(OBBNodeMB_t<NodeRefPtr,N>* node) {
|
||||
return NodeRefPtr((size_t) node | tyOBBNodeMB);
|
||||
}
|
||||
|
||||
/*! Encodes a leaf */
|
||||
static __forceinline NodeRefPtr encodeLeaf(void* tri, size_t num) {
|
||||
assert(!((size_t)tri & align_mask));
|
||||
assert(num <= maxLeafBlocks);
|
||||
return NodeRefPtr((size_t)tri | (tyLeaf+min(num,(size_t)maxLeafBlocks)));
|
||||
}
|
||||
|
||||
/*! Encodes a leaf */
|
||||
static __forceinline NodeRefPtr encodeTypedLeaf(void* ptr, size_t ty) {
|
||||
assert(!((size_t)ptr & align_mask));
|
||||
return NodeRefPtr((size_t)ptr | (tyLeaf+ty));
|
||||
}
|
||||
|
||||
/*! returns base node pointer */
|
||||
__forceinline BaseNode_t<NodeRefPtr,N>* baseNode()
|
||||
{
|
||||
assert(!isLeaf());
|
||||
return (BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
|
||||
}
|
||||
__forceinline const BaseNode_t<NodeRefPtr,N>* baseNode() const
|
||||
{
|
||||
assert(!isLeaf());
|
||||
return (const BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
|
||||
}
|
||||
|
||||
/*! returns node pointer */
|
||||
__forceinline AABBNode_t<NodeRefPtr,N>* getAABBNode() { assert(isAABBNode()); return ( AABBNode_t<NodeRefPtr,N>*)ptr; }
|
||||
__forceinline const AABBNode_t<NodeRefPtr,N>* getAABBNode() const { assert(isAABBNode()); return (const AABBNode_t<NodeRefPtr,N>*)ptr; }
|
||||
|
||||
/*! returns motion blur node pointer */
|
||||
__forceinline AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() { assert(isAABBNodeMB() || isAABBNodeMB4D()); return ( AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() const { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (const AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns 4D motion blur node pointer */
|
||||
__forceinline AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() { assert(isAABBNodeMB4D()); return ( AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() const { assert(isAABBNodeMB4D()); return (const AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns unaligned node pointer */
|
||||
__forceinline OBBNode_t<NodeRefPtr,N>* ungetAABBNode() { assert(isOBBNode()); return ( OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const OBBNode_t<NodeRefPtr,N>* ungetAABBNode() const { assert(isOBBNode()); return (const OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns unaligned motion blur node pointer */
|
||||
__forceinline OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() { assert(isOBBNodeMB()); return ( OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
__forceinline const OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() const { assert(isOBBNodeMB()); return (const OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
|
||||
|
||||
/*! returns quantized node pointer */
|
||||
__forceinline QuantizedNode_t<NodeRefPtr,N>* quantizedNode() { assert(isQuantizedNode()); return ( QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
|
||||
__forceinline const QuantizedNode_t<NodeRefPtr,N>* quantizedNode() const { assert(isQuantizedNode()); return (const QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
|
||||
|
||||
/*! returns leaf pointer */
|
||||
__forceinline char* leaf(size_t& num) const {
|
||||
assert(isLeaf());
|
||||
num = (ptr & (size_t)items_mask)-tyLeaf;
|
||||
return (char*)(ptr & ~(size_t)align_mask);
|
||||
}
|
||||
|
||||
/*! clear all bit flags */
|
||||
__forceinline void clearFlags() {
|
||||
ptr &= ~(size_t)align_mask;
|
||||
}
|
||||
|
||||
/*! returns the wideness */
|
||||
__forceinline size_t getN() const { return N; }
|
||||
|
||||
public:
|
||||
size_t ptr;
|
||||
};
|
||||
}
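// Illustrative sketch (not from the embree sources): NodeRefPtr above packs the node
// type into the low 4 bits of a pointer that is at least 16-byte aligned, so the
// payload pointer is recovered by masking those bits off again. The constants below
// mirror align_mask/tyLeaf from the struct; everything else is simplified.
#include <cassert>
#include <cstddef>

constexpr size_t kAlignMask = 0xF;   // low 4 bits are free on 16-byte aligned data
constexpr size_t kTyLeaf    = 0x8;   // leaf marker bit, as in NodeRefPtr::tyLeaf

// Encode: the pointer must be 16-byte aligned so the tag cannot collide with it.
inline size_t encodeRef(void* node, size_t type) {
  assert((reinterpret_cast<size_t>(node) & kAlignMask) == 0);
  return reinterpret_cast<size_t>(node) | type;
}

// Decode: mask the tag off to get the pointer back, read the tag separately.
inline void*  decodePtr (size_t ref) { return reinterpret_cast<void*>(ref & ~kAlignMask); }
inline size_t decodeType(size_t ref) { return ref & kAlignMask; }
inline bool   refIsLeaf (size_t ref) { return (ref & kTyLeaf) != 0; }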
|
||||
258
engine/thirdparty/embree/kernels/bvh/bvh_refit.cpp
vendored
Normal file
@@ -0,0 +1,258 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_refit.h"
|
||||
#include "bvh_statistics.h"
|
||||
|
||||
#include "../geometry/linei.h"
|
||||
#include "../geometry/triangle.h"
|
||||
#include "../geometry/trianglev.h"
|
||||
#include "../geometry/trianglei.h"
|
||||
#include "../geometry/quadv.h"
|
||||
#include "../geometry/object.h"
|
||||
#include "../geometry/instance.h"
|
||||
#include "../geometry/instance_array.h"
|
||||
|
||||
#include "../../common/algorithms/parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
static const size_t SINGLE_THREAD_THRESHOLD = 4*1024;
|
||||
|
||||
template<int N>
|
||||
__forceinline bool compare(const typename BVHN<N>::NodeRef* a, const typename BVHN<N>::NodeRef* b)
|
||||
{
|
||||
size_t sa = *(size_t*)&a->node()->lower_x;
|
||||
size_t sb = *(size_t*)&b->node()->lower_x;
|
||||
return sa < sb;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
BVHNRefitter<N>::BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds)
|
||||
: bvh(bvh), leafBounds(leafBounds), numSubTrees(0)
|
||||
{
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNRefitter<N>::refit()
|
||||
{
|
||||
if (bvh->numPrimitives <= SINGLE_THREAD_THRESHOLD) {
|
||||
bvh->bounds = LBBox3fa(recurse_bottom(bvh->root));
|
||||
}
|
||||
else
|
||||
{
|
||||
BBox3fa subTreeBounds[MAX_NUM_SUB_TREES];
|
||||
numSubTrees = 0;
|
||||
gather_subtree_refs(bvh->root,numSubTrees,0);
|
||||
if (numSubTrees)
|
||||
parallel_for(size_t(0), numSubTrees, size_t(1), [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
NodeRef& ref = subTrees[i];
|
||||
subTreeBounds[i] = recurse_bottom(ref);
|
||||
}
|
||||
});
|
||||
|
||||
numSubTrees = 0;
|
||||
bvh->bounds = LBBox3fa(refit_toplevel(bvh->root,numSubTrees,subTreeBounds,0));
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void BVHNRefitter<N>::gather_subtree_refs(NodeRef& ref,
|
||||
size_t &subtrees,
|
||||
const size_t depth)
|
||||
{
|
||||
if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
|
||||
{
|
||||
assert(subtrees < MAX_NUM_SUB_TREES);
|
||||
subTrees[subtrees++] = ref;
|
||||
return;
|
||||
}
|
||||
|
||||
if (ref.isAABBNode())
|
||||
{
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
for (size_t i=0; i<N; i++) {
|
||||
NodeRef& child = node->child(i);
|
||||
if (unlikely(child == BVH::emptyNode)) continue;
|
||||
gather_subtree_refs(child,subtrees,depth+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
BBox3fa BVHNRefitter<N>::refit_toplevel(NodeRef& ref,
|
||||
size_t &subtrees,
|
||||
const BBox3fa *const subTreeBounds,
|
||||
const size_t depth)
|
||||
{
|
||||
if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
|
||||
{
|
||||
assert(subtrees < MAX_NUM_SUB_TREES);
|
||||
assert(subTrees[subtrees] == ref);
|
||||
return subTreeBounds[subtrees++];
|
||||
}
|
||||
|
||||
if (ref.isAABBNode())
|
||||
{
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
BBox3fa bounds[N];
|
||||
|
||||
for (size_t i=0; i<N; i++)
|
||||
{
|
||||
NodeRef& child = node->child(i);
|
||||
|
||||
if (unlikely(child == BVH::emptyNode))
|
||||
bounds[i] = BBox3fa(empty);
|
||||
else
|
||||
bounds[i] = refit_toplevel(child,subtrees,subTreeBounds,depth+1);
|
||||
}
|
||||
|
||||
BBox3vf<N> boundsT = transpose<N>(bounds);
|
||||
|
||||
/* set new bounds */
|
||||
node->lower_x = boundsT.lower.x;
|
||||
node->lower_y = boundsT.lower.y;
|
||||
node->lower_z = boundsT.lower.z;
|
||||
node->upper_x = boundsT.upper.x;
|
||||
node->upper_y = boundsT.upper.y;
|
||||
node->upper_z = boundsT.upper.z;
|
||||
|
||||
return merge<N>(bounds);
|
||||
}
|
||||
else
|
||||
return leafBounds.leafBounds(ref);
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// =========================================================
|
||||
// =========================================================
|
||||
|
||||
|
||||
template<int N>
|
||||
BBox3fa BVHNRefitter<N>::recurse_bottom(NodeRef& ref)
|
||||
{
|
||||
/* this is a leaf node */
|
||||
if (unlikely(ref.isLeaf()))
|
||||
return leafBounds.leafBounds(ref);
|
||||
|
||||
/* recurse if this is an internal node */
|
||||
AABBNode* node = ref.getAABBNode();
|
||||
|
||||
/* enable exclusive prefetch for >= AVX platforms */
|
||||
#if defined(__AVX__)
|
||||
BVH::prefetchW(ref);
|
||||
#endif
|
||||
BBox3fa bounds[N];
|
||||
|
||||
for (size_t i=0; i<N; i++)
|
||||
if (unlikely(node->child(i) == BVH::emptyNode))
|
||||
{
|
||||
bounds[i] = BBox3fa(empty);
|
||||
}
|
||||
else
|
||||
bounds[i] = recurse_bottom(node->child(i));
|
||||
|
||||
/* AOS to SOA transform */
|
||||
BBox3vf<N> boundsT = transpose<N>(bounds);
|
||||
|
||||
/* set new bounds */
|
||||
node->lower_x = boundsT.lower.x;
|
||||
node->lower_y = boundsT.lower.y;
|
||||
node->lower_z = boundsT.lower.z;
|
||||
node->upper_x = boundsT.upper.x;
|
||||
node->upper_y = boundsT.upper.y;
|
||||
node->upper_z = boundsT.upper.z;
|
||||
|
||||
return merge<N>(bounds);
|
||||
}
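// Illustrative sketch (not from the embree sources): recurse_bottom above refits the
// children first, then transposes the N per-child boxes (array-of-structs) into the
// node's lower_x/.../upper_z arrays (struct-of-arrays), which is the layout the SIMD
// traversal expects. A plain scalar version of that write-back:
#include <algorithm>

struct Box { float lo[3], hi[3]; };

template<int N>
struct WideNode {                    // SoA layout: one array per bound component
  float lower_x[N], lower_y[N], lower_z[N];
  float upper_x[N], upper_y[N], upper_z[N];
};

template<int N>
Box setBoundsAndMerge(WideNode<N>& node, const Box (&child)[N]) {
  Box merged = { { +1e30f, +1e30f, +1e30f }, { -1e30f, -1e30f, -1e30f } };
  for (int i = 0; i < N; i++) {
    node.lower_x[i] = child[i].lo[0]; node.upper_x[i] = child[i].hi[0];
    node.lower_y[i] = child[i].lo[1]; node.upper_y[i] = child[i].hi[1];
    node.lower_z[i] = child[i].lo[2]; node.upper_z[i] = child[i].hi[2];
    for (int k = 0; k < 3; k++) {
      merged.lo[k] = std::min(merged.lo[k], child[i].lo[k]);
      merged.hi[k] = std::max(merged.hi[k], child[i].hi[k]);
    }
  }
  return merged;                     // returned upward, like merge<N>(bounds)
}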
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
BVHNRefitT<N,Mesh,Primitive>::BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode)
|
||||
: bvh(bvh), builder(builder), refitter(new BVHNRefitter<N>(bvh,*(typename BVHNRefitter<N>::LeafBoundsInterface*)this)), mesh(mesh), topologyVersion(0) {}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNRefitT<N,Mesh,Primitive>::clear()
|
||||
{
|
||||
if (builder)
|
||||
builder->clear();
|
||||
}
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
void BVHNRefitT<N,Mesh,Primitive>::build()
|
||||
{
|
||||
if (mesh->topologyChanged(topologyVersion)) {
|
||||
topologyVersion = mesh->getTopologyVersion();
|
||||
builder->build();
|
||||
}
|
||||
else
|
||||
refitter->refit();
|
||||
}
|
||||
|
||||
template class BVHNRefitter<4>;
|
||||
#if defined(__AVX__)
|
||||
template class BVHNRefitter<8>;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
|
||||
Builder* BVH4Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4> ((BVH4*)accel,BVH4Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH4Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4v>((BVH4*)accel,BVH4Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH4Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4i>((BVH4*)accel,BVH4Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
|
||||
|
||||
Builder* BVH8Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4> ((BVH8*)accel,BVH8Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH8Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4v>((BVH8*)accel,BVH8Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
Builder* BVH8Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4i>((BVH8*)accel,BVH8Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,QuadMesh,Quad4v>((BVH4*)accel,BVH4Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,QuadMesh,Quad4v>((BVH8*)accel,BVH8Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,UserGeometry,Object>((BVH4*)accel,BVH4VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,UserGeometry,Object>((BVH8*)accel,BVH8VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE)
|
||||
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,Instance,InstancePrimitive>((BVH4*)accel,BVH4InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,Instance,InstancePrimitive>((BVH8*)accel,BVH8InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
|
||||
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH4InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)accel,BVH4InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
|
||||
#if defined(__AVX__)
|
||||
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
|
||||
Builder* BVH8InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)accel,BVH8InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
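// Illustrative sketch (not from the embree sources): BVHNRefitter splits a refit into
// (1) gathering subtree roots down to a fixed depth, (2) refitting those subtrees
// independently (a parallel_for in the real code), and (3) refitting only the shallow
// top part afterwards. Names and the binary-tree layout below are placeholders.
#include <algorithm>
#include <vector>

struct Node2 { Node2* child[2]; bool leaf; float lo[3], hi[3]; };

static void gatherSubtrees(Node2* n, int depth, int maxDepth, std::vector<Node2*>& roots) {
  if (n->leaf || depth >= maxDepth) { roots.push_back(n); return; }
  gatherSubtrees(n->child[0], depth + 1, maxDepth, roots);
  gatherSubtrees(n->child[1], depth + 1, maxDepth, roots);
}

static void refitBottomUp(Node2* n) {            // phase 2: per subtree, parallelizable
  if (n->leaf) return;                           // leaf bounds assumed already up to date
  refitBottomUp(n->child[0]);
  refitBottomUp(n->child[1]);
  for (int k = 0; k < 3; k++) {
    n->lo[k] = std::min(n->child[0]->lo[k], n->child[1]->lo[k]);
    n->hi[k] = std::max(n->child[0]->hi[k], n->child[1]->hi[k]);
  }
}

static void refitTop(Node2* n, int depth, int maxDepth) {  // phase 3: top levels only
  if (n->leaf || depth >= maxDepth) return;                // subtree roots already refit
  refitTop(n->child[0], depth + 1, maxDepth);
  refitTop(n->child[1], depth + 1, maxDepth);
  for (int k = 0; k < 3; k++) {
    n->lo[k] = std::min(n->child[0]->lo[k], n->child[1]->lo[k]);
    n->hi[k] = std::max(n->child[0]->hi[k], n->child[1]->hi[k]);
  }
}

void refitTree(Node2* root, int maxDepth = 4) {
  std::vector<Node2*> roots;
  gatherSubtrees(root, 0, maxDepth, roots);
  for (Node2* r : roots) refitBottomUp(r);       // embree runs this loop with parallel_for
  refitTop(root, 0, maxDepth);
}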
|
||||
95
engine/thirdparty/embree/kernels/bvh/bvh_refit.h
vendored
Normal file
@@ -0,0 +1,95 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../bvh/bvh.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N>
|
||||
class BVHNRefitter
|
||||
{
|
||||
public:
|
||||
|
||||
/*! Type shortcuts */
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
struct LeafBoundsInterface {
|
||||
virtual const BBox3fa leafBounds(NodeRef& ref) const = 0;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/*! Constructor. */
|
||||
BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds);
|
||||
|
||||
/*! refits the BVH */
|
||||
void refit();
|
||||
|
||||
private:
|
||||
/* single-threaded subtree extraction based on BVH depth */
|
||||
void gather_subtree_refs(NodeRef& ref,
|
||||
size_t &subtrees,
|
||||
const size_t depth = 0);
|
||||
|
||||
/* single-threaded top-level refit */
|
||||
BBox3fa refit_toplevel(NodeRef& ref,
|
||||
size_t &subtrees,
|
||||
const BBox3fa *const subTreeBounds,
|
||||
const size_t depth = 0);
|
||||
|
||||
/* single-threaded subtree refit */
|
||||
BBox3fa recurse_bottom(NodeRef& ref);
|
||||
|
||||
public:
|
||||
BVH* bvh; //!< BVH to refit
|
||||
const LeafBoundsInterface& leafBounds; //!< calculates bounds of leaves
|
||||
|
||||
static const size_t MAX_SUB_TREE_EXTRACTION_DEPTH = (N==4) ? 4 : (N==8) ? 3 : 3;
|
||||
static const size_t MAX_NUM_SUB_TREES = (N==4) ? 256 : (N==8) ? 512 : N*N*N; // N ^ MAX_SUB_TREE_EXTRACTION_DEPTH
|
||||
size_t numSubTrees;
|
||||
NodeRef subTrees[MAX_NUM_SUB_TREES];
|
||||
};
|
||||
|
||||
template<int N, typename Mesh, typename Primitive>
|
||||
class BVHNRefitT : public Builder, public BVHNRefitter<N>::LeafBoundsInterface
|
||||
{
|
||||
public:
|
||||
|
||||
/*! Type shortcuts */
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
public:
|
||||
BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode);
|
||||
|
||||
virtual void build();
|
||||
|
||||
virtual void clear();
|
||||
|
||||
virtual const BBox3fa leafBounds (NodeRef& ref) const
|
||||
{
|
||||
size_t num; char* prim = ref.leaf(num);
|
||||
if (unlikely(ref == BVH::emptyNode)) return empty;
|
||||
|
||||
BBox3fa bounds = empty;
|
||||
for (size_t i=0; i<num; i++)
|
||||
bounds.extend(((Primitive*)prim)[i].update(mesh));
|
||||
return bounds;
|
||||
}
|
||||
|
||||
private:
|
||||
BVH* bvh;
|
||||
std::unique_ptr<Builder> builder;
|
||||
std::unique_ptr<BVHNRefitter<N>> refitter;
|
||||
Mesh* mesh;
|
||||
unsigned int topologyVersion;
|
||||
};
|
||||
}
|
||||
}
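// Illustrative sketch (not from the embree sources): BVHNRefitT only refits when the
// mesh topology is unchanged; any topology change forces a full rebuild, because a
// refit updates bounds but never changes the tree structure. The version-counter
// pattern looks roughly like this (names are placeholders, not the embree API):
struct MeshLike {
  unsigned topologyVersion = 0;          // bumped whenever connectivity changes
  unsigned getTopologyVersion() const { return topologyVersion; }
};

struct AccelLike {
  unsigned builtTopologyVersion = 0;
  void rebuild(const MeshLike&) { /* full SAH build */ }
  void refit  (const MeshLike&) { /* bottom-up bounds update only */ }

  void update(const MeshLike& mesh) {
    if (mesh.getTopologyVersion() != builtTopologyVersion) {
      builtTopologyVersion = mesh.getTopologyVersion();
      rebuild(mesh);                     // structure is stale: rebuild from scratch
    } else {
      refit(mesh);                       // only vertex positions moved: cheap refit
    }
  }
};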
|
||||
127
engine/thirdparty/embree/kernels/bvh/bvh_rotate.cpp
vendored
Normal file
@@ -0,0 +1,127 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_rotate.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! Computes half surface area of box. */
|
||||
__forceinline float halfArea3f(const BBox<vfloat4>& box) {
|
||||
const vfloat4 d = box.size();
|
||||
const vfloat4 a = d*shuffle<1,2,0,3>(d);
|
||||
return a[0]+a[1]+a[2];
|
||||
}
|
||||
|
||||
size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
|
||||
{
|
||||
/*! nothing to rotate if we reached a leaf node. */
|
||||
if (parentRef.isBarrier()) return 0;
|
||||
if (parentRef.isLeaf()) return 0;
|
||||
AABBNode* parent = parentRef.getAABBNode();
|
||||
|
||||
/*! rotate all children first */
|
||||
vint4 cdepth;
|
||||
for (size_t c=0; c<4; c++)
|
||||
cdepth[c] = (int)rotate(parent->child(c),depth+1);
|
||||
|
||||
/* compute current areas of all children */
|
||||
vfloat4 sizeX = parent->upper_x-parent->lower_x;
|
||||
vfloat4 sizeY = parent->upper_y-parent->lower_y;
|
||||
vfloat4 sizeZ = parent->upper_z-parent->lower_z;
|
||||
vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ);
|
||||
|
||||
/*! get node bounds */
|
||||
BBox<vfloat4> child1_0,child1_1,child1_2,child1_3;
|
||||
parent->bounds(child1_0,child1_1,child1_2,child1_3);
|
||||
|
||||
/*! Find best rotation. We pick a first child (child1) and a sub-child
|
||||
(child2child) of a different second child (child2), and swap child1
|
||||
and child2child. We perform the best such swap. */
|
||||
float bestArea = 0;
|
||||
size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1;
|
||||
for (size_t c2=0; c2<4; c2++)
|
||||
{
|
||||
/*! ignore leaf nodes as we cannot descend into them */
|
||||
if (parent->child(c2).isBarrier()) continue;
|
||||
if (parent->child(c2).isLeaf()) continue;
|
||||
AABBNode* child2 = parent->child(c2).getAABBNode();
|
||||
|
||||
/*! transpose child bounds */
|
||||
BBox<vfloat4> child2c0,child2c1,child2c2,child2c3;
|
||||
child2->bounds(child2c0,child2c1,child2c2,child2c3);
|
||||
|
||||
/*! put child1_0 at each child2 position */
|
||||
float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3));
|
||||
float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3));
|
||||
float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3));
|
||||
float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0));
|
||||
vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03);
|
||||
vfloat4 min0 = vreduce_min(cost0);
|
||||
int pos0 = (int)bsf(movemask(min0 == cost0));
|
||||
|
||||
/*! put child1_1 at each child2 position */
|
||||
float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3));
|
||||
float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3));
|
||||
float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3));
|
||||
float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1));
|
||||
vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13);
|
||||
vfloat4 min1 = vreduce_min(cost1);
|
||||
int pos1 = (int)bsf(movemask(min1 == cost1));
|
||||
|
||||
/*! put child1_2 at each child2 position */
|
||||
float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3));
|
||||
float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3));
|
||||
float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3));
|
||||
float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2));
|
||||
vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23);
|
||||
vfloat4 min2 = vreduce_min(cost2);
|
||||
int pos2 = (int)bsf(movemask(min2 == cost2));
|
||||
|
||||
/*! put child1_3 at each child2 position */
|
||||
float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3));
|
||||
float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3));
|
||||
float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3));
|
||||
float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3));
|
||||
vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33);
|
||||
vfloat4 min3 = vreduce_min(cost3);
|
||||
int pos3 = (int)bsf(movemask(min3 == cost3));
|
||||
|
||||
/*! find best other child */
|
||||
vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]);
|
||||
int pos[4] = { pos0,pos1,pos2,pos3 };
|
||||
const size_t mbd = BVH4::maxBuildDepth;
|
||||
vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints
|
||||
valid &= vint4(int(c2)) != vint4(step);
|
||||
if (none(valid)) continue;
|
||||
size_t c1 = select_min(valid,area0123);
|
||||
float area = area0123[c1];
|
||||
if (c1 == c2) continue; // can happen if bounds are NANs
|
||||
|
||||
/*! accept a swap when it reduces cost and is not swapping a node with itself */
|
||||
if (area < bestArea) {
|
||||
bestArea = area;
|
||||
bestChild1 = c1;
|
||||
bestChild2 = c2;
|
||||
bestChild2Child = pos[c1];
|
||||
}
|
||||
}
|
||||
|
||||
/*! if we did not find a swap that improves the SAH then do nothing */
|
||||
if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth);
|
||||
|
||||
/*! perform the best found tree rotation */
|
||||
AABBNode* child2 = parent->child(bestChild2).getAABBNode();
|
||||
AABBNode::swap(parent,bestChild1,child2,bestChild2Child);
|
||||
parent->setBounds(bestChild2,child2->bounds());
|
||||
AABBNode::compact(parent);
|
||||
AABBNode::compact(child2);
|
||||
|
||||
/*! This returned depth is conservative as the child that was
|
||||
* pulled up in the tree could have been on the critical path. */
|
||||
cdepth[bestChild1]++; // bestChild1 was pushed down one level
|
||||
return 1+reduce_max(cdepth);
|
||||
}
|
||||
}
|
||||
}
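// Illustrative sketch (not from the embree sources): the rotation search above scores
// a candidate swap by the half surface area of the merged box (the SAH proxy used
// throughout these builders): halfArea(box) = dx*dy + dy*dz + dz*dx. A swap is kept
// only if it lowers that area for the second child. Scalar version:
#include <algorithm>

struct Box3 { float lo[3], hi[3]; };

inline float halfArea(const Box3& b) {
  const float dx = b.hi[0] - b.lo[0];
  const float dy = b.hi[1] - b.lo[1];
  const float dz = b.hi[2] - b.lo[2];
  return dx*dy + dy*dz + dz*dx;          // half of the full surface area 2*(dx*dy + ...)
}

inline Box3 mergeBox(const Box3& a, const Box3& b) {
  Box3 r;
  for (int k = 0; k < 3; k++) {
    r.lo[k] = std::min(a.lo[k], b.lo[k]);
    r.hi[k] = std::max(a.hi[k], b.hi[k]);
  }
  return r;
}

// Gain of replacing one grandchild of child2 by child1, given the merged box of the
// remaining grandchildren: a negative value means the swap shrinks child2's bounds,
// i.e. it reduces the estimated SAH cost.
inline float swapGain(const Box3& child2Remainder, const Box3& child1, float child2AreaBefore) {
  return halfArea(mergeBox(child2Remainder, child1)) - child2AreaBefore;
}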
|
||||
37
engine/thirdparty/embree/kernels/bvh/bvh_rotate.h
vendored
Normal file
@@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
template<int N>
|
||||
class BVHNRotate
|
||||
{
|
||||
typedef typename BVHN<N>::NodeRef NodeRef;
|
||||
|
||||
public:
|
||||
static const bool enabled = false;
|
||||
|
||||
static __forceinline size_t rotate(NodeRef parentRef, size_t depth = 1) { return 0; }
|
||||
static __forceinline void restructure(NodeRef ref, size_t depth = 1) {}
|
||||
};
|
||||
|
||||
/* BVH4 tree rotations */
|
||||
template<>
|
||||
class BVHNRotate<4>
|
||||
{
|
||||
typedef BVH4::AABBNode AABBNode;
|
||||
typedef BVH4::NodeRef NodeRef;
|
||||
|
||||
public:
|
||||
static const bool enabled = true;
|
||||
|
||||
static size_t rotate(NodeRef parentRef, size_t depth = 1);
|
||||
};
|
||||
}
|
||||
}
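// Illustrative sketch (not from the embree sources): the primary BVHNRotate template
// above disables rotations (enabled = false, rotate() is a no-op) and only the BVH4
// specialization supplies a real implementation. A caller can stay generic and let the
// compiler fold the disabled branch away; names below are placeholders:
template<int N> struct RotatePolicy    { static const bool enabled = false; static int rotate(int)       { return 0; } };
template<>      struct RotatePolicy<4> { static const bool enabled = true;  static int rotate(int depth) { return depth + 1; } };

template<int N>
int maybeRotate(int depth) {
  if (RotatePolicy<N>::enabled)          // constant condition; dead code when disabled
    return RotatePolicy<N>::rotate(depth);
  return 0;
}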
|
||||
168
engine/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
vendored
Normal file
@@ -0,0 +1,168 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "bvh_statistics.h"
|
||||
#include "../../common/algorithms/parallel_reduce.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<int N>
|
||||
BVHNStatistics<N>::BVHNStatistics (BVH* bvh) : bvh(bvh)
|
||||
{
|
||||
double A = max(0.0f,bvh->getLinearBounds().expectedHalfArea());
|
||||
stat = statistics(bvh->root,A,BBox1f(0.0f,1.0f));
|
||||
}
|
||||
|
||||
template<int N>
|
||||
std::string BVHNStatistics<N>::str()
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
stream << " primitives = " << bvh->numPrimitives << ", vertices = " << bvh->numVertices << ", depth = " << stat.depth << std::endl;
|
||||
size_t totalBytes = stat.bytes(bvh);
|
||||
double totalSAH = stat.sah(bvh);
|
||||
stream << " total : sah = " << std::setw(7) << std::setprecision(3) << totalSAH << " (100.00%), ";
|
||||
stream << "#bytes = " << std::setw(7) << std::setprecision(2) << totalBytes/1E6 << " MB (100.00%), ";
|
||||
stream << "#nodes = " << std::setw(7) << stat.size() << " (" << std::setw(6) << std::setprecision(2) << 100.0*stat.fillRate(bvh) << "% filled), ";
|
||||
stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(totalBytes)/double(bvh->numPrimitives) << std::endl;
|
||||
if (stat.statAABBNodes.numNodes ) stream << " getAABBNodes : " << stat.statAABBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
|
||||
if (stat.statOBBNodes.numNodes ) stream << " ungetAABBNodes : " << stat.statOBBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
|
||||
if (stat.statAABBNodesMB.numNodes ) stream << " getAABBNodesMB : " << stat.statAABBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
|
||||
if (stat.statAABBNodesMB4D.numNodes) stream << " getAABBNodesMB4D : " << stat.statAABBNodesMB4D.toString(bvh,totalSAH,totalBytes) << std::endl;
|
||||
if (stat.statOBBNodesMB.numNodes) stream << " ungetAABBNodesMB : " << stat.statOBBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
|
||||
if (stat.statQuantizedNodes.numNodes ) stream << " quantizedNodes : " << stat.statQuantizedNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
|
||||
if (true) stream << " leaves : " << stat.statLeaf.toString(bvh,totalSAH,totalBytes) << std::endl;
|
||||
if (true) stream << " histogram : " << stat.statLeaf.histToString() << std::endl;
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
template<int N>
|
||||
typename BVHNStatistics<N>::Statistics BVHNStatistics<N>::statistics(NodeRef node, const double A, const BBox1f t0t1)
|
||||
{
|
||||
Statistics s;
|
||||
assert(t0t1.size() > 0.0f);
|
||||
double dt = max(0.0f,t0t1.size());
|
||||
if (node.isAABBNode())
|
||||
{
|
||||
AABBNode* n = node.getAABBNode();
|
||||
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
|
||||
if (n->child(i) == BVH::emptyNode) return Statistics();
|
||||
const double Ai = max(0.0f,halfArea(n->extend(i)));
|
||||
Statistics s = statistics(n->child(i),Ai,t0t1);
|
||||
s.statAABBNodes.numChildren++;
|
||||
return s;
|
||||
}, Statistics::add);
|
||||
s.statAABBNodes.numNodes++;
|
||||
s.statAABBNodes.nodeSAH += dt*A;
|
||||
s.depth++;
|
||||
}
|
||||
else if (node.isOBBNode())
|
||||
{
|
||||
OBBNode* n = node.ungetAABBNode();
|
||||
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
|
||||
if (n->child(i) == BVH::emptyNode) return Statistics();
|
||||
const double Ai = max(0.0f,halfArea(n->extent(i)));
|
||||
Statistics s = statistics(n->child(i),Ai,t0t1);
|
||||
s.statOBBNodes.numChildren++;
|
||||
return s;
|
||||
}, Statistics::add);
|
||||
s.statOBBNodes.numNodes++;
|
||||
s.statOBBNodes.nodeSAH += dt*A;
|
||||
s.depth++;
|
||||
}
|
||||
else if (node.isAABBNodeMB())
|
||||
{
|
||||
AABBNodeMB* n = node.getAABBNodeMB();
|
||||
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
|
||||
if (n->child(i) == BVH::emptyNode) return Statistics();
|
||||
const double Ai = max(0.0f,n->expectedHalfArea(i,t0t1));
|
||||
Statistics s = statistics(n->child(i),Ai,t0t1);
|
||||
s.statAABBNodesMB.numChildren++;
|
||||
return s;
|
||||
}, Statistics::add);
|
||||
s.statAABBNodesMB.numNodes++;
|
||||
s.statAABBNodesMB.nodeSAH += dt*A;
|
||||
s.depth++;
|
||||
}
|
||||
else if (node.isAABBNodeMB4D())
|
||||
{
|
||||
AABBNodeMB4D* n = node.getAABBNodeMB4D();
|
||||
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
|
||||
if (n->child(i) == BVH::emptyNode) return Statistics();
|
||||
const BBox1f t0t1i = intersect(t0t1,n->timeRange(i));
|
||||
assert(!t0t1i.empty());
|
||||
const double Ai = n->AABBNodeMB::expectedHalfArea(i,t0t1i);
|
||||
Statistics s = statistics(n->child(i),Ai,t0t1i);
|
||||
s.statAABBNodesMB4D.numChildren++;
|
||||
return s;
|
||||
}, Statistics::add);
|
||||
s.statAABBNodesMB4D.numNodes++;
|
||||
s.statAABBNodesMB4D.nodeSAH += dt*A;
|
||||
s.depth++;
|
||||
}
|
||||
else if (node.isOBBNodeMB())
|
||||
{
|
||||
OBBNodeMB* n = node.ungetAABBNodeMB();
|
||||
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
|
||||
if (n->child(i) == BVH::emptyNode) return Statistics();
|
||||
const double Ai = max(0.0f,halfArea(n->extent0(i)));
|
||||
Statistics s = statistics(n->child(i),Ai,t0t1);
|
||||
s.statOBBNodesMB.numChildren++;
|
||||
return s;
|
||||
}, Statistics::add);
|
||||
s.statOBBNodesMB.numNodes++;
|
||||
s.statOBBNodesMB.nodeSAH += dt*A;
|
||||
s.depth++;
|
||||
}
|
||||
else if (node.isQuantizedNode())
|
||||
{
|
||||
QuantizedNode* n = node.quantizedNode();
|
||||
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
|
||||
if (n->child(i) == BVH::emptyNode) return Statistics();
|
||||
const double Ai = max(0.0f,halfArea(n->extent(i)));
|
||||
Statistics s = statistics(n->child(i),Ai,t0t1);
|
||||
s.statQuantizedNodes.numChildren++;
|
||||
return s;
|
||||
}, Statistics::add);
|
||||
s.statQuantizedNodes.numNodes++;
|
||||
s.statQuantizedNodes.nodeSAH += dt*A;
|
||||
s.depth++;
|
||||
}
|
||||
else if (node.isLeaf())
|
||||
{
|
||||
size_t num; const char* tri = node.leaf(num);
|
||||
if (num)
|
||||
{
|
||||
for (size_t i=0; i<num; i++)
|
||||
{
|
||||
const size_t bytes = bvh->primTy->getBytes(tri);
|
||||
s.statLeaf.numPrimsActive += bvh->primTy->sizeActive(tri);
|
||||
s.statLeaf.numPrimsTotal += bvh->primTy->sizeTotal(tri);
|
||||
s.statLeaf.numBytes += bytes;
|
||||
tri+=bytes;
|
||||
}
|
||||
s.statLeaf.numLeaves++;
|
||||
s.statLeaf.numPrimBlocks += num;
|
||||
s.statLeaf.leafSAH += dt*A*num;
|
||||
if (num-1 < Statistics::LeafStat::NHIST) {
|
||||
s.statLeaf.numPrimBlocksHistogram[num-1]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// -- GODOT start --
|
||||
// throw std::runtime_error("not supported node type in bvh_statistics");
|
||||
abort();
|
||||
// -- GODOT end --
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
|
||||
template class BVHNStatistics<8>;
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
|
||||
template class BVHNStatistics<4>;
|
||||
#endif
|
||||
}
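// Illustrative sketch (not from the embree sources): the statistics above accumulate,
// per node class, time-range-weighted half areas (nodeSAH += dt*A) and later normalize
// by the root's expected half area; the ratio approximates the probability that a
// random ray visits a node. Minimal scalar version for a static binary tree, with the
// traversal/intersection cost weights left as explicit parameters:
struct StatNode { StatNode* child[2]; bool leaf; float halfArea; int numPrims; };

struct SahCost { double node; double leaf; };

static void accumulate(const StatNode* n, SahCost& s) {
  if (n->leaf) { s.leaf += double(n->halfArea) * n->numPrims; return; }
  s.node += n->halfArea;
  accumulate(n->child[0], s);
  accumulate(n->child[1], s);
}

double sahEstimate(const StatNode* root, double costTrav, double costInt) {
  SahCost s = { 0.0, 0.0 };
  accumulate(root, s);
  return (costTrav * s.node + costInt * s.leaf) / double(root->halfArea);
}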
|
||||
285
engine/thirdparty/embree/kernels/bvh/bvh_statistics.h
vendored
Normal file
@@ -0,0 +1,285 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include <sstream>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<int N>
|
||||
class BVHNStatistics
|
||||
{
|
||||
typedef BVHN<N> BVH;
|
||||
typedef typename BVH::AABBNode AABBNode;
|
||||
typedef typename BVH::OBBNode OBBNode;
|
||||
typedef typename BVH::AABBNodeMB AABBNodeMB;
|
||||
typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
|
||||
typedef typename BVH::OBBNodeMB OBBNodeMB;
|
||||
typedef typename BVH::QuantizedNode QuantizedNode;
|
||||
|
||||
typedef typename BVH::NodeRef NodeRef;
|
||||
|
||||
struct Statistics
|
||||
{
|
||||
template<typename Node>
|
||||
struct NodeStat
|
||||
{
|
||||
NodeStat ( double nodeSAH = 0,
|
||||
size_t numNodes = 0,
|
||||
size_t numChildren = 0)
|
||||
: nodeSAH(nodeSAH),
|
||||
numNodes(numNodes),
|
||||
numChildren(numChildren) {}
|
||||
|
||||
double sah(BVH* bvh) const {
|
||||
return nodeSAH/bvh->getLinearBounds().expectedHalfArea();
|
||||
}
|
||||
|
||||
size_t bytes() const {
|
||||
return numNodes*sizeof(Node);
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return numNodes;
|
||||
}
|
||||
|
||||
double fillRateNom () const { return double(numChildren); }
|
||||
double fillRateDen () const { return double(numNodes*N); }
|
||||
double fillRate () const { return fillRateNom()/fillRateDen(); }
|
||||
|
||||
__forceinline friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
|
||||
{
|
||||
return NodeStat(a.nodeSAH + b.nodeSAH,
|
||||
a.numNodes+b.numNodes,
|
||||
a.numChildren+b.numChildren);
|
||||
}
|
||||
|
||||
std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
|
||||
stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
|
||||
stream << "#bytes = " << std::setw(7) << std::setprecision(2) << bytes()/1E6 << " MB ";
|
||||
stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes())/double(bytesTotal) << "%), ";
|
||||
stream << "#nodes = " << std::setw(7) << numNodes << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate() << "% filled), ";
|
||||
stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes())/double(bvh->numPrimitives);
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
public:
|
||||
double nodeSAH;
|
||||
size_t numNodes;
|
||||
size_t numChildren;
|
||||
};
|
||||
|
||||
struct LeafStat
|
||||
{
|
||||
static const int NHIST = 8;
|
||||
|
||||
LeafStat ( double leafSAH = 0.0f,
|
||||
size_t numLeaves = 0,
|
||||
size_t numPrimsActive = 0,
|
||||
size_t numPrimsTotal = 0,
|
||||
size_t numPrimBlocks = 0,
|
||||
size_t numBytes = 0)
|
||||
: leafSAH(leafSAH),
|
||||
numLeaves(numLeaves),
|
||||
numPrimsActive(numPrimsActive),
|
||||
numPrimsTotal(numPrimsTotal),
|
||||
numPrimBlocks(numPrimBlocks),
|
||||
numBytes(numBytes)
|
||||
{
|
||||
for (size_t i=0; i<NHIST; i++)
|
||||
numPrimBlocksHistogram[i] = 0;
|
||||
}
|
||||
|
||||
double sah(BVH* bvh) const {
|
||||
return leafSAH/bvh->getLinearBounds().expectedHalfArea();
|
||||
}
|
||||
|
||||
size_t bytes(BVH* bvh) const {
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return numLeaves;
|
||||
}
|
||||
|
||||
double fillRateNom (BVH* bvh) const { return double(numPrimsActive); }
|
||||
double fillRateDen (BVH* bvh) const { return double(numPrimsTotal); }
|
||||
double fillRate (BVH* bvh) const { return fillRateNom(bvh)/fillRateDen(bvh); }
|
||||
|
||||
__forceinline friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
|
||||
{
|
||||
LeafStat stat(a.leafSAH + b.leafSAH,
|
||||
a.numLeaves+b.numLeaves,
|
||||
a.numPrimsActive+b.numPrimsActive,
|
||||
a.numPrimsTotal+b.numPrimsTotal,
|
||||
a.numPrimBlocks+b.numPrimBlocks,
|
||||
a.numBytes+b.numBytes);
|
||||
for (size_t i=0; i<NHIST; i++) {
|
||||
stat.numPrimBlocksHistogram[i] += a.numPrimBlocksHistogram[i];
|
||||
stat.numPrimBlocksHistogram[i] += b.numPrimBlocksHistogram[i];
|
||||
}
|
||||
return stat;
|
||||
}
|
||||
|
||||
std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
|
||||
stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
|
||||
stream << "#bytes = " << std::setw(7) << std::setprecision(2) << double(bytes(bvh))/1E6 << " MB ";
|
||||
stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes(bvh))/double(bytesTotal) << "%), ";
|
||||
stream << "#nodes = " << std::setw(7) << numLeaves << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate(bvh) << "% filled), ";
|
||||
stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes(bvh))/double(bvh->numPrimitives);
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
std::string histToString() const
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
for (size_t i=0; i<NHIST; i++)
|
||||
stream << std::setw(6) << std::setprecision(2) << 100.0f*float(numPrimBlocksHistogram[i])/float(numLeaves) << "% ";
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
public:
|
||||
double leafSAH; //!< SAH of the leaves only
|
||||
size_t numLeaves; //!< Number of leaf nodes.
|
||||
size_t numPrimsActive; //!< Number of active primitives
|
||||
size_t numPrimsTotal; //!< Number of active and inactive primitives
|
||||
size_t numPrimBlocks; //!< Number of primitive blocks.
|
||||
size_t numBytes; //!< Number of bytes of leaves.
|
||||
size_t numPrimBlocksHistogram[8];
|
||||
};
|
||||
|
||||
public:
|
||||
Statistics (size_t depth = 0,
|
||||
LeafStat statLeaf = LeafStat(),
|
||||
NodeStat<AABBNode> statAABBNodes = NodeStat<AABBNode>(),
|
||||
NodeStat<OBBNode> statOBBNodes = NodeStat<OBBNode>(),
|
||||
NodeStat<AABBNodeMB> statAABBNodesMB = NodeStat<AABBNodeMB>(),
|
||||
NodeStat<AABBNodeMB4D> statAABBNodesMB4D = NodeStat<AABBNodeMB4D>(),
|
||||
NodeStat<OBBNodeMB> statOBBNodesMB = NodeStat<OBBNodeMB>(),
|
||||
NodeStat<QuantizedNode> statQuantizedNodes = NodeStat<QuantizedNode>())
|
||||
|
||||
: depth(depth),
|
||||
statLeaf(statLeaf),
|
||||
statAABBNodes(statAABBNodes),
|
||||
statOBBNodes(statOBBNodes),
|
||||
statAABBNodesMB(statAABBNodesMB),
|
||||
statAABBNodesMB4D(statAABBNodesMB4D),
|
||||
statOBBNodesMB(statOBBNodesMB),
|
||||
statQuantizedNodes(statQuantizedNodes) {}
|
||||
|
||||
double sah(BVH* bvh) const
|
||||
{
|
||||
return statLeaf.sah(bvh) +
|
||||
statAABBNodes.sah(bvh) +
|
||||
statOBBNodes.sah(bvh) +
|
||||
statAABBNodesMB.sah(bvh) +
|
||||
statAABBNodesMB4D.sah(bvh) +
|
||||
statOBBNodesMB.sah(bvh) +
|
||||
statQuantizedNodes.sah(bvh);
|
||||
}
|
||||
|
||||
size_t bytes(BVH* bvh) const {
|
||||
return statLeaf.bytes(bvh) +
|
||||
statAABBNodes.bytes() +
|
||||
statOBBNodes.bytes() +
|
||||
statAABBNodesMB.bytes() +
|
||||
statAABBNodesMB4D.bytes() +
|
||||
statOBBNodesMB.bytes() +
|
||||
statQuantizedNodes.bytes();
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return statLeaf.size() +
|
||||
statAABBNodes.size() +
|
||||
statOBBNodes.size() +
|
||||
statAABBNodesMB.size() +
|
||||
statAABBNodesMB4D.size() +
|
||||
statOBBNodesMB.size() +
|
||||
statQuantizedNodes.size();
|
||||
}
|
||||
|
||||
double fillRate (BVH* bvh) const
|
||||
{
|
||||
double nom = statLeaf.fillRateNom(bvh) +
|
||||
statAABBNodes.fillRateNom() +
|
||||
statOBBNodes.fillRateNom() +
|
||||
statAABBNodesMB.fillRateNom() +
|
||||
statAABBNodesMB4D.fillRateNom() +
|
||||
statOBBNodesMB.fillRateNom() +
|
||||
statQuantizedNodes.fillRateNom();
|
||||
double den = statLeaf.fillRateDen(bvh) +
|
||||
statAABBNodes.fillRateDen() +
|
||||
statOBBNodes.fillRateDen() +
|
||||
statAABBNodesMB.fillRateDen() +
|
||||
statAABBNodesMB4D.fillRateDen() +
|
||||
statOBBNodesMB.fillRateDen() +
|
||||
statQuantizedNodes.fillRateDen();
|
||||
return nom/den;
|
||||
}
|
||||
|
||||
friend Statistics operator+ ( const Statistics& a, const Statistics& b )
|
||||
{
|
||||
return Statistics(max(a.depth,b.depth),
|
||||
a.statLeaf + b.statLeaf,
|
||||
a.statAABBNodes + b.statAABBNodes,
|
||||
a.statOBBNodes + b.statOBBNodes,
|
||||
a.statAABBNodesMB + b.statAABBNodesMB,
|
||||
a.statAABBNodesMB4D + b.statAABBNodesMB4D,
|
||||
a.statOBBNodesMB + b.statOBBNodesMB,
|
||||
a.statQuantizedNodes + b.statQuantizedNodes);
|
||||
}
|
||||
|
||||
static Statistics add ( const Statistics& a, const Statistics& b ) {
|
||||
return a+b;
|
||||
}
|
||||
|
||||
public:
|
||||
size_t depth;
|
||||
LeafStat statLeaf;
|
||||
NodeStat<AABBNode> statAABBNodes;
|
||||
NodeStat<OBBNode> statOBBNodes;
|
||||
NodeStat<AABBNodeMB> statAABBNodesMB;
|
||||
NodeStat<AABBNodeMB4D> statAABBNodesMB4D;
|
||||
NodeStat<OBBNodeMB> statOBBNodesMB;
|
||||
NodeStat<QuantizedNode> statQuantizedNodes;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/* Constructor gathers statistics. */
|
||||
BVHNStatistics (BVH* bvh);
|
||||
|
||||
/*! Convert statistics into a string */
|
||||
std::string str();
|
||||
|
||||
double sah() const {
|
||||
return stat.sah(bvh);
|
||||
}
|
||||
|
||||
size_t bytesUsed() const {
|
||||
return stat.bytes(bvh);
|
||||
}
|
||||
|
||||
private:
|
||||
Statistics statistics(NodeRef node, const double A, const BBox1f dt);
|
||||
|
||||
private:
|
||||
BVH* bvh;
|
||||
Statistics stat;
|
||||
};
|
||||
|
||||
typedef BVHNStatistics<4> BVH4Statistics;
|
||||
typedef BVHNStatistics<8> BVH8Statistics;
|
||||
}
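// Illustrative sketch (not from the embree sources): fillRate() above reports how well
// the N-wide nodes and the primitive blocks are utilized. The per-class ratios are
// combined by summing numerators and denominators rather than averaging the rates, so
// the classes with the most nodes dominate the aggregate:
struct FillStat { double nom; double den; };   // e.g. children vs. numNodes*N, or active vs. total prims

inline FillStat addFill(const FillStat& a, const FillStat& b) {
  return FillStat{ a.nom + b.nom, a.den + b.den };
}

inline double combinedFillRate(const FillStat& nodes, const FillStat& leaves) {
  const FillStat total = addFill(nodes, leaves);
  return total.den > 0.0 ? total.nom / total.den : 0.0;
}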
|
||||
466
engine/thirdparty/embree/kernels/bvh/bvh_traverser1.h
vendored
Normal file
@@ -0,0 +1,466 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bvh.h"
|
||||
#include "node_intersector1.h"
|
||||
#include "../common/stack_item.h"
|
||||
|
||||
#define NEW_SORTING_CODE 1
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
/*! BVH regular node traversal for single rays. */
|
||||
template<int N, int types>
|
||||
class BVHNNodeTraverser1Hit;
|
||||
|
||||
#if defined(__AVX512VL__) // SKX
|
||||
|
||||
template<int N>
|
||||
__forceinline void isort_update(vint<N> &dist, const vint<N> &d)
|
||||
{
|
||||
const vint<N> dist_shift = align_shift_right<N-1>(dist,dist);
|
||||
const vboolf<N> m_geq = d >= dist;
|
||||
const vboolf<N> m_geq_shift = m_geq << 1;
|
||||
dist = select(m_geq,d,dist);
|
||||
dist = select(m_geq_shift,dist_shift,dist);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) {
|
||||
dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero)));
|
||||
}
|
||||
|
||||
__forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) {
|
||||
return toScalar(permutex2var((__m256i)index,n0,n1));
|
||||
}
|
||||
|
||||
__forceinline float permuteExtract(const vint8& index, const vfloat8& n) {
|
||||
return toScalar(permute(n,index));
|
||||
}
|
||||
|
||||
#endif
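    // Illustrative sketch (not from the embree sources): isort_update above keeps a
    // small list of hit distances sorted inside one SIMD register. Each new distance is
    // compared against all slots at once; slots holding values >= the new one shift one
    // lane over, and the first smaller slot receives the new value. The scalar
    // equivalent is a single insertion-sort step into a fixed-size descending array:
    template<int K>
    void isortUpdateScalar(int (&dist)[K], int d)
    {
      int i = K;                          // shift everything smaller than d one slot down
      while (i > 0 && dist[i - 1] < d) {  // descending order: larger values stay in front
        if (i < K) dist[i] = dist[i - 1];
        --i;
      }
      if (i < K) dist[i] = d;             // d falls off the end if it is the smallest
    }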
|
||||
|
||||
/* Specialization for BVH4. */
|
||||
template<int types>
|
||||
class BVHNNodeTraverser1Hit<4, types>
|
||||
{
|
||||
typedef BVH4 BVH;
|
||||
typedef BVH4::NodeRef NodeRef;
|
||||
typedef BVH4::BaseNode BaseNode;
|
||||
|
||||
|
||||
public:
|
||||
/* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */
|
||||
static __forceinline void traverseClosestHit(NodeRef& cur,
|
||||
size_t mask,
|
||||
const vfloat4& tNear,
|
||||
StackItemT<NodeRef>*& stackPtr,
|
||||
StackItemT<NodeRef>* stackEnd)
|
||||
{
|
||||
assert(mask != 0);
|
||||
const BaseNode* node = cur.baseNode();
|
||||
|
||||
/*! one child is hit, continue with that child */
|
||||
size_t r = bscf(mask);
|
||||
cur = node->child(r);
|
||||
BVH::prefetch(cur,types);
|
||||
if (likely(mask == 0)) {
|
||||
assert(cur != BVH::emptyNode);
|
||||
return;
|
||||
}
|
||||
|
||||
/*! two children are hit, push far child, and continue with closer child */
|
||||
NodeRef c0 = cur;
|
||||
const unsigned int d0 = ((unsigned int*)&tNear)[r];
|
||||
r = bscf(mask);
|
||||
NodeRef c1 = node->child(r);
|
||||
BVH::prefetch(c1,types);
|
||||
const unsigned int d1 = ((unsigned int*)&tNear)[r];
|
||||
assert(c0 != BVH::emptyNode);
|
||||
assert(c1 != BVH::emptyNode);
|
||||
if (likely(mask == 0)) {
|
||||
assert(stackPtr < stackEnd);
|
||||
if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
|
||||
else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
|
||||
}
|
||||
|
||||
#if NEW_SORTING_CODE == 1
|
||||
vint4 s0((size_t)c0,(size_t)d0);
|
||||
vint4 s1((size_t)c1,(size_t)d1);
|
||||
r = bscf(mask);
|
||||
NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
|
||||
vint4 s2((size_t)c2,(size_t)d2);
|
||||
/* 3 hits */
|
||||
if (likely(mask == 0)) {
|
||||
StackItemT<NodeRef>::sort3(s0,s1,s2);
|
||||
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
|
||||
cur = toSizeT(s2);
|
||||
stackPtr+=2;
|
||||
return;
|
||||
}
|
||||
r = bscf(mask);
|
||||
NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
|
||||
vint4 s3((size_t)c3,(size_t)d3);
|
||||
/* 4 hits */
|
||||
StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
|
||||
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
|
||||
cur = toSizeT(s3);
|
||||
stackPtr+=3;
|
||||
#else
|
||||
/*! Here starts the slow path for 3 or 4 hit children. We push
|
||||
* all nodes onto the stack to sort them there. */
|
||||
assert(stackPtr < stackEnd);
|
||||
stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
|
||||
assert(stackPtr < stackEnd);
|
||||
stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
|
||||
|
||||
/*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
|
||||
assert(c != BVH::emptyNode);
|
||||
if (likely(mask == 0)) {
|
||||
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
return;
|
||||
}
|
||||
|
||||
/*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
|
||||
assert(stackPtr < stackEnd);
|
||||
r = bscf(mask);
|
||||
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
|
||||
assert(c != BVH::emptyNode);
|
||||
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
|
||||
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */
|
||||
static __forceinline void traverseAnyHit(NodeRef& cur,
|
||||
size_t mask,
|
||||
const vfloat4& tNear,
|
||||
NodeRef*& stackPtr,
|
||||
NodeRef* stackEnd)
|
||||
{
|
||||
const BaseNode* node = cur.baseNode();
|
||||
|
||||
/*! one child is hit, continue with that child */
|
||||
size_t r = bscf(mask);
|
||||
cur = node->child(r);
|
||||
BVH::prefetch(cur,types);
|
||||
|
||||
/* simpler in sequence traversal order */
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) return;
|
||||
assert(stackPtr < stackEnd);
|
||||
*stackPtr = cur; stackPtr++;
|
||||
|
||||
for (; ;)
|
||||
{
|
||||
r = bscf(mask);
|
||||
cur = node->child(r); BVH::prefetch(cur,types);
|
||||
assert(cur != BVH::emptyNode);
|
||||
if (likely(mask == 0)) return;
|
||||
assert(stackPtr < stackEnd);
|
||||
*stackPtr = cur; stackPtr++;
|
||||
}
|
||||
}
|
||||
};
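    // Illustrative sketch (not from the embree sources): traverseClosestHit above always
    // descends into the nearest hit child and pushes the remaining hits together with
    // their entry distances, so closer subtrees are visited first and farther stack
    // entries can later be culled against the current hit distance. The two-children
    // case in scalar form (StackEntry is a stand-in for StackItemT):
    struct StackEntry { void* node; float dist; };

    inline void* continueWithCloser(void* c0, float d0, void* c1, float d1,
                                    StackEntry*& stackPtr)
    {
      if (d0 < d1) {                      // push the farther child, descend into the closer
        *stackPtr++ = StackEntry{ c1, d1 };
        return c0;
      } else {
        *stackPtr++ = StackEntry{ c0, d0 };
        return c1;
      }
    }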
|
||||
|
||||
/* Specialization for BVH8. */
|
||||
template<int types>
|
||||
class BVHNNodeTraverser1Hit<8, types>
|
||||
{
|
||||
typedef BVH8 BVH;
|
||||
typedef BVH8::NodeRef NodeRef;
|
||||
typedef BVH8::BaseNode BaseNode;
|
||||
|
||||
#if defined(__AVX512VL__)
      template<class NodeRef, class BaseNode>
      static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur,
                                                             size_t mask,
                                                             const vfloat8& tNear,
                                                             StackItemT<NodeRef>*& stackPtr,
                                                             StackItemT<NodeRef>* stackEnd)
      {
        assert(mask != 0);
        const BaseNode* node = cur.baseNode();
        const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]);
        const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]);
        vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step);
        distance_i = vint8::compact((int)mask,distance_i,distance_i);
        cur = permuteExtract(distance_i,n0,n1);
        BVH::prefetch(cur,types);

        mask &= mask-1;
        if (likely(mask == 0)) return;

        /* 2 hits: order A0 B0 */
        const vint8 d0(distance_i);
        const vint8 d1(shuffle<1>(distance_i));
        cur = permuteExtract(d1,n0,n1);
        BVH::prefetch(cur,types);

        const vint8 dist_A0 = min(d0, d1);
        const vint8 dist_B0 = max(d0, d1);
        assert(dist_A0[0] < dist_B0[0]);

        mask &= mask-1;
        if (likely(mask == 0)) {
          cur = permuteExtract(dist_A0,n0,n1);
          stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1);
          *(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear);
          stackPtr++;
          return;
        }

        /* 3 hits: order A1 B1 C1 */

        const vint8 d2(shuffle<2>(distance_i));
        cur = permuteExtract(d2,n0,n1);
        BVH::prefetch(cur,types);

        const vint8 dist_A1     = min(dist_A0,d2);
        const vint8 dist_tmp_B1 = max(dist_A0,d2);
        const vint8 dist_B1     = min(dist_B0,dist_tmp_B1);
        const vint8 dist_C1     = max(dist_B0,dist_tmp_B1);
        assert(dist_A1[0] < dist_B1[0]);
        assert(dist_B1[0] < dist_C1[0]);

        mask &= mask-1;
        if (likely(mask == 0)) {
          cur = permuteExtract(dist_A1,n0,n1);
          stackPtr[0].ptr = permuteExtract(dist_C1,n0,n1);
          *(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear);
          stackPtr[1].ptr = permuteExtract(dist_B1,n0,n1);
          *(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear);
          stackPtr+=2;
          return;
        }

        /* 4 hits: order A2 B2 C2 D2 */

        const vint8 d3(shuffle<3>(distance_i));
        cur = permuteExtract(d3,n0,n1);
        BVH::prefetch(cur,types);

        const vint8 dist_A2     = min(dist_A1,d3);
        const vint8 dist_tmp_B2 = max(dist_A1,d3);
        const vint8 dist_B2     = min(dist_B1,dist_tmp_B2);
        const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2);
        const vint8 dist_C2     = min(dist_C1,dist_tmp_C2);
        const vint8 dist_D2     = max(dist_C1,dist_tmp_C2);
        assert(dist_A2[0] < dist_B2[0]);
        assert(dist_B2[0] < dist_C2[0]);
        assert(dist_C2[0] < dist_D2[0]);

        mask &= mask-1;
        if (likely(mask == 0)) {
          cur = permuteExtract(dist_A2,n0,n1);
          stackPtr[0].ptr = permuteExtract(dist_D2,n0,n1);
          *(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear);
          stackPtr[1].ptr = permuteExtract(dist_C2,n0,n1);
          *(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear);
          stackPtr[2].ptr = permuteExtract(dist_B2,n0,n1);
          *(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear);
          stackPtr+=3;
          return;
        }

        /* >=5 hits: reverse to descending order for writing to stack */

        distance_i = align_shift_right<3>(distance_i,distance_i);
        const size_t hits = 4 + popcnt(mask);
        vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert

        isort_quick_update<8>(dist,dist_A2);
        isort_quick_update<8>(dist,dist_B2);
        isort_quick_update<8>(dist,dist_C2);
        isort_quick_update<8>(dist,dist_D2);

        do {

          distance_i = align_shift_right<1>(distance_i,distance_i);
          cur = permuteExtract(distance_i,n0,n1);
          BVH::prefetch(cur,types);
          const vint8 new_dist(permute(distance_i,vint8(zero)));
          mask &= mask-1;
          isort_update<8>(dist,new_dist);

        } while(mask);

        for (size_t i=0; i<7; i++)
          assert(dist[i+0]>=dist[i+1]);

        for (size_t i=0;i<hits-1;i++)
        {
          stackPtr->ptr = permuteExtract(dist,n0,n1);
          *(float*)&stackPtr->dist = permuteExtract(dist,tNear);
          dist = align_shift_right<1>(dist,dist);
          stackPtr++;
        }
        cur = permuteExtract(dist,n0,n1);
      }
#endif

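      /* Illustrative sketch (not part of the original class): the distance ordering above is an
         incremental min/max sorting network. Each newly hit child is merged into the already
         sorted set A<=B<=C<=D with a fixed pattern of min/max pairs; the AVX-512 path applies
         the same pattern to whole vint8 registers. Scalar restatement on plain ints: */
      static __forceinline void exampleSortFourDistances(int d0, int d1, int d2, int d3, int out[4])
      {
        /* 2 hits: A0 <= B0 */
        int A = d0 < d1 ? d0 : d1;
        int B = d0 < d1 ? d1 : d0;

        /* 3 hits: merge d2 -> A1 <= B1 <= C1 */
        int tmpB = A > d2 ? A : d2;
        A        = A < d2 ? A : d2;
        int C    = B > tmpB ? B : tmpB;
        B        = B < tmpB ? B : tmpB;

        /* 4 hits: merge d3 -> A2 <= B2 <= C2 <= D2 */
        int tmpB2 = A > d3 ? A : d3;
        A         = A < d3 ? A : d3;
        int tmpC  = B > tmpB2 ? B : tmpB2;
        B         = B < tmpB2 ? B : tmpB2;
        int D     = C > tmpC ? C : tmpC;
        C         = C < tmpC ? C : tmpC;

        out[0] = A; out[1] = B; out[2] = C; out[3] = D;
        assert(out[0] <= out[1] && out[1] <= out[2] && out[2] <= out[3]);
      }
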
    public:
      static __forceinline void traverseClosestHit(NodeRef& cur,
                                                   size_t mask,
                                                   const vfloat8& tNear,
                                                   StackItemT<NodeRef>*& stackPtr,
                                                   StackItemT<NodeRef>* stackEnd)
      {
        assert(mask != 0);
#if defined(__AVX512VL__)
        traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
#else

        const BaseNode* node = cur.baseNode();

        /*! one child is hit, continue with that child */
        size_t r = bscf(mask);
        cur = node->child(r);
        BVH::prefetch(cur,types);
        if (likely(mask == 0)) {
          assert(cur != BVH::emptyNode);
          return;
        }

        /*! two children are hit, push far child, and continue with closer child */
        NodeRef c0 = cur;
        const unsigned int d0 = ((unsigned int*)&tNear)[r];
        r = bscf(mask);
        NodeRef c1 = node->child(r);
        BVH::prefetch(c1,types);
        const unsigned int d1 = ((unsigned int*)&tNear)[r];

        assert(c0 != BVH::emptyNode);
        assert(c1 != BVH::emptyNode);
        if (likely(mask == 0)) {
          assert(stackPtr < stackEnd);
          if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
          else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
        }
#if NEW_SORTING_CODE == 1
        vint4 s0((size_t)c0,(size_t)d0);
        vint4 s1((size_t)c1,(size_t)d1);

        r = bscf(mask);
        NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
        vint4 s2((size_t)c2,(size_t)d2);
        /* 3 hits */
        if (likely(mask == 0)) {
          StackItemT<NodeRef>::sort3(s0,s1,s2);
          *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
          cur = toSizeT(s2);
          stackPtr+=2;
          return;
        }
        r = bscf(mask);
        NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
        vint4 s3((size_t)c3,(size_t)d3);
        /* 4 hits */
        if (likely(mask == 0)) {
          StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
          *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
          cur = toSizeT(s3);
          stackPtr+=3;
          return;
        }
        *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3;
        /*! fallback case if more than 4 children are hit */
        StackItemT<NodeRef>* stackFirst = stackPtr;
        stackPtr+=4;
        while (1)
        {
          assert(stackPtr < stackEnd);
          r = bscf(mask);
          NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r];
          const vint4 s((size_t)c,(size_t)d);
          *(vint4*)stackPtr++ = s;
          assert(c != BVH::emptyNode);
          if (unlikely(mask == 0)) break;
        }
        sort(stackFirst,stackPtr);
        cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#else
        /*! Here starts the slow path for 3 or 4 hit children. We push
         *  all nodes onto the stack to sort them there. */
        assert(stackPtr < stackEnd);
        stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
        assert(stackPtr < stackEnd);
        stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;

        /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
        assert(stackPtr < stackEnd);
        r = bscf(mask);
        NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
        assert(c != BVH::emptyNode);
        if (likely(mask == 0)) {
          sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
          cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
          return;
        }

        /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
        assert(stackPtr < stackEnd);
        r = bscf(mask);
        c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
        assert(c != BVH::emptyNode);
        if (likely(mask == 0)) {
          sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
          cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
          return;
        }
        /*! fallback case if more than 4 children are hit */
        StackItemT<NodeRef>* stackFirst = stackPtr-4;
        while (1)
        {
          assert(stackPtr < stackEnd);
          r = bscf(mask);
          c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
          assert(c != BVH::emptyNode);
          if (unlikely(mask == 0)) break;
        }
        sort(stackFirst,stackPtr);
        cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#endif
#endif
      }

      static __forceinline void traverseAnyHit(NodeRef& cur,
                                               size_t mask,
                                               const vfloat8& tNear,
                                               NodeRef*& stackPtr,
                                               NodeRef* stackEnd)
      {
        const BaseNode* node = cur.baseNode();

        /*! one child is hit, continue with that child */
        size_t r = bscf(mask);
        cur = node->child(r);
        BVH::prefetch(cur,types);

        /* simpler in sequence traversal order */
        assert(cur != BVH::emptyNode);
        if (likely(mask == 0)) return;
        assert(stackPtr < stackEnd);
        *stackPtr = cur; stackPtr++;

        for (; ;)
        {
          r = bscf(mask);
          cur = node->child(r); BVH::prefetch(cur,types);
          assert(cur != BVH::emptyNode);
          if (likely(mask == 0)) return;
          assert(stackPtr < stackEnd);
          *stackPtr = cur; stackPtr++;
        }
      }
    };
  }
}
31
engine/thirdparty/embree/kernels/bvh/node_intersector.h
vendored
Normal file
@@ -0,0 +1,31 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "bvh.h"

namespace embree
{
  namespace isa
  {
    struct NearFarPrecalculations
    {
      size_t nearX, nearY, nearZ;
      size_t farX, farY, farZ;

      __forceinline NearFarPrecalculations() {}

      __forceinline NearFarPrecalculations(const Vec3fa& dir, size_t N)
      {
        const size_t size = sizeof(float)*N;
        nearX = (dir.x < 0.0f) ? 1*size : 0*size;
        nearY = (dir.y < 0.0f) ? 3*size : 2*size;
        nearZ = (dir.z < 0.0f) ? 5*size : 4*size;
        farX  = nearX ^ size;
        farY  = nearY ^ size;
        farZ  = nearZ ^ size;
      }
    };
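    /* Illustrative sketch (not part of the original header): the members above are byte offsets
       into an N-wide SoA bounds block laid out as lower_x, upper_x, lower_y, upper_y, lower_z,
       upper_z (six consecutive float[N] arrays). With dir.x < 0 the "near" X plane is upper_x
       (offset 1*sizeof(float)*N), and farX = nearX ^ size flips back to lower_x, so the slab
       test can load the correct plane without per-axis branches. ExampleBoundsSoA is a
       hypothetical stand-in for the real node layouts that start at lower_x. */
    template<int N>
    struct ExampleBoundsSoA
    {
      float lower_x[N], upper_x[N];
      float lower_y[N], upper_y[N];
      float lower_z[N], upper_z[N];
    };

    template<int N>
    __forceinline const float* exampleNearPlaneX(const ExampleBoundsSoA<N>& b, const NearFarPrecalculations& nf) {
      return (const float*)((const char*)&b.lower_x + nf.nearX);
    }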
  }
}
1492
engine/thirdparty/embree/kernels/bvh/node_intersector1.h
vendored
Normal file
File diff suppressed because it is too large
257
engine/thirdparty/embree/kernels/bvh/node_intersector_frustum.h
vendored
Normal file
@@ -0,0 +1,257 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "node_intersector.h"

namespace embree
{
  namespace isa
  {
    //////////////////////////////////////////////////////////////////////////////////////
    // Frustum structure used in hybrid and stream traversal
    //////////////////////////////////////////////////////////////////////////////////////

    /*
       Optimized frustum test. We calculate t=(p-org)/dir in ray/box
       intersection. We assume the rays are split by octant, thus
       dir intervals are either positive or negative in each
       dimension.

       Case 1: dir.min >= 0 && dir.max >= 0:
         t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
         t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max

       Case 2: dir.min < 0 && dir.max < 0:
         t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
         t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
    */

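    /* Illustrative sketch (not part of the original header): a scalar restatement of Case 1 for
       the X axis. The products org_max*rdir_min and org_min*rdir_max are constant per packet,
       which is exactly what the Frustum init() below stores in min_org_rdir and max_org_rdir so
       the per-node test reduces to one multiply-subtract per plane. Names are hypothetical. */
    struct ExampleFrustumX
    {
      float min_rdir, max_rdir;         // reduced rdir interval (both positive in Case 1)
      float min_org_rdir, max_org_rdir; // precomputed org*rdir terms
    };

    __forceinline void exampleSlabX(const ExampleFrustumX& f, float p_min, float p_max, float& t_min, float& t_max)
    {
      t_min = p_min * f.min_rdir - f.min_org_rdir; // == (p_min - org_max) * rdir_min
      t_max = p_max * f.max_rdir - f.max_org_rdir; // == (p_max - org_min) * rdir_max
    }
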
template<bool robust>
|
||||
struct Frustum;
|
||||
|
||||
/* Fast variant */
|
||||
template<>
|
||||
struct Frustum<false>
|
||||
{
|
||||
__forceinline Frustum() {}
|
||||
|
||||
template<int K>
|
||||
__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
|
||||
{
|
||||
const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
|
||||
reduce_min(select(valid, org.y, pos_inf)),
|
||||
reduce_min(select(valid, org.z, pos_inf)));
|
||||
|
||||
const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
|
||||
reduce_max(select(valid, org.y, neg_inf)),
|
||||
reduce_max(select(valid, org.z, neg_inf)));
|
||||
|
||||
const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
|
||||
reduce_min(select(valid, rdir.y, pos_inf)),
|
||||
reduce_min(select(valid, rdir.z, pos_inf)));
|
||||
|
||||
const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
|
||||
reduce_max(select(valid, rdir.y, neg_inf)),
|
||||
reduce_max(select(valid, rdir.z, neg_inf)));
|
||||
|
||||
const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
|
||||
const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
|
||||
|
||||
init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
|
||||
}
|
||||
|
||||
__forceinline void init(const Vec3fa& reduced_min_org,
|
||||
const Vec3fa& reduced_max_org,
|
||||
const Vec3fa& reduced_min_rdir,
|
||||
const Vec3fa& reduced_max_rdir,
|
||||
float reduced_min_dist,
|
||||
float reduced_max_dist,
|
||||
int N)
|
||||
{
|
||||
const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
|
||||
|
||||
min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
|
||||
max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
|
||||
|
||||
#if defined (__aarch64__)
|
||||
neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
|
||||
neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
|
||||
#else
|
||||
min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
|
||||
max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
|
||||
#endif
|
||||
min_dist = reduced_min_dist;
|
||||
max_dist = reduced_max_dist;
|
||||
|
||||
nf = NearFarPrecalculations(min_rdir, N);
|
||||
}
|
||||
|
||||
template<int K>
|
||||
__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
|
||||
{
|
||||
max_dist = reduce_max(ray_tfar);
|
||||
}
|
||||
|
||||
NearFarPrecalculations nf;
|
||||
|
||||
Vec3fa min_rdir;
|
||||
Vec3fa max_rdir;
|
||||
|
||||
#if defined (__aarch64__)
|
||||
Vec3fa neg_min_org_rdir;
|
||||
Vec3fa neg_max_org_rdir;
|
||||
#else
|
||||
Vec3fa min_org_rdir;
|
||||
Vec3fa max_org_rdir;
|
||||
#endif
|
||||
float min_dist;
|
||||
float max_dist;
|
||||
};
|
||||
|
||||
typedef Frustum<false> FrustumFast;
|
||||
|
||||
/* Robust variant */
|
||||
template<>
|
||||
struct Frustum<true>
|
||||
{
|
||||
__forceinline Frustum() {}
|
||||
|
||||
template<int K>
|
||||
__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
|
||||
{
|
||||
const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
|
||||
reduce_min(select(valid, org.y, pos_inf)),
|
||||
reduce_min(select(valid, org.z, pos_inf)));
|
||||
|
||||
const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
|
||||
reduce_max(select(valid, org.y, neg_inf)),
|
||||
reduce_max(select(valid, org.z, neg_inf)));
|
||||
|
||||
const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
|
||||
reduce_min(select(valid, rdir.y, pos_inf)),
|
||||
reduce_min(select(valid, rdir.z, pos_inf)));
|
||||
|
||||
const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
|
||||
reduce_max(select(valid, rdir.y, neg_inf)),
|
||||
reduce_max(select(valid, rdir.z, neg_inf)));
|
||||
|
||||
const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
|
||||
const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
|
||||
|
||||
init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
|
||||
}
|
||||
|
||||
__forceinline void init(const Vec3fa& reduced_min_org,
|
||||
const Vec3fa& reduced_max_org,
|
||||
const Vec3fa& reduced_min_rdir,
|
||||
const Vec3fa& reduced_max_rdir,
|
||||
float reduced_min_dist,
|
||||
float reduced_max_dist,
|
||||
int N)
|
||||
{
|
||||
const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
|
||||
min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
|
||||
max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
|
||||
|
||||
min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
|
||||
max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
|
||||
|
||||
min_dist = reduced_min_dist;
|
||||
max_dist = reduced_max_dist;
|
||||
|
||||
nf = NearFarPrecalculations(min_rdir, N);
|
||||
}
|
||||
|
||||
template<int K>
|
||||
__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
|
||||
{
|
||||
max_dist = reduce_max(ray_tfar);
|
||||
}
|
||||
|
||||
NearFarPrecalculations nf;
|
||||
|
||||
Vec3fa min_rdir;
|
||||
Vec3fa max_rdir;
|
||||
|
||||
Vec3fa min_org;
|
||||
Vec3fa max_org;
|
||||
|
||||
float min_dist;
|
||||
float max_dist;
|
||||
};
|
||||
|
||||
typedef Frustum<true> FrustumRobust;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast AABBNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
|
||||
const FrustumFast& frustum, vfloat<N>& dist)
|
||||
{
|
||||
const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
|
||||
const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
|
||||
const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
|
||||
const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
|
||||
const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
|
||||
const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
|
||||
|
||||
#if defined (__aarch64__)
|
||||
const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
|
||||
const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
|
||||
const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
|
||||
const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
|
||||
const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
|
||||
const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
|
||||
#else
|
||||
const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
|
||||
const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
|
||||
const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
|
||||
const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
|
||||
const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
|
||||
const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
|
||||
#endif
|
||||
const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
|
||||
dist = fmin;
|
||||
const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
|
||||
const vbool<N> vmask_node_hit = fmin <= fmax;
|
||||
size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
|
||||
return m_node;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Robust AABBNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N>
|
||||
__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
|
||||
const FrustumRobust& frustum, vfloat<N>& dist)
|
||||
{
|
||||
const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
|
||||
const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
|
||||
const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
|
||||
const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
|
||||
const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
|
||||
const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
|
||||
|
||||
const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
|
||||
const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
|
||||
const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
|
||||
const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
|
||||
const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
|
||||
const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);
|
||||
|
||||
const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
|
||||
const float round_up = 1.0f+2.0f*float(ulp);
|
||||
const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
|
||||
dist = fmin;
|
||||
const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
|
||||
const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax);
|
||||
size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
|
||||
return m_node;
|
||||
}
|
||||
}
|
||||
}
|
||||
844
engine/thirdparty/embree/kernels/bvh/node_intersector_packet.h
vendored
Normal file
@@ -0,0 +1,844 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "node_intersector.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Ray packet structure used in hybrid traversal
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int K, bool robust>
|
||||
struct TravRayK;
|
||||
|
||||
/* Fast variant */
|
||||
template<int K>
|
||||
struct TravRayK<K, false>
|
||||
{
|
||||
__forceinline TravRayK() {}
|
||||
|
||||
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
||||
{
|
||||
init(ray_org, ray_dir, N);
|
||||
}
|
||||
|
||||
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
|
||||
{
|
||||
init(ray_org, ray_dir, N);
|
||||
tnear = ray_tnear;
|
||||
tfar = ray_tfar;
|
||||
}
|
||||
|
||||
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
||||
{
|
||||
org = ray_org;
|
||||
dir = ray_dir;
|
||||
rdir = rcp_safe(ray_dir);
|
||||
#if defined(__aarch64__)
|
||||
neg_org_rdir = -(org * rdir);
|
||||
#elif defined(__AVX2__)
|
||||
org_rdir = org * rdir;
|
||||
#endif
|
||||
|
||||
if (N)
|
||||
{
|
||||
const int size = sizeof(float)*N;
|
||||
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
|
||||
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
|
||||
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
|
||||
}
|
||||
}
|
||||
|
||||
Vec3vf<K> org;
|
||||
Vec3vf<K> dir;
|
||||
Vec3vf<K> rdir;
|
||||
#if defined(__aarch64__)
|
||||
Vec3vf<K> neg_org_rdir;
|
||||
#elif defined(__AVX2__)
|
||||
Vec3vf<K> org_rdir;
|
||||
#endif
|
||||
Vec3vi<K> nearXYZ;
|
||||
vfloat<K> tnear;
|
||||
vfloat<K> tfar;
|
||||
};
|
||||
|
||||
template<int K>
|
||||
using TravRayKFast = TravRayK<K, false>;
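    /* Illustrative sketch (not part of the original header): org_rdir (or the negated
       neg_org_rdir on aarch64) exists so that each bounding plane costs a single fused
       multiply-add, using the identity (b - org)*rdir == b*rdir - org*rdir. Scalar
       restatement with hypothetical names: */
    struct ExampleTravRay1
    {
      float rdir;     // 1/dir (the real code uses rcp_safe to avoid inf*0)
      float org_rdir; // org * rdir, precomputed once per ray
    };

    __forceinline float examplePlaneDistance(const ExampleTravRay1& r, float plane)
    {
      return plane * r.rdir - r.org_rdir; // == (plane - org) * rdir; one msub/fma in the vector code
    }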
|
||||
|
||||
/* Robust variant */
|
||||
template<int K>
|
||||
struct TravRayK<K, true>
|
||||
{
|
||||
__forceinline TravRayK() {}
|
||||
|
||||
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
||||
{
|
||||
init(ray_org, ray_dir, N);
|
||||
}
|
||||
|
||||
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
|
||||
{
|
||||
init(ray_org, ray_dir, N);
|
||||
tnear = ray_tnear;
|
||||
tfar = ray_tfar;
|
||||
}
|
||||
|
||||
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
||||
{
|
||||
org = ray_org;
|
||||
dir = ray_dir;
|
||||
rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
|
||||
|
||||
if (N)
|
||||
{
|
||||
const int size = sizeof(float)*N;
|
||||
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
|
||||
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
|
||||
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
|
||||
}
|
||||
}
|
||||
|
||||
Vec3vf<K> org;
|
||||
Vec3vf<K> dir;
|
||||
Vec3vf<K> rdir;
|
||||
Vec3vi<K> nearXYZ;
|
||||
vfloat<K> tnear;
|
||||
vfloat<K> tfar;
|
||||
};
|
||||
|
||||
template<int K>
|
||||
using TravRayKRobust = TravRayK<K, true>;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast AABBNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
|
||||
const TravRayKFast<K>& ray, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) // SKX
|
||||
if (K == 16)
|
||||
{
|
||||
/* use mixed float/int min/max */
|
||||
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
#if defined(__AVX512F__) // SKX
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
#else
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
#endif
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Robust AABBNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
|
||||
const TravRayKRobust<K>& ray, vfloat<K>& dist)
|
||||
{
|
||||
// FIXME: use per instruction rounding for AVX512
|
||||
const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast AABBNodeMB intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) // SKX
|
||||
if (K == 16)
|
||||
{
|
||||
/* use mixed float/int min/max */
|
||||
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
#if defined(__AVX512F__) // SKX
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
#else
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
#endif
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Robust AABBNodeMB intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
|
||||
#if defined(__AVX512F__) // SKX
|
||||
if (K == 16)
|
||||
{
|
||||
const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast AABBNodeMB4D intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
|
||||
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
|
||||
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
|
||||
const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
|
||||
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
if (unlikely(ref.isAABBNodeMB4D())) {
|
||||
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
|
||||
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
|
||||
}
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Robust AABBNodeMB4D intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
|
||||
|
||||
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
||||
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
||||
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
||||
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
||||
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
||||
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
||||
|
||||
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
|
||||
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
|
||||
if (unlikely(ref.isAABBNodeMB4D())) {
|
||||
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
|
||||
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
|
||||
}
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast OBBNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K, bool robust>
|
||||
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
|
||||
const TravRayK<K,robust>& ray, vfloat<K>& dist)
|
||||
{
|
||||
const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
|
||||
Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
|
||||
Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
|
||||
Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
|
||||
|
||||
const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
|
||||
const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
|
||||
const Vec3vf<K> org = xfmPoint(naabb, ray.org);
|
||||
|
||||
const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
|
||||
const vfloat<K> lclipMinY = org.y * nrdir.y;
|
||||
const vfloat<K> lclipMinZ = org.z * nrdir.z;
|
||||
const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
|
||||
const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
|
||||
const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
|
||||
|
||||
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
if (robust) {
|
||||
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
|
||||
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
|
||||
}
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast OBBNodeMB intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K, bool robust>
|
||||
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
|
||||
const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
{
|
||||
const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
|
||||
Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
|
||||
Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
|
||||
Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
|
||||
|
||||
const Vec3vf<K> b0_lower = zero;
|
||||
const Vec3vf<K> b0_upper = one;
|
||||
const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
|
||||
const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
|
||||
const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
|
||||
const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
|
||||
|
||||
const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
|
||||
const Vec3vf<K> rdir = rcp_safe(dir);
|
||||
const Vec3vf<K> org = xfmPoint(xfm, ray.org);
|
||||
|
||||
const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
|
||||
|
||||
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
if (robust) {
|
||||
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
|
||||
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
|
||||
}
|
||||
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// QuantizedBaseNode intersection
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
|
||||
const TravRayK<K,false>& ray, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
const vfloat<N> lower_x = node->dequantizeLowerX();
|
||||
const vfloat<N> upper_x = node->dequantizeUpperX();
|
||||
const vfloat<N> lower_y = node->dequantizeLowerY();
|
||||
const vfloat<N> upper_y = node->dequantizeUpperY();
|
||||
const vfloat<N> lower_z = node->dequantizeLowerZ();
|
||||
const vfloat<N> upper_z = node->dequantizeUpperZ();
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) // SKX
|
||||
if (K == 16)
|
||||
{
|
||||
/* use mixed float/int min/max */
|
||||
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
||||
#if defined(__AVX512F__) // SKX
|
||||
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
||||
#else
|
||||
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
||||
#endif
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
}
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
|
||||
const TravRayK<K,true>& ray, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
const vfloat<N> lower_x = node->dequantizeLowerX();
|
||||
const vfloat<N> upper_x = node->dequantizeUpperX();
|
||||
const vfloat<N> lower_y = node->dequantizeLowerY();
|
||||
const vfloat<N> upper_y = node->dequantizeUpperY();
|
||||
const vfloat<N> lower_z = node->dequantizeLowerZ();
|
||||
const vfloat<N> upper_z = node->dequantizeUpperZ();
|
||||
|
||||
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
|
||||
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
||||
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
|
||||
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
|
||||
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
|
||||
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
|
||||
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
|
||||
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
|
||||
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
|
||||
|
||||
#if defined(__aarch64__)
|
||||
const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
||||
#elif defined(__AVX2__)
|
||||
const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
|
||||
const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
|
||||
const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
|
||||
const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
|
||||
#else
|
||||
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
|
||||
#endif
|
||||
const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
|
||||
template<int N, int K>
|
||||
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
||||
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
||||
|
||||
{
|
||||
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
||||
|
||||
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
|
||||
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
|
||||
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
|
||||
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
|
||||
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
|
||||
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
|
||||
|
||||
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
|
||||
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
|
||||
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
|
||||
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
|
||||
|
||||
const float round_up = 1.0f+3.0f*float(ulp);
|
||||
const float round_down = 1.0f-3.0f*float(ulp);
|
||||
|
||||
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
||||
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
||||
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
||||
dist = lnearP;
|
||||
return lhit;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Node intersectors used in hybrid traversal
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! Intersects N nodes with K rays */
|
||||
template<int N, int K, int types, bool robust>
|
||||
struct BVHNNodeIntersectorK;
|
||||
|
||||
    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
    {
      /* vmask is both an input and an output parameter! Its initial value should be the parent node
         hit mask, which is used for correctly computing the current hit mask. The parent hit mask
         is actually required only for motion blur node intersections (because different rays may
         have different times), so for regular nodes vmask is simply overwritten. */
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
        return true;
      }
    };

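    /* Illustrative sketch (not part of the original header): how a hybrid traversal loop is
       expected to seed vmask. 'valid' is the parent node's hit mask; the motion-blur intersectors
       (e.g. BVH_AN2_AN4D below) AND their result into vmask, while the BVH_AN1 specialization
       above simply overwrites it, so the seed only matters for motion blur nodes. The loop and
       output arrays here are hypothetical; the real traversal additionally skips empty children
       and ignores lanes left unset in hit[i]. */
    template<int N, int K, int types, bool robust>
    __forceinline void exampleTestChildren(const typename BVHN<N>::NodeRef& node,
                                           const TravRayK<K,robust>& tray,
                                           const vfloat<K>& time,
                                           const vbool<K>& valid,
                                           vfloat<K> dist[N],
                                           vbool<K> hit[N])
    {
      for (size_t i=0; i<N; i++) {
        hit[i] = valid; // seed with the parent hit mask before each child test
        BVHNNodeIntersectorK<N,K,types,robust>::intersect(node, i, tray, time, dist[i], hit[i]);
      }
    }
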
|
||||
template<int N, int K>
|
||||
struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
|
||||
{
|
||||
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
||||
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
||||
{
|
||||
vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        if (likely(node.isAABBNode()))           vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
        else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        if (likely(node.isAABBNode()))           vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
        else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        if (likely(node.isAABBNodeMB()))           vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
        else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        if (likely(node.isAABBNodeMB()))           vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
        else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
          vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
        } else /*if (unlikely(node.isOBBNodeMB()))*/ {
          assert(node.isOBBNodeMB());
          vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
        }
        return true;
      }
    };

    template<int N, int K>
    struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
    {
      static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                          const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
      {
        if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
          vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
        } else /*if (unlikely(node.isOBBNodeMB()))*/ {
          assert(node.isOBBNodeMB());
          vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
        }
        return true;
      }
    };

    /*! Intersects N nodes with K rays */
    template<int N, int K, bool robust>
    struct BVHNQuantizedBaseNodeIntersectorK;

    template<int N, int K>
    struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
    {
      static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
                                               const TravRayK<K,false>& ray, vfloat<K>& dist)
      {
        return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
      }

      static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                               const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
      {
        return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
      }
    };

    template<int N, int K>
    struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
    {
      static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
                                               const TravRayK<K,true>& ray, vfloat<K>& dist)
      {
        return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
      }

      static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                               const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
      {
        return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
      }
    };
  }
}
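
A minimal usage sketch (editorial, not part of the vendored file): a hybrid packet traversal
seeds vmask with the parent hit mask, calls the intersector once per child slot i, and keeps
the per-lane entry distances in dist. The function name processNode and the fixed BVH_AN1
template arguments below are illustrative assumptions.

    template<int N, int K>
    void processNode(const typename BVHN<N>::NodeRef& node,
                     const TravRayKFast<K>& ray, const vfloat<K>& time,
                     const vbool<K>& parent_hit)
    {
      for (size_t i = 0; i < N; i++)
      {
        vfloat<K> dist;
        vbool<K> vmask = parent_hit; // input: parent hit mask (only relevant for motion blur nodes)
        BVHNNodeIntersectorK<N, K, BVH_AN1, false>::intersect(node, i, ray, time, dist, vmask);
        if (none(vmask)) continue;   // no lane of the packet enters child i
        // ...push child i with entry distances 'dist' onto the traversal stack...
      }
    }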
474
engine/thirdparty/embree/kernels/common/accel.h
vendored
Normal file
@@ -0,0 +1,474 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "ray.h"
|
||||
#include "point_query.h"
|
||||
#include "context.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class Scene;
|
||||
|
||||
/*! Base class for the acceleration structure data. */
|
||||
class AccelData : public RefCount
|
||||
{
|
||||
ALIGNED_CLASS_(16);
|
||||
public:
|
||||
enum Type { TY_UNKNOWN = 0, TY_ACCELN = 1, TY_ACCEL_INSTANCE = 2, TY_BVH4 = 3, TY_BVH8 = 4, TY_GPU = 5 };
|
||||
|
||||
public:
|
||||
AccelData (const Type type)
|
||||
: bounds(empty), type(type) {}
|
||||
|
||||
/*! notifies the acceleration structure about the deletion of some geometry */
|
||||
virtual void deleteGeometry(size_t geomID) {};
|
||||
|
||||
/*! clears the acceleration structure data */
|
||||
virtual void clear() = 0;
|
||||
|
||||
/*! returns normal bounds */
|
||||
__forceinline BBox3fa getBounds() const {
|
||||
return bounds.bounds();
|
||||
}
|
||||
|
||||
/*! returns bounds for some time */
|
||||
__forceinline BBox3fa getBounds(float t) const {
|
||||
return bounds.interpolate(t);
|
||||
}
|
||||
|
||||
/*! returns linear bounds */
|
||||
__forceinline LBBox3fa getLinearBounds() const {
|
||||
return bounds;
|
||||
}
|
||||
|
||||
/*! checks if acceleration structure is empty */
|
||||
__forceinline bool isEmpty() const {
|
||||
return bounds.bounds0.lower.x == float(pos_inf);
|
||||
}
|
||||
|
||||
public:
|
||||
LBBox3fa bounds; // linear bounds
|
||||
Type type;
|
||||
};
|
||||
|
||||
/*! Base class for all intersectable and buildable acceleration structures. */
|
||||
class Accel : public AccelData
|
||||
{
|
||||
ALIGNED_CLASS_(16);
|
||||
public:
|
||||
|
||||
struct Intersectors;
|
||||
|
||||
/*! Type of collide function */
|
||||
typedef void (*CollideFunc)(void* bvh0, void* bvh1, RTCCollideFunc callback, void* userPtr);
|
||||
|
||||
/*! Type of point query function */
|
||||
typedef bool(*PointQueryFunc)(Intersectors* This, /*!< this pointer to accel */
|
||||
PointQuery* query, /*!< point query for lookup */
|
||||
PointQueryContext* context); /*!< point query context */
|
||||
|
||||
/*! Type of intersect function pointer for single rays. */
|
||||
typedef void (*IntersectFunc)(Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRayHit& ray, /*!< ray to intersect */
|
||||
RayQueryContext* context);
|
||||
|
||||
/*! Type of intersect function pointer for ray packets of size 4. */
|
||||
typedef void (*IntersectFunc4)(const void* valid, /*!< pointer to valid mask */
|
||||
Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRayHit4& ray, /*!< ray packet to intersect */
|
||||
RayQueryContext* context);
|
||||
|
||||
/*! Type of intersect function pointer for ray packets of size 8. */
|
||||
typedef void (*IntersectFunc8)(const void* valid, /*!< pointer to valid mask */
|
||||
Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRayHit8& ray, /*!< ray packet to intersect */
|
||||
RayQueryContext* context);
|
||||
|
||||
/*! Type of intersect function pointer for ray packets of size 16. */
|
||||
typedef void (*IntersectFunc16)(const void* valid, /*!< pointer to valid mask */
|
||||
Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRayHit16& ray, /*!< ray packet to intersect */
|
||||
RayQueryContext* context);
|
||||
|
||||
/*! Type of occlusion function pointer for single rays. */
|
||||
typedef void (*OccludedFunc) (Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRay& ray, /*!< ray to test occlusion */
|
||||
RayQueryContext* context);
|
||||
|
||||
/*! Type of occlusion function pointer for ray packets of size 4. */
|
||||
typedef void (*OccludedFunc4) (const void* valid, /*!< pointer to valid mask */
|
||||
Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRay4& ray, /*!< ray packet to test occlusion. */
|
||||
RayQueryContext* context);
|
||||
|
||||
/*! Type of occlusion function pointer for ray packets of size 8. */
|
||||
typedef void (*OccludedFunc8) (const void* valid, /*!< pointer to valid mask */
|
||||
Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRay8& ray, /*!< ray packet to test occlusion. */
|
||||
RayQueryContext* context);
|
||||
|
||||
/*! Type of occlusion function pointer for ray packets of size 16. */
|
||||
typedef void (*OccludedFunc16) (const void* valid, /*!< pointer to valid mask */
|
||||
Intersectors* This, /*!< this pointer to accel */
|
||||
RTCRay16& ray, /*!< ray packet to test occlusion. */
|
||||
RayQueryContext* context);
|
||||
|
||||
typedef void (*ErrorFunc) ();
|
||||
|
||||
struct Collider
|
||||
{
|
||||
Collider (ErrorFunc error = nullptr)
|
||||
: collide((CollideFunc)error), name(nullptr) {}
|
||||
|
||||
Collider (CollideFunc collide, const char* name)
|
||||
: collide(collide), name(name) {}
|
||||
|
||||
operator bool() const { return name; }
|
||||
|
||||
public:
|
||||
CollideFunc collide;
|
||||
const char* name;
|
||||
};
|
||||
|
||||
struct Intersector1
|
||||
{
|
||||
Intersector1 (ErrorFunc error = nullptr)
|
||||
: intersect((IntersectFunc)error), occluded((OccludedFunc)error), name(nullptr) {}
|
||||
|
||||
Intersector1 (IntersectFunc intersect, OccludedFunc occluded, const char* name)
|
||||
: intersect(intersect), occluded(occluded), pointQuery(nullptr), name(name) {}
|
||||
|
||||
Intersector1 (IntersectFunc intersect, OccludedFunc occluded, PointQueryFunc pointQuery, const char* name)
|
||||
: intersect(intersect), occluded(occluded), pointQuery(pointQuery), name(name) {}
|
||||
|
||||
operator bool() const { return name; }
|
||||
|
||||
public:
|
||||
static const char* type;
|
||||
IntersectFunc intersect;
|
||||
OccludedFunc occluded;
|
||||
PointQueryFunc pointQuery;
|
||||
const char* name;
|
||||
};
|
||||
|
||||
struct Intersector4
|
||||
{
|
||||
Intersector4 (ErrorFunc error = nullptr)
|
||||
: intersect((IntersectFunc4)error), occluded((OccludedFunc4)error), name(nullptr) {}
|
||||
|
||||
Intersector4 (IntersectFunc4 intersect, OccludedFunc4 occluded, const char* name)
|
||||
: intersect(intersect), occluded(occluded), name(name) {}
|
||||
|
||||
operator bool() const { return name; }
|
||||
|
||||
public:
|
||||
static const char* type;
|
||||
IntersectFunc4 intersect;
|
||||
OccludedFunc4 occluded;
|
||||
const char* name;
|
||||
};
|
||||
|
||||
struct Intersector8
|
||||
{
|
||||
Intersector8 (ErrorFunc error = nullptr)
|
||||
: intersect((IntersectFunc8)error), occluded((OccludedFunc8)error), name(nullptr) {}
|
||||
|
||||
Intersector8 (IntersectFunc8 intersect, OccludedFunc8 occluded, const char* name)
|
||||
: intersect(intersect), occluded(occluded), name(name) {}
|
||||
|
||||
operator bool() const { return name; }
|
||||
|
||||
public:
|
||||
static const char* type;
|
||||
IntersectFunc8 intersect;
|
||||
OccludedFunc8 occluded;
|
||||
const char* name;
|
||||
};
|
||||
|
||||
struct Intersector16
|
||||
{
|
||||
Intersector16 (ErrorFunc error = nullptr)
|
||||
: intersect((IntersectFunc16)error), occluded((OccludedFunc16)error), name(nullptr) {}
|
||||
|
||||
Intersector16 (IntersectFunc16 intersect, OccludedFunc16 occluded, const char* name)
|
||||
: intersect(intersect), occluded(occluded), name(name) {}
|
||||
|
||||
operator bool() const { return name; }
|
||||
|
||||
public:
|
||||
static const char* type;
|
||||
IntersectFunc16 intersect;
|
||||
OccludedFunc16 occluded;
|
||||
const char* name;
|
||||
};
|
||||
|
||||
struct Intersectors
|
||||
{
|
||||
Intersectors()
|
||||
: ptr(nullptr), leafIntersector(nullptr), collider(nullptr), intersector1(nullptr), intersector4(nullptr), intersector8(nullptr), intersector16(nullptr) {}
|
||||
|
||||
Intersectors (ErrorFunc error)
|
||||
: ptr(nullptr), leafIntersector(nullptr), collider(error), intersector1(error), intersector4(error), intersector8(error), intersector16(error) {}
|
||||
|
||||
void print(size_t ident)
|
||||
{
|
||||
if (collider.name) {
|
||||
for (size_t i=0; i<ident; i++) std::cout << " ";
|
||||
std::cout << "collider = " << collider.name << std::endl;
|
||||
}
|
||||
if (intersector1.name) {
|
||||
for (size_t i=0; i<ident; i++) std::cout << " ";
|
||||
std::cout << "intersector1 = " << intersector1.name << std::endl;
|
||||
}
|
||||
if (intersector4.name) {
|
||||
for (size_t i=0; i<ident; i++) std::cout << " ";
|
||||
std::cout << "intersector4 = " << intersector4.name << std::endl;
|
||||
}
|
||||
if (intersector8.name) {
|
||||
for (size_t i=0; i<ident; i++) std::cout << " ";
|
||||
std::cout << "intersector8 = " << intersector8.name << std::endl;
|
||||
}
|
||||
if (intersector16.name) {
|
||||
for (size_t i=0; i<ident; i++) std::cout << " ";
|
||||
std::cout << "intersector16 = " << intersector16.name << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void select(bool filter)
|
||||
{
|
||||
if (intersector4_filter) {
|
||||
if (filter) intersector4 = intersector4_filter;
|
||||
else intersector4 = intersector4_nofilter;
|
||||
}
|
||||
if (intersector8_filter) {
|
||||
if (filter) intersector8 = intersector8_filter;
|
||||
else intersector8 = intersector8_nofilter;
|
||||
}
|
||||
if (intersector16_filter) {
|
||||
if (filter) intersector16 = intersector16_filter;
|
||||
else intersector16 = intersector16_nofilter;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline bool pointQuery (PointQuery* query, PointQueryContext* context) {
|
||||
assert(intersector1.pointQuery);
|
||||
return intersector1.pointQuery(this,query,context);
|
||||
}
|
||||
|
||||
/*! collides two scenes */
|
||||
__forceinline void collide (Accel* scene0, Accel* scene1, RTCCollideFunc callback, void* userPtr) {
|
||||
assert(collider.collide);
|
||||
collider.collide(scene0->intersectors.ptr,scene1->intersectors.ptr,callback,userPtr);
|
||||
}
|
||||
|
||||
/*! Intersects a single ray with the scene. */
|
||||
__forceinline void intersect (RTCRayHit& ray, RayQueryContext* context) {
|
||||
assert(intersector1.intersect);
|
||||
intersector1.intersect(this,ray,context);
|
||||
}
|
||||
|
||||
/*! Intersects a packet of 4 rays with the scene. */
|
||||
__forceinline void intersect4 (const void* valid, RTCRayHit4& ray, RayQueryContext* context) {
|
||||
assert(intersector4.intersect);
|
||||
intersector4.intersect(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Intersects a packet of 8 rays with the scene. */
|
||||
__forceinline void intersect8 (const void* valid, RTCRayHit8& ray, RayQueryContext* context) {
|
||||
assert(intersector8.intersect);
|
||||
intersector8.intersect(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Intersects a packet of 16 rays with the scene. */
|
||||
__forceinline void intersect16 (const void* valid, RTCRayHit16& ray, RayQueryContext* context) {
|
||||
assert(intersector16.intersect);
|
||||
intersector16.intersect(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Intersects a packet of 4 rays with the scene. */
|
||||
__forceinline void intersect (const void* valid, RTCRayHit4& ray, RayQueryContext* context) {
|
||||
assert(intersector4.intersect);
|
||||
intersector4.intersect(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Intersects a packet of 8 rays with the scene. */
|
||||
__forceinline void intersect (const void* valid, RTCRayHit8& ray, RayQueryContext* context) {
|
||||
assert(intersector8.intersect);
|
||||
intersector8.intersect(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Intersects a packet of 16 rays with the scene. */
|
||||
__forceinline void intersect (const void* valid, RTCRayHit16& ray, RayQueryContext* context) {
|
||||
assert(intersector16.intersect);
|
||||
intersector16.intersect(valid,this,ray,context);
|
||||
}
|
||||
|
||||
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||
__forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, RayQueryContext* context) {
|
||||
const vint<4> mask = valid.mask32();
|
||||
intersect4(&mask,(RTCRayHit4&)ray,context);
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX__)
|
||||
__forceinline void intersect(const vbool8& valid, RayHitK<8>& ray, RayQueryContext* context) {
|
||||
const vint<8> mask = valid.mask32();
|
||||
intersect8(&mask,(RTCRayHit8&)ray,context);
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX512F__)
|
||||
__forceinline void intersect(const vbool16& valid, RayHitK<16>& ray, RayQueryContext* context) {
|
||||
const vint<16> mask = valid.mask32();
|
||||
intersect16(&mask,(RTCRayHit16&)ray,context);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*! Tests if single ray is occluded by the scene. */
|
||||
__forceinline void occluded (RTCRay& ray, RayQueryContext* context) {
|
||||
assert(intersector1.occluded);
|
||||
intersector1.occluded(this,ray,context);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of 4 rays is occluded by the scene. */
|
||||
__forceinline void occluded4 (const void* valid, RTCRay4& ray, RayQueryContext* context) {
|
||||
assert(intersector4.occluded);
|
||||
intersector4.occluded(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of 8 rays is occluded by the scene. */
|
||||
__forceinline void occluded8 (const void* valid, RTCRay8& ray, RayQueryContext* context) {
|
||||
assert(intersector8.occluded);
|
||||
intersector8.occluded(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of 16 rays is occluded by the scene. */
|
||||
__forceinline void occluded16 (const void* valid, RTCRay16& ray, RayQueryContext* context) {
|
||||
assert(intersector16.occluded);
|
||||
intersector16.occluded(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of 4 rays is occluded by the scene. */
|
||||
__forceinline void occluded (const void* valid, RTCRay4& ray, RayQueryContext* context) {
|
||||
assert(intersector4.occluded);
|
||||
intersector4.occluded(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of 8 rays is occluded by the scene. */
|
||||
__forceinline void occluded (const void* valid, RTCRay8& ray, RayQueryContext* context) {
|
||||
assert(intersector8.occluded);
|
||||
intersector8.occluded(valid,this,ray,context);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of 16 rays is occluded by the scene. */
|
||||
__forceinline void occluded (const void* valid, RTCRay16& ray, RayQueryContext* context) {
|
||||
assert(intersector16.occluded);
|
||||
intersector16.occluded(valid,this,ray,context);
|
||||
}
|
||||
|
||||
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||
__forceinline void occluded(const vbool4& valid, RayK<4>& ray, RayQueryContext* context) {
|
||||
const vint<4> mask = valid.mask32();
|
||||
occluded4(&mask,(RTCRay4&)ray,context);
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX__)
|
||||
__forceinline void occluded(const vbool8& valid, RayK<8>& ray, RayQueryContext* context) {
|
||||
const vint<8> mask = valid.mask32();
|
||||
occluded8(&mask,(RTCRay8&)ray,context);
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX512F__)
|
||||
__forceinline void occluded(const vbool16& valid, RayK<16>& ray, RayQueryContext* context) {
|
||||
const vint<16> mask = valid.mask32();
|
||||
occluded16(&mask,(RTCRay16&)ray,context);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*! Tests if single ray is occluded by the scene. */
|
||||
__forceinline void intersect(RTCRay& ray, RayQueryContext* context) {
|
||||
occluded(ray, context);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of K rays is occluded by the scene. */
|
||||
template<int K>
|
||||
__forceinline void intersect(const vbool<K>& valid, RayK<K>& ray, RayQueryContext* context) {
|
||||
occluded(valid, ray, context);
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
AccelData* ptr;
|
||||
void* leafIntersector;
|
||||
Collider collider;
|
||||
Intersector1 intersector1;
|
||||
Intersector4 intersector4;
|
||||
Intersector4 intersector4_filter;
|
||||
Intersector4 intersector4_nofilter;
|
||||
Intersector8 intersector8;
|
||||
Intersector8 intersector8_filter;
|
||||
Intersector8 intersector8_nofilter;
|
||||
Intersector16 intersector16;
|
||||
Intersector16 intersector16_filter;
|
||||
Intersector16 intersector16_nofilter;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/*! Construction */
|
||||
Accel (const AccelData::Type type)
|
||||
: AccelData(type) {}
|
||||
|
||||
/*! Construction */
|
||||
Accel (const AccelData::Type type, const Intersectors& intersectors)
|
||||
: AccelData(type), intersectors(intersectors) {}
|
||||
|
||||
/*! Virtual destructor */
|
||||
virtual ~Accel() {}
|
||||
|
||||
/*! makes the acceleration structure immutable */
|
||||
virtual void immutable () {}
|
||||
|
||||
/*! build acceleration structure */
|
||||
virtual void build () = 0;
|
||||
|
||||
public:
|
||||
Intersectors intersectors;
|
||||
};
|
||||
|
||||
#define DEFINE_COLLIDER(symbol,collider) \
|
||||
Accel::Collider symbol() { \
|
||||
return Accel::Collider((Accel::CollideFunc)collider::collide, \
|
||||
TOSTRING(isa) "::" TOSTRING(symbol)); \
|
||||
}
|
||||
|
||||
#define DEFINE_INTERSECTOR1(symbol,intersector) \
|
||||
Accel::Intersector1 symbol() { \
|
||||
return Accel::Intersector1((Accel::IntersectFunc )intersector::intersect, \
|
||||
(Accel::OccludedFunc )intersector::occluded, \
|
||||
(Accel::PointQueryFunc)intersector::pointQuery,\
|
||||
TOSTRING(isa) "::" TOSTRING(symbol)); \
|
||||
}
|
||||
|
||||
#define DEFINE_INTERSECTOR4(symbol,intersector) \
|
||||
Accel::Intersector4 symbol() { \
|
||||
return Accel::Intersector4((Accel::IntersectFunc4)intersector::intersect, \
|
||||
(Accel::OccludedFunc4)intersector::occluded, \
|
||||
TOSTRING(isa) "::" TOSTRING(symbol)); \
|
||||
}
|
||||
|
||||
#define DEFINE_INTERSECTOR8(symbol,intersector) \
|
||||
Accel::Intersector8 symbol() { \
|
||||
return Accel::Intersector8((Accel::IntersectFunc8)intersector::intersect, \
|
||||
(Accel::OccludedFunc8)intersector::occluded, \
|
||||
TOSTRING(isa) "::" TOSTRING(symbol)); \
|
||||
}
|
||||
|
||||
#define DEFINE_INTERSECTOR16(symbol,intersector) \
|
||||
Accel::Intersector16 symbol() { \
|
||||
return Accel::Intersector16((Accel::IntersectFunc16)intersector::intersect, \
|
||||
(Accel::OccludedFunc16)intersector::occluded, \
|
||||
TOSTRING(isa) "::" TOSTRING(symbol)); \
|
||||
}
|
||||
}
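
Editorial note: Accel::Intersectors above is just a table of function pointers; each BVH
variant registers its entry points (typically through the DEFINE_INTERSECTOR* macros) and the
inline wrappers forward to them. A hedged sketch of wiring and invoking a single-ray
intersector, where MyIntersector is a hypothetical stand-in for a concrete implementation:

    namespace MyIntersector {
      void intersect(embree::Accel::Intersectors* This, RTCRayHit& ray, embree::RayQueryContext* context) { /* ... */ }
      void occluded (embree::Accel::Intersectors* This, RTCRay& ray,    embree::RayQueryContext* context) { /* ... */ }
    }

    void traceOne(embree::Accel::Intersectors& table, RTCRayHit& ray, embree::RayQueryContext* context)
    {
      table.intersector1 = embree::Accel::Intersector1(&MyIntersector::intersect,
                                                       &MyIntersector::occluded,
                                                       "MyIntersector::intersector1");
      table.intersect(ray, context); // asserts intersector1.intersect is set and forwards to it
    }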
41
engine/thirdparty/embree/kernels/common/accelinstance.h
vendored
Normal file
@@ -0,0 +1,41 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "accel.h"
#include "builder.h"

namespace embree
{
  class AccelInstance : public Accel
  {
  public:
    AccelInstance (AccelData* accel, Builder* builder, Intersectors& intersectors)
      : Accel(AccelData::TY_ACCEL_INSTANCE,intersectors), accel(accel), builder(builder) {}

    void immutable () {
      builder.reset(nullptr);
    }

  public:
    void build () {
      if (builder) builder->build();
      bounds = accel->bounds;
    }

    void deleteGeometry(size_t geomID) {
      if (accel ) accel->deleteGeometry(geomID);
      if (builder) builder->deleteGeometry(geomID);
    }

    void clear() {
      if (accel) accel->clear();
      if (builder) builder->clear();
    }

  private:
    std::unique_ptr<AccelData> accel;
    std::unique_ptr<Builder> builder;
  };
}
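
Editorial note: AccelInstance pairs a data structure with the builder that fills it; build()
runs the builder and then mirrors the built bounds into the instance. A hedged usage sketch,
where createMyAccelData() and createMyBuilder() are hypothetical factories for a concrete BVH
type and its builder:

    embree::Accel* makeInstance(embree::Accel::Intersectors& intersectors)
    {
      embree::AccelData* data    = createMyAccelData();    // e.g. a BVH object derived from AccelData
      embree::Builder*   builder = createMyBuilder(data);  // fills 'data' when build() is called
      embree::Accel* accel = new embree::AccelInstance(data, builder, intersectors);
      accel->build();  // runs the builder, then copies data->bounds into the instance
      return accel;
    }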
214
engine/thirdparty/embree/kernels/common/acceln.cpp
vendored
Normal file
@@ -0,0 +1,214 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "acceln.h"
|
||||
#include "ray.h"
|
||||
#include "../../include/embree4/rtcore_ray.h"
|
||||
#include "../../common/algorithms/parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
AccelN::AccelN()
|
||||
: Accel(AccelData::TY_ACCELN), accels() {}
|
||||
|
||||
AccelN::~AccelN()
|
||||
{
|
||||
for (size_t i=0; i<accels.size(); i++)
|
||||
delete accels[i];
|
||||
}
|
||||
|
||||
void AccelN::accels_add(Accel* accel)
|
||||
{
|
||||
assert(accel);
|
||||
accels.push_back(accel);
|
||||
}
|
||||
|
||||
void AccelN::accels_init()
|
||||
{
|
||||
for (size_t i=0; i<accels.size(); i++)
|
||||
delete accels[i];
|
||||
|
||||
accels.clear();
|
||||
}
|
||||
|
||||
bool AccelN::pointQuery (Accel::Intersectors* This_in, PointQuery* query, PointQueryContext* context)
|
||||
{
|
||||
bool changed = false;
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++)
|
||||
if (!This->accels[i]->isEmpty())
|
||||
changed |= This->accels[i]->intersectors.pointQuery(query,context);
|
||||
return changed;
|
||||
}
|
||||
|
||||
void AccelN::intersect (Accel::Intersectors* This_in, RTCRayHit& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++)
|
||||
if (!This->accels[i]->isEmpty())
|
||||
This->accels[i]->intersectors.intersect(ray,context);
|
||||
}
|
||||
|
||||
void AccelN::intersect4 (const void* valid, Accel::Intersectors* This_in, RTCRayHit4& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++)
|
||||
if (!This->accels[i]->isEmpty())
|
||||
This->accels[i]->intersectors.intersect4(valid,ray,context);
|
||||
}
|
||||
|
||||
void AccelN::intersect8 (const void* valid, Accel::Intersectors* This_in, RTCRayHit8& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++)
|
||||
if (!This->accels[i]->isEmpty())
|
||||
This->accels[i]->intersectors.intersect8(valid,ray,context);
|
||||
}
|
||||
|
||||
void AccelN::intersect16 (const void* valid, Accel::Intersectors* This_in, RTCRayHit16& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++)
|
||||
if (!This->accels[i]->isEmpty())
|
||||
This->accels[i]->intersectors.intersect16(valid,ray,context);
|
||||
}
|
||||
|
||||
void AccelN::occluded (Accel::Intersectors* This_in, RTCRay& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++) {
|
||||
if (This->accels[i]->isEmpty()) continue;
|
||||
This->accels[i]->intersectors.occluded(ray,context);
|
||||
if (ray.tfar < 0.0f) break;
|
||||
}
|
||||
}
|
||||
|
||||
void AccelN::occluded4 (const void* valid, Accel::Intersectors* This_in, RTCRay4& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++) {
|
||||
if (This->accels[i]->isEmpty()) continue;
|
||||
This->accels[i]->intersectors.occluded4(valid,ray,context);
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
vbool4 valid0 = asBool(((vint4*)valid)[0]);
|
||||
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
|
||||
if (unlikely(none(valid0 & hit0))) break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void AccelN::occluded8 (const void* valid, Accel::Intersectors* This_in, RTCRay8& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++) {
|
||||
if (This->accels[i]->isEmpty()) continue;
|
||||
This->accels[i]->intersectors.occluded8(valid,ray,context);
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
|
||||
vbool4 valid0 = asBool(((vint4*)valid)[0]);
|
||||
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
|
||||
vbool4 valid1 = asBool(((vint4*)valid)[1]);
|
||||
vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
|
||||
if (unlikely((none((valid0 & hit0) | (valid1 & hit1))))) break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void AccelN::occluded16 (const void* valid, Accel::Intersectors* This_in, RTCRay16& ray, RayQueryContext* context)
|
||||
{
|
||||
AccelN* This = (AccelN*)This_in->ptr;
|
||||
for (size_t i=0; i<This->accels.size(); i++) {
|
||||
if (This->accels[i]->isEmpty()) continue;
|
||||
This->accels[i]->intersectors.occluded16(valid,ray,context);
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
|
||||
vbool4 valid0 = asBool(((vint4*)valid)[0]);
|
||||
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
|
||||
vbool4 valid1 = asBool(((vint4*)valid)[1]);
|
||||
vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
|
||||
vbool4 valid2 = asBool(((vint4*)valid)[2]);
|
||||
vbool4 hit2 = ((vfloat4*)ray.tfar)[2] >= vfloat4(zero);
|
||||
vbool4 valid3 = asBool(((vint4*)valid)[3]);
|
||||
vbool4 hit3 = ((vfloat4*)ray.tfar)[3] >= vfloat4(zero);
|
||||
if (unlikely((none((valid0 & hit0) | (valid1 & hit1) | (valid2 & hit2) | (valid3 & hit3))))) break;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void AccelN::accels_print(size_t ident)
|
||||
{
|
||||
for (size_t i=0; i<accels.size(); i++)
|
||||
{
|
||||
for (size_t j=0; j<ident; j++) std::cout << " ";
|
||||
std::cout << "accels[" << i << "]" << std::endl;
|
||||
accels[i]->intersectors.print(ident+2);
|
||||
}
|
||||
}
|
||||
|
||||
void AccelN::accels_immutable()
|
||||
{
|
||||
for (size_t i=0; i<accels.size(); i++)
|
||||
accels[i]->immutable();
|
||||
}
|
||||
|
||||
void AccelN::accels_build ()
|
||||
{
|
||||
/* reduce memory consumption */
|
||||
accels.shrink_to_fit();
|
||||
|
||||
/* build all acceleration structures in parallel */
|
||||
parallel_for (accels.size(), [&] (size_t i) {
|
||||
accels[i]->build();
|
||||
});
|
||||
|
||||
/* create list of non-empty acceleration structures */
|
||||
bool valid1 = true;
|
||||
bool valid4 = true;
|
||||
bool valid8 = true;
|
||||
bool valid16 = true;
|
||||
for (size_t i=0; i<accels.size(); i++) {
|
||||
valid1 &= (bool) accels[i]->intersectors.intersector1;
|
||||
valid4 &= (bool) accels[i]->intersectors.intersector4;
|
||||
valid8 &= (bool) accels[i]->intersectors.intersector8;
|
||||
valid16 &= (bool) accels[i]->intersectors.intersector16;
|
||||
}
|
||||
|
||||
if (accels.size() == 1) {
|
||||
type = accels[0]->type; // FIXME: should just assign entire Accel
|
||||
bounds = accels[0]->bounds;
|
||||
intersectors = accels[0]->intersectors;
|
||||
}
|
||||
else
|
||||
{
|
||||
type = AccelData::TY_ACCELN;
|
||||
intersectors.ptr = this;
|
||||
intersectors.intersector1 = Intersector1(&intersect,&occluded,&pointQuery,valid1 ? "AccelN::intersector1": nullptr);
|
||||
intersectors.intersector4 = Intersector4(&intersect4,&occluded4,valid4 ? "AccelN::intersector4" : nullptr);
|
||||
intersectors.intersector8 = Intersector8(&intersect8,&occluded8,valid8 ? "AccelN::intersector8" : nullptr);
|
||||
intersectors.intersector16 = Intersector16(&intersect16,&occluded16,valid16 ? "AccelN::intersector16": nullptr);
|
||||
|
||||
/*! calculate bounds */
|
||||
bounds = empty;
|
||||
for (size_t i=0; i<accels.size(); i++)
|
||||
bounds.extend(accels[i]->bounds);
|
||||
}
|
||||
}
|
||||
|
||||
void AccelN::accels_select(bool filter)
|
||||
{
|
||||
for (size_t i=0; i<accels.size(); i++)
|
||||
accels[i]->intersectors.select(filter);
|
||||
}
|
||||
|
||||
void AccelN::accels_deleteGeometry(size_t geomID)
|
||||
{
|
||||
for (size_t i=0; i<accels.size(); i++)
|
||||
accels[i]->deleteGeometry(geomID);
|
||||
}
|
||||
|
||||
void AccelN::accels_clear()
|
||||
{
|
||||
for (size_t i=0; i<accels.size(); i++) {
|
||||
accels[i]->clear();
|
||||
}
|
||||
}
|
||||
}
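
Editorial note: the occlusion paths above terminate as early as possible. A single ray is
dropped once ray.tfar goes negative (the convention used here to mark a ray as occluded), and
the packet variants re-check the lane masks after every sub-structure. A scalar sketch of the
same early-out pattern over a list of structures (the function name is illustrative):

    void occludedOverList(const std::vector<embree::Accel*>& structures,
                          RTCRay& ray, embree::RayQueryContext* context)
    {
      for (embree::Accel* accel : structures) {
        if (accel->isEmpty()) continue;            // nothing to test in this structure
        accel->intersectors.occluded(ray, context);
        if (ray.tfar < 0.0f) break;                // ray already proven occluded, stop early
      }
    }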
47
engine/thirdparty/embree/kernels/common/acceln.h
vendored
Normal file
@@ -0,0 +1,47 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "accel.h"

namespace embree
{
  /*! merges N acceleration structures together, by processing them in order */
  class AccelN : public Accel
  {
  public:
    AccelN ();
    ~AccelN();

  public:
    void accels_add(Accel* accel);
    void accels_init();

  public:
    static bool pointQuery (Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);

  public:
    static void intersect (Accel::Intersectors* This, RTCRayHit& ray, RayQueryContext* context);
    static void intersect4 (const void* valid, Accel::Intersectors* This, RTCRayHit4& ray, RayQueryContext* context);
    static void intersect8 (const void* valid, Accel::Intersectors* This, RTCRayHit8& ray, RayQueryContext* context);
    static void intersect16 (const void* valid, Accel::Intersectors* This, RTCRayHit16& ray, RayQueryContext* context);

  public:
    static void occluded (Accel::Intersectors* This, RTCRay& ray, RayQueryContext* context);
    static void occluded4 (const void* valid, Accel::Intersectors* This, RTCRay4& ray, RayQueryContext* context);
    static void occluded8 (const void* valid, Accel::Intersectors* This, RTCRay8& ray, RayQueryContext* context);
    static void occluded16 (const void* valid, Accel::Intersectors* This, RTCRay16& ray, RayQueryContext* context);

  public:
    void accels_print(size_t ident);
    void accels_immutable();
    void accels_build ();
    void accels_select(bool filter);
    void accels_deleteGeometry(size_t geomID);
    void accels_clear ();

  public:
    std::vector<Accel*> accels;
  };
}
17
engine/thirdparty/embree/kernels/common/accelset.cpp
vendored
Normal file
@@ -0,0 +1,17 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "accelset.h"
#include "scene.h"

namespace embree
{
  AccelSet::AccelSet (Device* device, Geometry::GType gtype, size_t numItems, size_t numTimeSteps)
    : Geometry(device,gtype,(unsigned int)numItems,(unsigned int)numTimeSteps), boundsFunc(nullptr) {}

  AccelSet::IntersectorN::IntersectorN (ErrorFunc error)
    : intersect((IntersectFuncN)error), occluded((OccludedFuncN)error), name(nullptr) {}

  AccelSet::IntersectorN::IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name)
    : intersect(intersect), occluded(occluded), name(name) {}
}
347
engine/thirdparty/embree/kernels/common/accelset.h
vendored
Normal file
@@ -0,0 +1,347 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "builder.h"
|
||||
#include "geometry.h"
|
||||
#include "ray.h"
|
||||
#include "hit.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct IntersectFunctionNArguments;
|
||||
struct OccludedFunctionNArguments;
|
||||
|
||||
struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments
|
||||
{
|
||||
Geometry* geometry;
|
||||
RTCScene forward_scene;
|
||||
RTCIntersectArguments* args;
|
||||
};
|
||||
|
||||
struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments
|
||||
{
|
||||
Geometry* geometry;
|
||||
RTCScene forward_scene;
|
||||
RTCIntersectArguments* args;
|
||||
};
|
||||
|
||||
/*! Base class for set of acceleration structures. */
|
||||
class AccelSet : public Geometry
|
||||
{
|
||||
public:
|
||||
typedef RTCIntersectFunctionN IntersectFuncN;
|
||||
typedef RTCOccludedFunctionN OccludedFuncN;
|
||||
typedef void (*ErrorFunc) ();
|
||||
|
||||
struct IntersectorN
|
||||
{
|
||||
IntersectorN (ErrorFunc error = nullptr) ;
|
||||
IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name);
|
||||
|
||||
operator bool() const { return name; }
|
||||
|
||||
public:
|
||||
static const char* type;
|
||||
IntersectFuncN intersect;
|
||||
OccludedFuncN occluded;
|
||||
const char* name;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/*! construction */
|
||||
AccelSet (Device* device, Geometry::GType gtype, size_t items, size_t numTimeSteps);
|
||||
|
||||
/*! makes the acceleration structure immutable */
|
||||
virtual void immutable () {}
|
||||
|
||||
/*! build accel */
|
||||
virtual void build () = 0;
|
||||
|
||||
/*! check if the i'th primitive is valid between the specified time range */
|
||||
__forceinline bool valid(size_t i, const range<size_t>& itime_range) const
|
||||
{
|
||||
for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
|
||||
if (!isvalid_non_empty(bounds(i,itime))) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*! Calculates the bounds of an item */
|
||||
__forceinline BBox3fa bounds(size_t i, size_t itime = 0) const
|
||||
{
|
||||
BBox3fa box;
|
||||
assert(i < size());
|
||||
RTCBoundsFunctionArguments args;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.primID = (unsigned int)i;
|
||||
args.timeStep = (unsigned int)itime;
|
||||
args.bounds_o = (RTCBounds*)&box;
|
||||
boundsFunc(&args);
|
||||
return box;
|
||||
}
|
||||
|
||||
/*! calculates the linear bounds of the i'th item at the itime'th time segment */
|
||||
__forceinline LBBox3fa linearBounds(size_t i, size_t itime) const
|
||||
{
|
||||
BBox3fa box[2];
|
||||
assert(i < size());
|
||||
RTCBoundsFunctionArguments args;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.primID = (unsigned int)i;
|
||||
args.timeStep = (unsigned int)(itime+0);
|
||||
args.bounds_o = (RTCBounds*)&box[0];
|
||||
boundsFunc(&args);
|
||||
args.timeStep = (unsigned int)(itime+1);
|
||||
args.bounds_o = (RTCBounds*)&box[1];
|
||||
boundsFunc(&args);
|
||||
return LBBox3fa(box[0],box[1]);
|
||||
}
|
||||
|
||||
/*! calculates the build bounds of the i'th item, if it's valid */
|
||||
__forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
|
||||
{
|
||||
const BBox3fa b = bounds(i);
|
||||
if (bbox) *bbox = b;
|
||||
return isvalid_non_empty(b);
|
||||
}
|
||||
|
||||
/*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
|
||||
__forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
|
||||
{
|
||||
const LBBox3fa bounds = linearBounds(i,itime);
|
||||
bbox = bounds.bounds0; // use bounding box of first timestep to build BVH
|
||||
return isvalid_non_empty(bounds);
|
||||
}
|
||||
|
||||
/*! calculates the linear bounds of the i'th primitive for the specified time range */
|
||||
__forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
|
||||
return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
|
||||
}
|
||||
|
||||
/*! calculates the linear bounds of the i'th primitive for the specified time range */
|
||||
__forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const {
|
||||
if (!valid(i, timeSegmentRange(time_range))) return false;
|
||||
bbox = linearBounds(i, time_range);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* gets version info of topology */
|
||||
unsigned int getTopologyVersion() const {
|
||||
return numPrimitives;
|
||||
}
|
||||
|
||||
/* returns true if topology changed */
|
||||
bool topologyChanged(unsigned int otherVersion) const {
|
||||
return numPrimitives != otherVersion;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/*! Intersects a single ray with the scene. */
|
||||
__forceinline bool intersect (RayHit& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
|
||||
{
|
||||
assert(primID < size());
|
||||
|
||||
int mask = -1;
|
||||
IntersectFunctionNArguments args;
|
||||
args.valid = &mask;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.context = context->user;
|
||||
args.rayhit = (RTCRayHitN*)&ray;
|
||||
args.N = 1;
|
||||
args.geomID = geomID;
|
||||
args.primID = primID;
|
||||
args.geometry = this;
|
||||
args.forward_scene = nullptr;
|
||||
args.args = context->args;
|
||||
|
||||
IntersectFuncN intersectFunc = nullptr;
|
||||
intersectFunc = intersectorN.intersect;
|
||||
|
||||
if (context->getIntersectFunction())
|
||||
intersectFunc = context->getIntersectFunction();
|
||||
|
||||
assert(intersectFunc);
|
||||
intersectFunc(&args);
|
||||
|
||||
return mask != 0;
|
||||
}
|
||||
|
||||
/*! Tests if single ray is occluded by the scene. */
|
||||
__forceinline bool occluded (Ray& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
|
||||
{
|
||||
assert(primID < size());
|
||||
|
||||
int mask = -1;
|
||||
OccludedFunctionNArguments args;
|
||||
args.valid = &mask;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.context = context->user;
|
||||
args.ray = (RTCRayN*)&ray;
|
||||
args.N = 1;
|
||||
args.geomID = geomID;
|
||||
args.primID = primID;
|
||||
args.geometry = this;
|
||||
args.forward_scene = nullptr;
|
||||
args.args = context->args;
|
||||
|
||||
OccludedFuncN occludedFunc = nullptr;
|
||||
occludedFunc = intersectorN.occluded;
|
||||
|
||||
if (context->getOccludedFunction())
|
||||
occludedFunc = context->getOccludedFunction();
|
||||
|
||||
assert(occludedFunc);
|
||||
occludedFunc(&args);
|
||||
|
||||
return mask != 0;
|
||||
}
|
||||
|
||||
/*! Intersects a single ray with the scene. */
|
||||
__forceinline bool intersect (RayHit& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context, RTCScene& forward_scene)
|
||||
{
|
||||
assert(primID < size());
|
||||
|
||||
int mask = -1;
|
||||
IntersectFunctionNArguments args;
|
||||
args.valid = &mask;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.context = context->user;
|
||||
args.rayhit = (RTCRayHitN*)&ray;
|
||||
args.N = 1;
|
||||
args.geomID = geomID;
|
||||
args.primID = primID;
|
||||
args.geometry = this;
|
||||
args.forward_scene = nullptr;
|
||||
args.args = nullptr;
|
||||
|
||||
typedef void (*RTCIntersectFunctionSYCL)(const void* args);
|
||||
RTCIntersectFunctionSYCL intersectFunc = nullptr;
|
||||
|
||||
#if EMBREE_SYCL_GEOMETRY_CALLBACK
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY)
|
||||
intersectFunc = (RTCIntersectFunctionSYCL) intersectorN.intersect;
|
||||
#endif
|
||||
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS)
|
||||
if (context->getIntersectFunction())
|
||||
intersectFunc = (RTCIntersectFunctionSYCL) context->getIntersectFunction();
|
||||
|
||||
if (intersectFunc)
|
||||
intersectFunc(&args);
|
||||
|
||||
forward_scene = args.forward_scene;
|
||||
return mask != 0;
|
||||
}
|
||||
|
||||
/*! Tests if single ray is occluded by the scene. */
|
||||
__forceinline bool occluded (Ray& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context, RTCScene& forward_scene)
|
||||
{
|
||||
assert(primID < size());
|
||||
|
||||
int mask = -1;
|
||||
OccludedFunctionNArguments args;
|
||||
args.valid = &mask;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.context = context->user;
|
||||
args.ray = (RTCRayN*)&ray;
|
||||
args.N = 1;
|
||||
args.geomID = geomID;
|
||||
args.primID = primID;
|
||||
args.geometry = this;
|
||||
args.forward_scene = nullptr;
|
||||
args.args = nullptr;
|
||||
|
||||
typedef void (*RTCOccludedFunctionSYCL)(const void* args);
|
||||
RTCOccludedFunctionSYCL occludedFunc = nullptr;
|
||||
|
||||
#if EMBREE_SYCL_GEOMETRY_CALLBACK
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY)
|
||||
occludedFunc = (RTCOccludedFunctionSYCL) intersectorN.occluded;
|
||||
#endif
|
||||
|
||||
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS)
|
||||
if (context->getOccludedFunction())
|
||||
occludedFunc = (RTCOccludedFunctionSYCL) context->getOccludedFunction();
|
||||
|
||||
if (occludedFunc)
|
||||
occludedFunc(&args);
|
||||
|
||||
forward_scene = args.forward_scene;
|
||||
return mask != 0;
|
||||
}
|
||||
|
||||
/*! Intersects a packet of K rays with the scene. */
|
||||
template<int K>
|
||||
__forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
|
||||
{
|
||||
assert(primID < size());
|
||||
|
||||
vint<K> mask = valid.mask32();
|
||||
IntersectFunctionNArguments args;
|
||||
args.valid = (int*)&mask;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.context = context->user;
|
||||
args.rayhit = (RTCRayHitN*)&ray;
|
||||
args.N = K;
|
||||
args.geomID = geomID;
|
||||
args.primID = primID;
|
||||
args.geometry = this;
|
||||
args.forward_scene = nullptr;
|
||||
args.args = context->args;
|
||||
|
||||
IntersectFuncN intersectFunc = nullptr;
|
||||
intersectFunc = intersectorN.intersect;
|
||||
|
||||
if (context->getIntersectFunction())
|
||||
intersectFunc = context->getIntersectFunction();
|
||||
|
||||
assert(intersectFunc);
|
||||
intersectFunc(&args);
|
||||
}
|
||||
|
||||
/*! Tests if a packet of K rays is occluded by the scene. */
|
||||
template<int K>
|
||||
__forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
|
||||
{
|
||||
assert(primID < size());
|
||||
|
||||
vint<K> mask = valid.mask32();
|
||||
OccludedFunctionNArguments args;
|
||||
args.valid = (int*)&mask;
|
||||
args.geometryUserPtr = userPtr;
|
||||
args.context = context->user;
|
||||
args.ray = (RTCRayN*)&ray;
|
||||
args.N = K;
|
||||
args.geomID = geomID;
|
||||
args.primID = primID;
|
||||
args.geometry = this;
|
||||
args.forward_scene = nullptr;
|
||||
args.args = context->args;
|
||||
|
||||
OccludedFuncN occludedFunc = nullptr;
|
||||
occludedFunc = intersectorN.occluded;
|
||||
|
||||
if (context->getOccludedFunction())
|
||||
occludedFunc = context->getOccludedFunction();
|
||||
|
||||
assert(occludedFunc);
|
||||
occludedFunc(&args);
|
||||
}
|
||||
|
||||
public:
|
||||
RTCBoundsFunction boundsFunc;
|
||||
IntersectorN intersectorN;
|
||||
};
|
||||
|
||||
#define DEFINE_SET_INTERSECTORN(symbol,intersector) \
|
||||
AccelSet::IntersectorN symbol() { \
|
||||
return AccelSet::IntersectorN(intersector::intersect, \
|
||||
intersector::occluded, \
|
||||
TOSTRING(isa) "::" TOSTRING(symbol)); \
|
||||
}
|
||||
}
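
Editorial note: AccelSet::bounds() above shows the contract for user geometries: embree fills
an RTCBoundsFunctionArguments record and the registered RTCBoundsFunction must write the
primitive's box into bounds_o. A minimal sketch of such a callback for a hypothetical array of
spheres passed as geometryUserPtr:

    struct MySphere { float x, y, z, r; };   // hypothetical user data

    void mySphereBoundsFunc(const RTCBoundsFunctionArguments* args)
    {
      const MySphere* spheres = (const MySphere*) args->geometryUserPtr;
      const MySphere& s = spheres[args->primID];   // args->timeStep would select the time step if animated
      args->bounds_o->lower_x = s.x - s.r;  args->bounds_o->upper_x = s.x + s.r;
      args->bounds_o->lower_y = s.y - s.r;  args->bounds_o->upper_y = s.y + s.r;
      args->bounds_o->lower_z = s.z - s.r;  args->bounds_o->upper_z = s.z + s.r;
    }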
82
engine/thirdparty/embree/kernels/common/alloc.cpp
vendored
Normal file
@@ -0,0 +1,82 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "alloc.h"
|
||||
#include "../../common/sys/thread.h"
|
||||
#if defined(APPLE) && defined(__aarch64__)
|
||||
#include "../../common/sys/barrier.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
__thread FastAllocator::ThreadLocal2* FastAllocator::thread_local_allocator2 = nullptr;
|
||||
MutexSys FastAllocator::s_thread_local_allocators_lock;
|
||||
std::vector<std::unique_ptr<FastAllocator::ThreadLocal2>> FastAllocator::s_thread_local_allocators;
|
||||
|
||||
struct fast_allocator_regression_test : public RegressionTest
|
||||
{
|
||||
BarrierSys barrier;
|
||||
std::atomic<size_t> numFailed;
|
||||
std::unique_ptr<FastAllocator> alloc;
|
||||
|
||||
fast_allocator_regression_test()
|
||||
: RegressionTest("fast_allocator_regression_test"), numFailed(0)
|
||||
{
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
static void thread_alloc(fast_allocator_regression_test* This)
|
||||
{
|
||||
FastAllocator::CachedAllocator threadalloc = This->alloc->getCachedAllocator();
|
||||
|
||||
size_t* ptrs[1000];
|
||||
for (size_t j=0; j<1000; j++)
|
||||
{
|
||||
This->barrier.wait();
|
||||
for (size_t i=0; i<1000; i++) {
|
||||
ptrs[i] = (size_t*) threadalloc.malloc0(sizeof(size_t)+(i%32));
|
||||
*ptrs[i] = size_t(threadalloc.talloc0) + i;
|
||||
}
|
||||
for (size_t i=0; i<1000; i++) {
|
||||
if (*ptrs[i] != size_t(threadalloc.talloc0) + i)
|
||||
This->numFailed++;
|
||||
}
|
||||
This->barrier.wait();
|
||||
}
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
alloc = make_unique(new FastAllocator(nullptr,false));
|
||||
numFailed.store(0);
|
||||
|
||||
size_t numThreads = getNumberOfLogicalThreads();
|
||||
barrier.init(numThreads+1);
|
||||
|
||||
/* create threads */
|
||||
std::vector<thread_t> threads;
|
||||
for (size_t i=0; i<numThreads; i++)
|
||||
threads.push_back(createThread((thread_func)thread_alloc,this));
|
||||
|
||||
/* run test */
|
||||
for (size_t i=0; i<1000; i++)
|
||||
{
|
||||
alloc->reset();
|
||||
barrier.wait();
|
||||
barrier.wait();
|
||||
}
|
||||
|
||||
/* destroy threads */
|
||||
for (size_t i=0; i<numThreads; i++)
|
||||
join(threads[i]);
|
||||
|
||||
alloc = nullptr;
|
||||
|
||||
return numFailed == 0;
|
||||
}
|
||||
};
|
||||
|
||||
fast_allocator_regression_test fast_allocator_regression;
|
||||
}
|
||||
|
||||
|
||||
1015
engine/thirdparty/embree/kernels/common/alloc.h
vendored
Normal file
File diff suppressed because it is too large
280
engine/thirdparty/embree/kernels/common/buffer.h
vendored
Normal file
@@ -0,0 +1,280 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "device.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Implements an API data buffer object. This class may or may not own the data. */
|
||||
class Buffer : public RefCount
|
||||
{
|
||||
public:
|
||||
/*! Buffer construction */
|
||||
//Buffer()
|
||||
//: device(nullptr), ptr(nullptr), numBytes(0), shared(false) {}
|
||||
|
||||
/*! Buffer construction */
|
||||
Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr)
|
||||
: device(device), numBytes(numBytes_in)
|
||||
{
|
||||
device->refInc();
|
||||
|
||||
if (ptr_in)
|
||||
{
|
||||
shared = true;
|
||||
ptr = (char*)ptr_in;
|
||||
}
|
||||
else
|
||||
{
|
||||
shared = false;
|
||||
alloc();
|
||||
}
|
||||
}
|
||||
|
||||
/*! Buffer destruction */
|
||||
~Buffer() {
|
||||
free();
|
||||
device->refDec();
|
||||
}
|
||||
|
||||
/*! this class is not copyable */
|
||||
private:
|
||||
Buffer(const Buffer& other) DELETED; // do not implement
|
||||
Buffer& operator =(const Buffer& other) DELETED; // do not implement
|
||||
|
||||
public:
|
||||
/* inits and allocates the buffer */
|
||||
void create(Device* device_in, size_t numBytes_in)
|
||||
{
|
||||
init(device_in, numBytes_in);
|
||||
alloc();
|
||||
}
|
||||
|
||||
/* inits the buffer */
|
||||
void init(Device* device_in, size_t numBytes_in)
|
||||
{
|
||||
free();
|
||||
device = device_in;
|
||||
ptr = nullptr;
|
||||
numBytes = numBytes_in;
|
||||
shared = false;
|
||||
}
|
||||
|
||||
/*! sets shared buffer */
|
||||
void set(Device* device_in, void* ptr_in, size_t numBytes_in)
|
||||
{
|
||||
free();
|
||||
device = device_in;
|
||||
ptr = (char*)ptr_in;
|
||||
if (numBytes_in != (size_t)-1)
|
||||
numBytes = numBytes_in;
|
||||
shared = true;
|
||||
}
|
||||
|
||||
/*! allocated buffer */
|
||||
void alloc()
|
||||
{
|
||||
device->memoryMonitor(this->bytes(), false);
|
||||
size_t b = (this->bytes()+15) & ssize_t(-16);
|
||||
ptr = (char*)device->malloc(b,16);
|
||||
}
|
||||
|
||||
/*! frees the buffer */
|
||||
void free()
|
||||
{
|
||||
if (shared) return;
|
||||
device->free(ptr);
|
||||
device->memoryMonitor(-ssize_t(this->bytes()), true);
|
||||
ptr = nullptr;
|
||||
}
|
||||
|
||||
/*! gets buffer pointer */
|
||||
void* data()
|
||||
{
|
||||
/* report error if buffer is not existing */
|
||||
if (!device)
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
|
||||
|
||||
/* return buffer */
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*! returns pointer to first element */
|
||||
__forceinline char* getPtr() const {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*! returns the number of bytes of the buffer */
|
||||
__forceinline size_t bytes() const {
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
/*! returns true of the buffer is not empty */
|
||||
__forceinline operator bool() const {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
public:
|
||||
Device* device; //!< device to report memory usage to
|
||||
char* ptr; //!< pointer to buffer data
|
||||
size_t numBytes; //!< number of bytes in the buffer
|
||||
bool shared; //!< set if memory is shared with application
|
||||
};
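
  // Editor's note (illustrative, not part of the vendored source): alloc() above rounds the
  // allocation size up to the next multiple of 16 with (bytes()+15) & ssize_t(-16), the usual
  // power-of-two round-up. For example, bytes()==20 gives (20+15) & ~15 == 35 & ~15 == 32, so
  // the buffer is padded to 32 bytes and the last element can still be touched with 16-byte
  // loads (see checkPadding16() further down).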
|
||||
|
||||
/*! An untyped contiguous range of a buffer. This class does not own the buffer content. */
|
||||
class RawBufferView
|
||||
{
|
||||
public:
|
||||
/*! Buffer construction */
|
||||
RawBufferView()
|
||||
: ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
|
||||
|
||||
public:
|
||||
/*! sets the buffer view */
|
||||
void set(const Ref<Buffer>& buffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in)
|
||||
{
|
||||
if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");
|
||||
|
||||
ptr_ofs = buffer_in->ptr + offset_in;
|
||||
stride = stride_in;
|
||||
num = num_in;
|
||||
format = format_in;
|
||||
modCounter++;
|
||||
modified = true;
|
||||
buffer = buffer_in;
|
||||
}
|
||||
|
||||
/*! returns pointer to the first element */
|
||||
__forceinline char* getPtr() const {
|
||||
return ptr_ofs;
|
||||
}
|
||||
|
||||
/*! returns pointer to the i'th element */
|
||||
__forceinline char* getPtr(size_t i) const
|
||||
{
|
||||
assert(i<num);
|
||||
return ptr_ofs + i*stride;
|
||||
}
|
||||
|
||||
/*! returns the number of elements of the buffer */
|
||||
__forceinline size_t size() const {
|
||||
return num;
|
||||
}
|
||||
|
||||
/*! returns the number of bytes of the buffer */
|
||||
__forceinline size_t bytes() const {
|
||||
return num*stride;
|
||||
}
|
||||
|
||||
/*! returns the buffer stride */
|
||||
__forceinline unsigned getStride() const
|
||||
{
|
||||
assert(stride <= unsigned(inf));
|
||||
return unsigned(stride);
|
||||
}
|
||||
|
||||
/*! return the buffer format */
|
||||
__forceinline RTCFormat getFormat() const {
|
||||
return format;
|
||||
}
|
||||
|
||||
/*! mark buffer as modified or unmodified */
|
||||
__forceinline void setModified() {
|
||||
modCounter++;
|
||||
modified = true;
|
||||
}
|
||||
|
||||
/*! mark buffer as modified or unmodified */
|
||||
__forceinline bool isModified(unsigned int otherModCounter) const {
|
||||
return modCounter > otherModCounter;
|
||||
}
|
||||
|
||||
/*! mark buffer as modified or unmodified */
|
||||
__forceinline bool isLocalModified() const {
|
||||
return modified;
|
||||
}
|
||||
|
||||
/*! clear local modified flag */
|
||||
__forceinline void clearLocalModified() {
|
||||
modified = false;
|
||||
}
|
||||
|
||||
/*! returns true of the buffer is not empty */
|
||||
__forceinline operator bool() const {
|
||||
return ptr_ofs;
|
||||
}
|
||||
|
||||
/*! checks padding to 16 byte check, fails hard */
|
||||
__forceinline void checkPadding16() const
|
||||
{
|
||||
if (ptr_ofs && num)
|
||||
volatile int MAYBE_UNUSED w = *((int*)getPtr(size()-1)+3); // FIXME: is failing hard avoidable?
|
||||
}
|
||||
|
||||
public:
|
||||
char* ptr_ofs; //!< base pointer plus offset
|
||||
size_t stride; //!< stride of the buffer in bytes
|
||||
size_t num; //!< number of elements in the buffer
|
||||
RTCFormat format; //!< format of the buffer
|
||||
unsigned int modCounter; //!< version ID of this buffer
|
||||
bool modified; //!< local modified data
|
||||
int userData; //!< special data
|
||||
Ref<Buffer> buffer; //!< reference to the parent buffer
|
||||
};
|
||||
|
||||
/*! A typed contiguous range of a buffer. This class does not own the buffer content. */
|
||||
template<typename T>
|
||||
class BufferView : public RawBufferView
|
||||
{
|
||||
public:
|
||||
typedef T value_type;
|
||||
|
||||
/*! access to the ith element of the buffer */
|
||||
__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
|
||||
__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
|
||||
};
|
||||
|
||||
template<>
|
||||
class BufferView<Vec3fa> : public RawBufferView
|
||||
{
|
||||
public:
|
||||
typedef Vec3fa value_type;
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
|
||||
|
||||
/*! access to the ith element of the buffer */
|
||||
__forceinline const Vec3fa operator [](size_t i) const
|
||||
{
|
||||
assert(i<num);
|
||||
return Vec3fa::loadu(ptr_ofs + i*stride);
|
||||
}
|
||||
|
||||
/*! writes the i'th element */
|
||||
__forceinline void store(size_t i, const Vec3fa& v)
|
||||
{
|
||||
assert(i<num);
|
||||
Vec3fa::storeu(ptr_ofs + i*stride, v);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*! access to the ith element of the buffer */
|
||||
__forceinline const Vec3fa operator [](size_t i) const
|
||||
{
|
||||
assert(i<num);
|
||||
return Vec3fa(vfloat4::loadu((float*)(ptr_ofs + i*stride)));
|
||||
}
|
||||
|
||||
/*! writes the i'th element */
|
||||
__forceinline void store(size_t i, const Vec3fa& v)
|
||||
{
|
||||
assert(i<num);
|
||||
vfloat4::storeu((float*)(ptr_ofs + i*stride), (vfloat4)v);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
}
|
||||
60
engine/thirdparty/embree/kernels/common/builder.h
vendored
Normal file
@@ -0,0 +1,60 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "default.h"
#include "accel.h"

namespace embree
{
#define MODE_HIGH_QUALITY (1<<8)

  /*! virtual interface for all hierarchy builders */
  class Builder : public RefCount {
  public:

    static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;

    /*! initiates the hierarchy builder */
    virtual void build() = 0;

    /*! notifies the builder about the deletion of some geometry */
    virtual void deleteGeometry(size_t geomID) {};

    /*! clears internal builder state */
    virtual void clear() = 0;
  };

  /*! virtual interface for progress monitor class */
  struct BuildProgressMonitor {
    virtual void operator() (size_t dn) const = 0;
  };

  /*! build the progress monitor interface from a closure */
  template<typename Closure>
  struct ProgressMonitorClosure : BuildProgressMonitor
  {
  public:
    ProgressMonitorClosure (const Closure& closure) : closure(closure) {}
    void operator() (size_t dn) const { closure(dn); }
  private:
    const Closure closure;
  };
  template<typename Closure> __forceinline const ProgressMonitorClosure<Closure> BuildProgressMonitorFromClosure(const Closure& closure) {
    return ProgressMonitorClosure<Closure>(closure);
  }

  struct LineSegments;
  struct TriangleMesh;
  struct QuadMesh;
  struct UserGeometry;

  class Scene;

  typedef void (*createLineSegmentsAccelTy)(Scene* scene, LineSegments* mesh, AccelData*& accel, Builder*& builder);
  typedef void (*createTriangleMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
  typedef void (*createQuadMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
  typedef void (*createUserGeometryAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);

}
173
engine/thirdparty/embree/kernels/common/context.h
vendored
Normal file
@@ -0,0 +1,173 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "default.h"
#include "rtcore.h"
#include "point_query.h"

namespace embree
{
  class Scene;

  struct RayQueryContext
  {
  public:

    __forceinline RayQueryContext(Scene* scene, RTCRayQueryContext* user_context, RTCIntersectArguments* args)
      : scene(scene), user(user_context), args(args) {}

    __forceinline RayQueryContext(Scene* scene, RTCRayQueryContext* user_context, RTCOccludedArguments* args)
      : scene(scene), user(user_context), args((RTCIntersectArguments*)args) {}

    __forceinline bool hasContextFilter() const {
      return args->filter != nullptr;
    }

    RTCFilterFunctionN getFilter() const {
      return args->filter;
    }

    RTCIntersectFunctionN getIntersectFunction() const {
      return args->intersect;
    }

    RTCOccludedFunctionN getOccludedFunction() const {
      return (RTCOccludedFunctionN) args->intersect;
    }

    __forceinline bool isCoherent() const {
      return embree::isCoherent(args->flags);
    }

    __forceinline bool isIncoherent() const {
      return embree::isIncoherent(args->flags);
    }

    __forceinline bool enforceArgumentFilterFunction() const {
      return args->flags & RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER;
    }

#if RTC_MIN_WIDTH
    __forceinline float getMinWidthDistanceFactor() const {
      return args->minWidthDistanceFactor;
    }
#endif

  public:
    Scene* scene = nullptr;
    RTCRayQueryContext* user = nullptr;
    RTCIntersectArguments* args = nullptr;
  };

  template<int M, typename Geometry>
  __forceinline Vec4vf<M> enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3vf<M>& ray_org, const Vec4vf<M>& v)
  {
#if RTC_MIN_WIDTH
    const vfloat<M> d = length(Vec3vf<M>(v) - ray_org);
    const vfloat<M> r = clamp(context->getMinWidthDistanceFactor()*d, v.w, geom->maxRadiusScale*v.w);
    return Vec4vf<M>(v.x,v.y,v.z,r);
#else
    return v;
#endif
  }

  template<typename Geometry>
  __forceinline Vec3ff enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec3ff& v)
  {
#if RTC_MIN_WIDTH
    const float d = length(Vec3fa(v) - ray_org);
    const float r = clamp(context->getMinWidthDistanceFactor()*d, v.w, geom->maxRadiusScale*v.w);
    return Vec3ff(v.x,v.y,v.z,r);
#else
    return v;
#endif
  }

  template<typename Geometry>
  __forceinline Vec3ff enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec4f& v) {
    return enlargeRadiusToMinWidth(context,geom,ray_org,Vec3ff(v.x,v.y,v.z,v.w));
  }

  enum PointQueryType
  {
    POINT_QUERY_TYPE_UNDEFINED = 0,
    POINT_QUERY_TYPE_SPHERE = 1,
    POINT_QUERY_TYPE_AABB = 2,
  };

  typedef bool (*PointQueryFunction)(struct RTCPointQueryFunctionArguments* args);

  struct PointQueryContext
  {
  public:
    __forceinline PointQueryContext(Scene* scene,
                                    PointQuery* query_ws,
                                    PointQueryType query_type,
                                    PointQueryFunction func,
                                    RTCPointQueryContext* userContext,
                                    float similarityScale,
                                    void* userPtr)
      : scene(scene)
      , tstate(nullptr)
      , query_ws(query_ws)
      , query_type(query_type)
      , func(func)
      , userContext(userContext)
      , similarityScale(similarityScale)
      , userPtr(userPtr)
      , primID(RTC_INVALID_GEOMETRY_ID)
      , geomID(RTC_INVALID_GEOMETRY_ID)
      , query_radius(query_ws->radius)
    {
      update();
    }

  public:
    __forceinline void update()
    {
      if (query_type == POINT_QUERY_TYPE_AABB) {
        assert(similarityScale == 0.f);
        updateAABB();
      }
      else{
        query_radius = Vec3fa(query_ws->radius * similarityScale);
      }
      if (userContext->instStackSize == 0) {
        assert(similarityScale == 1.f);
      }
    }

    __forceinline void updateAABB()
    {
      if (likely(query_ws->radius == (float)inf || userContext->instStackSize == 0)) {
        query_radius = Vec3fa(query_ws->radius);
        return;
      }

      const AffineSpace3fa m = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]);
      BBox3fa bbox(Vec3fa(-query_ws->radius), Vec3fa(query_ws->radius));
      bbox = xfmBounds(m, bbox);
      query_radius = 0.5f * (bbox.upper - bbox.lower);
    }

  public:
    Scene* scene;
    void* tstate;

    PointQuery* query_ws; // the original world space point query
    PointQueryType query_type;
    PointQueryFunction func;
    RTCPointQueryContext* userContext;
    float similarityScale;

    void* userPtr;

    unsigned int primID;
    unsigned int geomID;

    Vec3fa query_radius; // used if the query is converted to an AABB internally
  };
}
266
engine/thirdparty/embree/kernels/common/default.h
vendored
Normal file
@@ -0,0 +1,266 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../common/sys/platform.h"
|
||||
#include "../../common/sys/sysinfo.h"
|
||||
#include "../../common/sys/thread.h"
|
||||
#include "../../common/sys/alloc.h"
|
||||
#include "../../common/sys/ref.h"
|
||||
#include "../../common/sys/intrinsics.h"
|
||||
#include "../../common/sys/atomic.h"
|
||||
#include "../../common/sys/mutex.h"
|
||||
#include "../../common/sys/vector.h"
|
||||
#include "../../common/sys/array.h"
|
||||
#include "../../common/sys/estring.h"
|
||||
#include "../../common/sys/regression.h"
|
||||
#include "../../common/sys/vector.h"
|
||||
|
||||
#include "../../common/math/emath.h"
|
||||
#include "../../common/math/transcendental.h"
|
||||
#include "../../common/simd/simd.h"
|
||||
#include "../../common/math/vec2.h"
|
||||
#include "../../common/math/vec3.h"
|
||||
#include "../../common/math/vec4.h"
|
||||
#include "../../common/math/vec2fa.h"
|
||||
#include "../../common/math/vec3fa.h"
|
||||
#include "../../common/math/interval.h"
|
||||
#include "../../common/math/bbox.h"
|
||||
#include "../../common/math/obbox.h"
|
||||
#include "../../common/math/lbbox.h"
|
||||
#include "../../common/math/linearspace2.h"
|
||||
#include "../../common/math/linearspace3.h"
|
||||
#include "../../common/math/affinespace.h"
|
||||
#include "../../common/math/range.h"
|
||||
#include "../../common/lexers/tokenstream.h"
|
||||
|
||||
#define COMMA ,
|
||||
|
||||
#include "../config.h"
|
||||
#include "isa.h"
|
||||
#include "stat.h"
|
||||
#include "profile.h"
|
||||
#include "rtcore.h"
|
||||
#include "vector.h"
|
||||
#include "state.h"
|
||||
#include "instance_stack.h"
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <sstream>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Vec2 shortcuts
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N> using Vec2vf = Vec2<vfloat<N>>;
|
||||
template<int N> using Vec2vd = Vec2<vdouble<N>>;
|
||||
template<int N> using Vec2vr = Vec2<vreal<N>>;
|
||||
template<int N> using Vec2vi = Vec2<vint<N>>;
|
||||
template<int N> using Vec2vl = Vec2<vllong<N>>;
|
||||
template<int N> using Vec2vb = Vec2<vbool<N>>;
|
||||
template<int N> using Vec2vbf = Vec2<vboolf<N>>;
|
||||
template<int N> using Vec2vbd = Vec2<vboold<N>>;
|
||||
|
||||
typedef Vec2<vfloat4> Vec2vf4;
|
||||
typedef Vec2<vdouble4> Vec2vd4;
|
||||
typedef Vec2<vreal4> Vec2vr4;
|
||||
typedef Vec2<vint4> Vec2vi4;
|
||||
typedef Vec2<vllong4> Vec2vl4;
|
||||
typedef Vec2<vbool4> Vec2vb4;
|
||||
typedef Vec2<vboolf4> Vec2vbf4;
|
||||
typedef Vec2<vboold4> Vec2vbd4;
|
||||
|
||||
typedef Vec2<vfloat8> Vec2vf8;
|
||||
typedef Vec2<vdouble8> Vec2vd8;
|
||||
typedef Vec2<vreal8> Vec2vr8;
|
||||
typedef Vec2<vint8> Vec2vi8;
|
||||
typedef Vec2<vllong8> Vec2vl8;
|
||||
typedef Vec2<vbool8> Vec2vb8;
|
||||
typedef Vec2<vboolf8> Vec2vbf8;
|
||||
typedef Vec2<vboold8> Vec2vbd8;
|
||||
|
||||
typedef Vec2<vfloat16> Vec2vf16;
|
||||
typedef Vec2<vdouble16> Vec2vd16;
|
||||
typedef Vec2<vreal16> Vec2vr16;
|
||||
typedef Vec2<vint16> Vec2vi16;
|
||||
typedef Vec2<vllong16> Vec2vl16;
|
||||
typedef Vec2<vbool16> Vec2vb16;
|
||||
typedef Vec2<vboolf16> Vec2vbf16;
|
||||
typedef Vec2<vboold16> Vec2vbd16;
|
||||
|
||||
typedef Vec2<vfloatx> Vec2vfx;
|
||||
typedef Vec2<vdoublex> Vec2vdx;
|
||||
typedef Vec2<vrealx> Vec2vrx;
|
||||
typedef Vec2<vintx> Vec2vix;
|
||||
typedef Vec2<vllongx> Vec2vlx;
|
||||
typedef Vec2<vboolx> Vec2vbx;
|
||||
typedef Vec2<vboolfx> Vec2vbfx;
|
||||
typedef Vec2<vbooldx> Vec2vbdx;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Vec3 shortcuts
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N> using Vec3vf = Vec3<vfloat<N>>;
|
||||
template<int N> using Vec3vd = Vec3<vdouble<N>>;
|
||||
template<int N> using Vec3vr = Vec3<vreal<N>>;
|
||||
template<int N> using Vec3vi = Vec3<vint<N>>;
|
||||
template<int N> using Vec3vl = Vec3<vllong<N>>;
|
||||
template<int N> using Vec3vb = Vec3<vbool<N>>;
|
||||
template<int N> using Vec3vbf = Vec3<vboolf<N>>;
|
||||
template<int N> using Vec3vbd = Vec3<vboold<N>>;
|
||||
|
||||
typedef Vec3<vfloat4> Vec3vf4;
|
||||
typedef Vec3<vdouble4> Vec3vd4;
|
||||
typedef Vec3<vreal4> Vec3vr4;
|
||||
typedef Vec3<vint4> Vec3vi4;
|
||||
typedef Vec3<vllong4> Vec3vl4;
|
||||
typedef Vec3<vbool4> Vec3vb4;
|
||||
typedef Vec3<vboolf4> Vec3vbf4;
|
||||
typedef Vec3<vboold4> Vec3vbd4;
|
||||
|
||||
typedef Vec3<vfloat8> Vec3vf8;
|
||||
typedef Vec3<vdouble8> Vec3vd8;
|
||||
typedef Vec3<vreal8> Vec3vr8;
|
||||
typedef Vec3<vint8> Vec3vi8;
|
||||
typedef Vec3<vllong8> Vec3vl8;
|
||||
typedef Vec3<vbool8> Vec3vb8;
|
||||
typedef Vec3<vboolf8> Vec3vbf8;
|
||||
typedef Vec3<vboold8> Vec3vbd8;
|
||||
|
||||
typedef Vec3<vfloat16> Vec3vf16;
|
||||
typedef Vec3<vdouble16> Vec3vd16;
|
||||
typedef Vec3<vreal16> Vec3vr16;
|
||||
typedef Vec3<vint16> Vec3vi16;
|
||||
typedef Vec3<vllong16> Vec3vl16;
|
||||
typedef Vec3<vbool16> Vec3vb16;
|
||||
typedef Vec3<vboolf16> Vec3vbf16;
|
||||
typedef Vec3<vboold16> Vec3vbd16;
|
||||
|
||||
typedef Vec3<vfloatx> Vec3vfx;
|
||||
typedef Vec3<vdoublex> Vec3vdx;
|
||||
typedef Vec3<vrealx> Vec3vrx;
|
||||
typedef Vec3<vintx> Vec3vix;
|
||||
typedef Vec3<vllongx> Vec3vlx;
|
||||
typedef Vec3<vboolx> Vec3vbx;
|
||||
typedef Vec3<vboolfx> Vec3vbfx;
|
||||
typedef Vec3<vbooldx> Vec3vbdx;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Vec4 shortcuts
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N> using Vec4vf = Vec4<vfloat<N>>;
|
||||
template<int N> using Vec4vd = Vec4<vdouble<N>>;
|
||||
template<int N> using Vec4vr = Vec4<vreal<N>>;
|
||||
template<int N> using Vec4vi = Vec4<vint<N>>;
|
||||
template<int N> using Vec4vl = Vec4<vllong<N>>;
|
||||
template<int N> using Vec4vb = Vec4<vbool<N>>;
|
||||
template<int N> using Vec4vbf = Vec4<vboolf<N>>;
|
||||
template<int N> using Vec4vbd = Vec4<vboold<N>>;
|
||||
|
||||
typedef Vec4<vfloat4> Vec4vf4;
|
||||
typedef Vec4<vdouble4> Vec4vd4;
|
||||
typedef Vec4<vreal4> Vec4vr4;
|
||||
typedef Vec4<vint4> Vec4vi4;
|
||||
typedef Vec4<vllong4> Vec4vl4;
|
||||
typedef Vec4<vbool4> Vec4vb4;
|
||||
typedef Vec4<vboolf4> Vec4vbf4;
|
||||
typedef Vec4<vboold4> Vec4vbd4;
|
||||
|
||||
typedef Vec4<vfloat8> Vec4vf8;
|
||||
typedef Vec4<vdouble8> Vec4vd8;
|
||||
typedef Vec4<vreal8> Vec4vr8;
|
||||
typedef Vec4<vint8> Vec4vi8;
|
||||
typedef Vec4<vllong8> Vec4vl8;
|
||||
typedef Vec4<vbool8> Vec4vb8;
|
||||
typedef Vec4<vboolf8> Vec4vbf8;
|
||||
typedef Vec4<vboold8> Vec4vbd8;
|
||||
|
||||
typedef Vec4<vfloat16> Vec4vf16;
|
||||
typedef Vec4<vdouble16> Vec4vd16;
|
||||
typedef Vec4<vreal16> Vec4vr16;
|
||||
typedef Vec4<vint16> Vec4vi16;
|
||||
typedef Vec4<vllong16> Vec4vl16;
|
||||
typedef Vec4<vbool16> Vec4vb16;
|
||||
typedef Vec4<vboolf16> Vec4vbf16;
|
||||
typedef Vec4<vboold16> Vec4vbd16;
|
||||
|
||||
typedef Vec4<vfloatx> Vec4vfx;
|
||||
typedef Vec4<vdoublex> Vec4vdx;
|
||||
typedef Vec4<vrealx> Vec4vrx;
|
||||
typedef Vec4<vintx> Vec4vix;
|
||||
typedef Vec4<vllongx> Vec4vlx;
|
||||
typedef Vec4<vboolx> Vec4vbx;
|
||||
typedef Vec4<vboolfx> Vec4vbfx;
|
||||
typedef Vec4<vbooldx> Vec4vbdx;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Other shortcuts
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<int N> using BBox3vf = BBox<Vec3vf<N>>;
|
||||
typedef BBox<Vec3vf4> BBox3vf4;
|
||||
typedef BBox<Vec3vf8> BBox3vf8;
|
||||
typedef BBox<Vec3vf16> BBox3vf16;
|
||||
|
||||
/* calculate time segment itime and fractional time ftime */
|
||||
__forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime)
|
||||
{
|
||||
const float timeScaled = time * numTimeSegments;
|
||||
const float itimef = clamp(floor(timeScaled), 0.0f, numTimeSegments-1.0f);
|
||||
ftime = timeScaled - itimef;
|
||||
return int(itimef);
|
||||
}
|
||||
|
||||
__forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime)
|
||||
{
|
||||
const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
|
||||
const float itimef = clamp(floor(timeScaled), 0.0f, numTimeSegments-1.0f);
|
||||
ftime = timeScaled - itimef;
|
||||
return int(itimef);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
|
||||
{
|
||||
const vfloat<N> timeScaled = time * numTimeSegments;
|
||||
const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
|
||||
ftime = timeScaled - itimef;
|
||||
return vint<N>(itimef);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
|
||||
{
|
||||
const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
|
||||
const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
|
||||
ftime = timeScaled - itimef;
|
||||
return vint<N>(itimef);
|
||||
}
|
||||
|
||||
/* calculate overlapping time segment range */
|
||||
__forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments)
|
||||
{
|
||||
const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step
|
||||
const float round_down = 1.0f-2.0f*float(ulp);
|
||||
const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f);
|
||||
const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments);
|
||||
return make_range(itime_lower, itime_upper);
|
||||
}
|
||||
|
||||
/* calculate overlapping time segment range */
|
||||
__forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments)
|
||||
{
|
||||
const float lower = (range.lower-time_range.lower)/time_range.size();
|
||||
const float upper = (range.upper-time_range.lower)/time_range.size();
|
||||
return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments);
|
||||
}
|
||||
}
|
||||
730
engine/thirdparty/embree/kernels/common/device.cpp
vendored
Normal file
@@ -0,0 +1,730 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "device.h"
|
||||
|
||||
#include "../../common/tasking/taskscheduler.h"
|
||||
|
||||
#include "../hash.h"
|
||||
#include "scene_triangle_mesh.h"
|
||||
#include "scene_user_geometry.h"
|
||||
#include "scene_instance.h"
|
||||
#include "scene_curves.h"
|
||||
#include "scene_subdiv_mesh.h"
|
||||
|
||||
#include "../subdiv/tessellation_cache.h"
|
||||
|
||||
#include "acceln.h"
|
||||
#include "geometry.h"
|
||||
|
||||
#include "../geometry/cylinder.h"
|
||||
|
||||
#include "../bvh/bvh4_factory.h"
|
||||
#include "../bvh/bvh8_factory.h"
|
||||
|
||||
#include "../../common/sys/alloc.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
# include "../level_zero/ze_wrapper.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
|
||||
ssize_t Device::debug_int0 = 0;
|
||||
ssize_t Device::debug_int1 = 0;
|
||||
ssize_t Device::debug_int2 = 0;
|
||||
ssize_t Device::debug_int3 = 0;
|
||||
|
||||
static MutexSys g_mutex;
|
||||
static std::map<Device*,size_t> g_cache_size_map;
|
||||
static std::map<Device*,size_t> g_num_threads_map;
|
||||
|
||||
struct TaskArena
|
||||
{
|
||||
#if USE_TASK_ARENA
|
||||
std::unique_ptr<tbb::task_arena> arena;
|
||||
#endif
|
||||
};
|
||||
|
||||
Device::Device (const char* cfg) : arena(new TaskArena())
|
||||
{
|
||||
/* check that CPU supports lowest ISA */
|
||||
if (!hasISA(ISA)) {
|
||||
throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
|
||||
}
|
||||
|
||||
/* set default frequency level for detected CPU */
|
||||
switch (getCPUModel()) {
|
||||
case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
|
||||
case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
|
||||
case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
|
||||
case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
|
||||
case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
|
||||
}
|
||||
|
||||
/* initialize global state */
|
||||
#if defined(EMBREE_CONFIG)
|
||||
State::parseString(EMBREE_CONFIG);
|
||||
#endif
|
||||
State::parseString(cfg);
|
||||
State::verify();
|
||||
|
||||
/* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */
|
||||
if (!checkISASupport()) {
|
||||
throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
|
||||
}
|
||||
|
||||
/*! do some internal tests */
|
||||
assert(isa::Cylinder::verify());
|
||||
|
||||
/*! enable huge page support if desired */
|
||||
#if defined(__WIN32__)
|
||||
if (State::enable_selockmemoryprivilege)
|
||||
State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
|
||||
#endif
|
||||
State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));
|
||||
|
||||
/*! set tessellation cache size */
|
||||
setCacheSize( State::tessellation_cache_size );
|
||||
|
||||
/*! enable some floating point exceptions to catch bugs */
|
||||
if (State::float_exceptions)
|
||||
{
|
||||
int exceptions = _MM_MASK_MASK;
|
||||
//exceptions &= ~_MM_MASK_INVALID;
|
||||
exceptions &= ~_MM_MASK_DENORM;
|
||||
exceptions &= ~_MM_MASK_DIV_ZERO;
|
||||
//exceptions &= ~_MM_MASK_OVERFLOW;
|
||||
//exceptions &= ~_MM_MASK_UNDERFLOW;
|
||||
//exceptions &= ~_MM_MASK_INEXACT;
|
||||
_MM_SET_EXCEPTION_MASK(exceptions);
|
||||
}
|
||||
|
||||
/* print info header */
|
||||
if (State::verbosity(1))
|
||||
print();
|
||||
if (State::verbosity(2))
|
||||
State::print();
|
||||
|
||||
/* register all algorithms */
|
||||
bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
|
||||
|
||||
#if defined(EMBREE_TARGET_SIMD8)
|
||||
bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
|
||||
#endif
|
||||
|
||||
/* setup tasking system */
|
||||
initTaskingSystem(numThreads);
|
||||
}
|
||||
|
||||
Device::~Device ()
|
||||
{
|
||||
setCacheSize(0);
|
||||
exitTaskingSystem();
|
||||
}
|
||||
|
||||
std::string getEnabledTargets()
|
||||
{
|
||||
std::string v;
|
||||
#if defined(EMBREE_TARGET_SSE2)
|
||||
v += "SSE2 ";
|
||||
#endif
|
||||
#if defined(EMBREE_TARGET_SSE42)
|
||||
v += "SSE4.2 ";
|
||||
#endif
|
||||
#if defined(EMBREE_TARGET_AVX)
|
||||
v += "AVX ";
|
||||
#endif
|
||||
#if defined(EMBREE_TARGET_AVX2)
|
||||
v += "AVX2 ";
|
||||
#endif
|
||||
#if defined(EMBREE_TARGET_AVX512)
|
||||
v += "AVX512 ";
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
|
||||
std::string getEmbreeFeatures()
|
||||
{
|
||||
std::string v;
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
v += "raymasks ";
|
||||
#endif
|
||||
#if defined (EMBREE_BACKFACE_CULLING)
|
||||
v += "backfaceculling ";
|
||||
#endif
|
||||
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
|
||||
v += "backfacecullingcurves ";
|
||||
#endif
|
||||
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
|
||||
v += "backfacecullingspheres ";
|
||||
#endif
|
||||
#if defined(EMBREE_FILTER_FUNCTION)
|
||||
v += "intersection_filter ";
|
||||
#endif
|
||||
#if defined (EMBREE_COMPACT_POLYS)
|
||||
v += "compact_polys ";
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
|
||||
void Device::print()
|
||||
{
|
||||
const int cpu_features = getCPUFeatures();
|
||||
std::cout << std::endl;
|
||||
std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
|
||||
std::cout << " Compiler : " << getCompilerName() << std::endl;
|
||||
std::cout << " Build : ";
|
||||
#if defined(DEBUG)
|
||||
std::cout << "Debug " << std::endl;
|
||||
#else
|
||||
std::cout << "Release " << std::endl;
|
||||
#endif
|
||||
std::cout << " Platform : " << getPlatformName() << std::endl;
|
||||
std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
|
||||
std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
|
||||
std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
|
||||
std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
|
||||
const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
|
||||
const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
|
||||
std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
|
||||
std::cout << " Config" << std::endl;
|
||||
std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
|
||||
std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
|
||||
std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
|
||||
std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
|
||||
std::cout << " Features: " << getEmbreeFeatures() << std::endl;
|
||||
std::cout << " Tasking : ";
|
||||
#if defined(TASKING_TBB)
|
||||
std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
|
||||
#else
|
||||
std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
|
||||
#endif
|
||||
#endif
|
||||
#if defined(TASKING_INTERNAL)
|
||||
std::cout << "internal_tasking_system ";
|
||||
#endif
|
||||
#if defined(TASKING_PPL)
|
||||
std::cout << "PPL ";
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
|
||||
/* check of FTZ and DAZ flags are set in CSR */
|
||||
if (!hasFTZ || !hasDAZ)
|
||||
{
|
||||
#if !defined(_DEBUG)
|
||||
if (State::verbosity(1))
|
||||
#endif
|
||||
{
|
||||
std::cout << std::endl;
|
||||
std::cout << "================================================================================" << std::endl;
|
||||
std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
|
||||
<< " in the MXCSR control and status register. This can have a severe " << std::endl
|
||||
<< " performance impact. Please enable these modes for each application " << std::endl
|
||||
<< " thread the following way:" << std::endl
|
||||
<< std::endl
|
||||
<< " #include \"xmmintrin.h\"" << std::endl
|
||||
<< " #include \"pmmintrin.h\"" << std::endl
|
||||
<< std::endl
|
||||
<< " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
|
||||
<< " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
|
||||
std::cout << "================================================================================" << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
void Device::setDeviceErrorCode(RTCError error)
|
||||
{
|
||||
RTCError* stored_error = errorHandler.error();
|
||||
if (*stored_error == RTC_ERROR_NONE)
|
||||
*stored_error = error;
|
||||
}
|
||||
|
||||
RTCError Device::getDeviceErrorCode()
|
||||
{
|
||||
RTCError* stored_error = errorHandler.error();
|
||||
RTCError error = *stored_error;
|
||||
*stored_error = RTC_ERROR_NONE;
|
||||
return error;
|
||||
}
|
||||
|
||||
void Device::setThreadErrorCode(RTCError error)
|
||||
{
|
||||
RTCError* stored_error = g_errorHandler.error();
|
||||
if (*stored_error == RTC_ERROR_NONE)
|
||||
*stored_error = error;
|
||||
}
|
||||
|
||||
RTCError Device::getThreadErrorCode()
|
||||
{
|
||||
RTCError* stored_error = g_errorHandler.error();
|
||||
RTCError error = *stored_error;
|
||||
*stored_error = RTC_ERROR_NONE;
|
||||
return error;
|
||||
}
|
||||
|
||||
void Device::process_error(Device* device, RTCError error, const char* str)
|
||||
{
|
||||
/* store global error code when device construction failed */
|
||||
if (!device)
|
||||
return setThreadErrorCode(error);
|
||||
|
||||
/* print error when in verbose mode */
|
||||
if (device->verbosity(1))
|
||||
{
|
||||
switch (error) {
|
||||
case RTC_ERROR_NONE : std::cerr << "Embree: No error"; break;
|
||||
case RTC_ERROR_UNKNOWN : std::cerr << "Embree: Unknown error"; break;
|
||||
case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break;
|
||||
case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break;
|
||||
case RTC_ERROR_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break;
|
||||
case RTC_ERROR_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break;
|
||||
default : std::cerr << "Embree: Invalid error code"; break;
|
||||
};
|
||||
if (str) std::cerr << ", (" << str << ")";
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
|
||||
/* call user specified error callback */
|
||||
if (device->error_function)
|
||||
device->error_function(device->error_function_userptr,error,str);
|
||||
|
||||
/* record error code */
|
||||
device->setDeviceErrorCode(error);
|
||||
}
|
||||
|
||||
void Device::memoryMonitor(ssize_t bytes, bool post)
|
||||
{
|
||||
if (State::memory_monitor_function && bytes != 0) {
|
||||
if (!State::memory_monitor_function(State::memory_monitor_userptr,bytes,post)) {
|
||||
if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor
|
||||
throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t getMaxNumThreads()
|
||||
{
|
||||
size_t maxNumThreads = 0;
|
||||
for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
|
||||
maxNumThreads = max(maxNumThreads, (*i).second);
|
||||
if (maxNumThreads == 0)
|
||||
maxNumThreads = std::numeric_limits<size_t>::max();
|
||||
return maxNumThreads;
|
||||
}
|
||||
|
||||
size_t getMaxCacheSize()
|
||||
{
|
||||
size_t maxCacheSize = 0;
|
||||
for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
|
||||
maxCacheSize = max(maxCacheSize, (*i).second);
|
||||
return maxCacheSize;
|
||||
}
|
||||
|
||||
void Device::setCacheSize(size_t bytes)
|
||||
{
|
||||
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
|
||||
Lock<MutexSys> lock(g_mutex);
|
||||
if (bytes == 0) g_cache_size_map.erase(this);
|
||||
else g_cache_size_map[this] = bytes;
|
||||
|
||||
size_t maxCacheSize = getMaxCacheSize();
|
||||
resizeTessellationCache(maxCacheSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Device::initTaskingSystem(size_t numThreads)
|
||||
{
|
||||
Lock<MutexSys> lock(g_mutex);
|
||||
if (numThreads == 0)
|
||||
g_num_threads_map[this] = std::numeric_limits<size_t>::max();
|
||||
else
|
||||
g_num_threads_map[this] = numThreads;
|
||||
|
||||
/* create task scheduler */
|
||||
size_t maxNumThreads = getMaxNumThreads();
|
||||
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
|
||||
#if USE_TASK_ARENA
|
||||
const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
|
||||
const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
|
||||
arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
|
||||
#endif
|
||||
}
|
||||
|
||||
void Device::exitTaskingSystem()
|
||||
{
|
||||
Lock<MutexSys> lock(g_mutex);
|
||||
g_num_threads_map.erase(this);
|
||||
|
||||
/* terminate tasking system */
|
||||
if (g_num_threads_map.size() == 0) {
|
||||
TaskScheduler::destroy();
|
||||
}
|
||||
/* or configure new number of threads */
|
||||
else {
|
||||
size_t maxNumThreads = getMaxNumThreads();
|
||||
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
|
||||
}
|
||||
#if USE_TASK_ARENA
|
||||
arena->arena.reset();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Device::execute(bool join, const std::function<void()>& func)
|
||||
{
|
||||
#if USE_TASK_ARENA
|
||||
if (join) {
|
||||
arena->arena->execute(func);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
func();
|
||||
}
|
||||
}
|
||||
|
||||
void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
|
||||
{
|
||||
/* hidden internal properties */
|
||||
switch ((size_t)prop)
|
||||
{
|
||||
case 1000000: debug_int0 = val; return;
|
||||
case 1000001: debug_int1 = val; return;
|
||||
case 1000002: debug_int2 = val; return;
|
||||
case 1000003: debug_int3 = val; return;
|
||||
}
|
||||
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
|
||||
}
|
||||
|
||||
ssize_t Device::getProperty(const RTCDeviceProperty prop)
|
||||
{
|
||||
size_t iprop = (size_t)prop;
|
||||
|
||||
/* get name of internal regression test */
|
||||
if (iprop >= 2000000 && iprop < 3000000)
|
||||
{
|
||||
RegressionTest* test = getRegressionTest(iprop-2000000);
|
||||
if (test) return (ssize_t) test->name.c_str();
|
||||
else return 0;
|
||||
}
|
||||
|
||||
/* run internal regression test */
|
||||
if (iprop >= 3000000 && iprop < 4000000)
|
||||
{
|
||||
RegressionTest* test = getRegressionTest(iprop-3000000);
|
||||
if (test) return test->run();
|
||||
else return 0;
|
||||
}
|
||||
|
||||
/* documented properties */
|
||||
switch (prop)
|
||||
{
|
||||
case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
|
||||
case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
|
||||
case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
|
||||
case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;
|
||||
|
||||
#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
|
||||
case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
|
||||
case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
|
||||
case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512);
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_RAY_MASK)
|
||||
case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING)
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING_CURVES)
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_COMPACT_POLYS)
|
||||
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_FILTER_FUNCTION)
|
||||
case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_IGNORE_INVALID_RAYS)
|
||||
case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(TASKING_INTERNAL)
|
||||
case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(TASKING_TBB)
|
||||
case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
|
||||
#endif
|
||||
|
||||
#if defined(TASKING_PPL)
|
||||
case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_TRIANGLE)
|
||||
case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_QUAD)
|
||||
case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_CURVE)
|
||||
case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
|
||||
case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_USER)
|
||||
case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_GEOMETRY_POINT)
|
||||
case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
#if defined(TASKING_PPL)
|
||||
case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
|
||||
#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
|
||||
case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;
|
||||
#endif
|
||||
|
||||
#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
|
||||
case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;
|
||||
#else
|
||||
case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
|
||||
#endif
|
||||
|
||||
default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
|
||||
};
|
||||
}
|
||||
|
||||
void* Device::malloc(size_t size, size_t align) {
|
||||
return alignedMalloc(size,align);
|
||||
}
|
||||
|
||||
void Device::free(void* ptr) {
|
||||
alignedFree(ptr);
|
||||
}
|
||||
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
|
||||
DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
|
||||
: Device(cfg), gpu_context(sycl_context)
|
||||
{
|
||||
/* initialize ZeWrapper */
|
||||
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
|
||||
|
||||
/* take first device as default device */
|
||||
auto devices = gpu_context.get_devices();
|
||||
if (devices.size() == 0)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
|
||||
gpu_device = devices[0];
|
||||
|
||||
/* check if RTAS build extension is available */
|
||||
sycl::platform platform = gpu_device.get_platform();
|
||||
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
|
||||
|
||||
uint32_t count = 0;
|
||||
std::vector<ze_driver_extension_properties_t> extensions;
|
||||
ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
|
||||
|
||||
extensions.resize(count);
|
||||
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
|
||||
|
||||
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
|
||||
bool ze_rtas_builder = false;
|
||||
for (uint32_t i=0; i<extensions.size(); i++)
|
||||
{
|
||||
if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
|
||||
ze_rtas_builder = true;
|
||||
}
|
||||
if (!ze_rtas_builder)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
|
||||
|
||||
result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
|
||||
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
|
||||
if (result != ZE_RESULT_SUCCESS)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
|
||||
#else
|
||||
ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
|
||||
#endif
|
||||
|
||||
if (State::verbosity(1))
|
||||
{
|
||||
if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
|
||||
std::cout << " Internal RTAS Builder" << std::endl;
|
||||
else
|
||||
std::cout << " Level Zero RTAS Builder" << std::endl;
|
||||
}
|
||||
|
||||
/* check if extension library can get loaded */
|
||||
ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
|
||||
result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
|
||||
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
|
||||
throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
|
||||
if (result == ZE_RESULT_SUCCESS)
|
||||
ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
|
||||
|
||||
gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
|
||||
gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();
|
||||
|
||||
if (State::verbosity(1))
|
||||
{
|
||||
sycl::platform platform = gpu_context.get_platform();
|
||||
std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
|
||||
std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
|
||||
std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
|
||||
std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
|
||||
}
|
||||
|
||||
DeviceGPU::~DeviceGPU()
|
||||
{
|
||||
rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);
|
||||
}
|
||||
|
||||
void DeviceGPU::enter() {
|
||||
enableUSMAllocEmbree(&gpu_context,&gpu_device);
|
||||
}
|
||||
|
||||
void DeviceGPU::leave() {
|
||||
disableUSMAllocEmbree();
|
||||
}
|
||||
|
||||
void* DeviceGPU::malloc(size_t size, size_t align) {
|
||||
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
|
||||
}
|
||||
|
||||
void DeviceGPU::free(void* ptr) {
|
||||
alignedSYCLFree(&gpu_context,ptr);
|
||||
}
|
||||
|
||||
void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
|
||||
gpu_device = sycl_device_in;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
|
||||
: device((Device*)hdevice)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
|
||||
: device(((Scene*)hscene)->device)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
|
||||
: device(((Geometry*)hgeometry)->device)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
|
||||
: device(((Buffer*)hbuffer)->device)
|
||||
{
|
||||
assert(device);
|
||||
device->refInc();
|
||||
device->enter();
|
||||
}
|
||||
|
||||
DeviceEnterLeave::~DeviceEnterLeave() {
|
||||
device->leave();
|
||||
device->refDec();
|
||||
}
|
||||
}
|
||||
194
engine/thirdparty/embree/kernels/common/device.h
vendored
Normal file
@@ -0,0 +1,194 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "default.h"
#include "state.h"
#include "accel.h"

namespace embree
{
  class BVH4Factory;
  class BVH8Factory;
  struct TaskArena;

  class Device : public State, public MemoryMonitorInterface
  {
    ALIGNED_CLASS_(16);

  public:

    /*! allocator that performs unified shared memory allocations */
    template<typename T, size_t alignment>
    struct allocator
    {
      typedef T value_type;
      typedef T* pointer;
      typedef const T* const_pointer;
      typedef T& reference;
      typedef const T& const_reference;
      typedef std::size_t size_type;
      typedef std::ptrdiff_t difference_type;

      allocator() {}

      allocator(Device* device)
        : device(device) {}

      __forceinline pointer allocate( size_type n ) {
        assert(device);
        return (pointer) device->malloc(n*sizeof(T),alignment);
      }

      __forceinline void deallocate( pointer p, size_type n ) {
        if (device) device->free(p);
      }

      __forceinline void construct( pointer p, const_reference val ) {
        new (p) T(val);
      }

      __forceinline void destroy( pointer p ) {
        p->~T();
      }

      Device* device = nullptr;
    };

    /*! vector class that performs aligned allocations from Device object */
    template<typename T>
    using vector = vector_t<T,allocator<T,std::alignment_of<T>::value>>;

    template<typename T, size_t alignment>
    using avector = vector_t<T,allocator<T,alignment>>;

  public:

    /*! Device construction */
    Device (const char* cfg);

    /*! Device destruction */
    virtual ~Device ();

    /*! prints info about the device */
    void print();

    /*! sets the error code */
    void setDeviceErrorCode(RTCError error);

    /*! returns and clears the error code */
    RTCError getDeviceErrorCode();

    /*! sets the error code */
    static void setThreadErrorCode(RTCError error);

    /*! returns and clears the error code */
    static RTCError getThreadErrorCode();

    /*! processes error codes, do not call directly */
    static void process_error(Device* device, RTCError error, const char* str);

    /*! invokes the memory monitor callback */
    void memoryMonitor(ssize_t bytes, bool post);

    /*! sets the size of the software cache. */
    void setCacheSize(size_t bytes);

    /*! sets a property */
    void setProperty(const RTCDeviceProperty prop, ssize_t val);

    /*! gets a property */
    ssize_t getProperty(const RTCDeviceProperty prop);

    /*! enter device by setting up some global state */
    virtual void enter() {}

    /*! leave device by setting up some global state */
    virtual void leave() {}

    /*! buffer allocation */
    virtual void* malloc(size_t size, size_t align);

    /*! buffer deallocation */
    virtual void free(void* ptr);

  private:

    /*! initializes the tasking system */
    void initTaskingSystem(size_t numThreads);

    /*! shuts down the tasking system */
    void exitTaskingSystem();

    std::unique_ptr<TaskArena> arena;

  public:

    // use tasking system arena to execute func
    void execute(bool join, const std::function<void()>& func);

    /*! some variables that can be set via rtcSetParameter1i for debugging purposes */
  public:
    static ssize_t debug_int0;
    static ssize_t debug_int1;
    static ssize_t debug_int2;
    static ssize_t debug_int3;

  public:
    std::unique_ptr<BVH4Factory> bvh4_factory;
#if defined(EMBREE_TARGET_SIMD8)
    std::unique_ptr<BVH8Factory> bvh8_factory;
#endif
  };

#if defined(EMBREE_SYCL_SUPPORT)

  class DeviceGPU : public Device
  {
  public:

    DeviceGPU(sycl::context sycl_context, const char* cfg);
    ~DeviceGPU();

    virtual void enter() override;
    virtual void leave() override;
    virtual void* malloc(size_t size, size_t align) override;
    virtual void free(void* ptr) override;

    /* set SYCL device */
    void setSYCLDevice(const sycl::device sycl_device);

  private:
    sycl::context gpu_context;
    sycl::device gpu_device;

    unsigned int gpu_maxWorkGroupSize;
    unsigned int gpu_maxComputeUnits;

  public:
    void* dispatchGlobalsPtr = nullptr;

  public:
    inline sycl::device &getGPUDevice() { return gpu_device; }
    inline sycl::context &getGPUContext() { return gpu_context; }

    inline unsigned int getGPUMaxWorkGroupSize() { return gpu_maxWorkGroupSize; }

    void init_rthw_level_zero();
    void init_rthw_opencl();
  };

#endif

  struct DeviceEnterLeave
  {
    DeviceEnterLeave (RTCDevice hdevice);
    DeviceEnterLeave (RTCScene hscene);
    DeviceEnterLeave (RTCGeometry hgeometry);
    DeviceEnterLeave (RTCBuffer hbuffer);
    ~DeviceEnterLeave();
  private:
    Device* device;
  };
}
265
engine/thirdparty/embree/kernels/common/geometry.cpp
vendored
Normal file
@@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "geometry.h"
#include "scene.h"

namespace embree
{
  const char* Geometry::gtype_names[Geometry::GTY_END] =
  {
    "flat_linear_curve",
    "round_linear_curve",
    "oriented_linear_curve",
    "",
    "flat_bezier_curve",
    "round_bezier_curve",
    "oriented_bezier_curve",
    "",
    "flat_bspline_curve",
    "round_bspline_curve",
    "oriented_bspline_curve",
    "",
    "flat_hermite_curve",
    "round_hermite_curve",
    "oriented_hermite_curve",
    "",
    "flat_catmull_rom_curve",
    "round_catmull_rom_curve",
    "oriented_catmull_rom_curve",
    "",
    "triangles",
    "quads",
    "grid",
    "subdivs",
    "",
    "sphere",
    "disc",
    "oriented_disc",
    "",
    "usergeom",
    "instance_cheap",
    "instance_expensive",
  };

  Geometry::Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps)
    : device(device), userPtr(nullptr),
      numPrimitives(numPrimitives), numTimeSteps(unsigned(numTimeSteps)), fnumTimeSegments(float(numTimeSteps-1)), time_range(0.0f,1.0f),
      mask(1),
      gtype(gtype),
      gsubtype(GTY_SUBTYPE_DEFAULT),
      quality(RTC_BUILD_QUALITY_MEDIUM),
      state((unsigned)State::MODIFIED),
      enabled(true),
      argumentFilterEnabled(false),
      intersectionFilterN(nullptr), occlusionFilterN(nullptr), pointQueryFunc(nullptr)
  {
    device->refInc();
  }

  Geometry::~Geometry()
  {
    device->refDec();
  }

  void Geometry::setNumPrimitives(unsigned int numPrimitives_in)
  {
    if (numPrimitives_in == numPrimitives) return;

    numPrimitives = numPrimitives_in;

    Geometry::update();
  }

  void Geometry::setNumTimeSteps (unsigned int numTimeSteps_in)
  {
    if (numTimeSteps_in == numTimeSteps) {
      return;
    }

    numTimeSteps = numTimeSteps_in;
    fnumTimeSegments = float(numTimeSteps_in-1);

    Geometry::update();
  }

  void Geometry::setTimeRange (const BBox1f range)
  {
    time_range = range;
    Geometry::update();
  }

  BBox1f Geometry::getTimeRange () const
  {
    return time_range;
  }

  void Geometry::update()
  {
    ++modCounter_; // FIXME: required?
    state = (unsigned)State::MODIFIED;
  }

  void Geometry::commit()
  {
    ++modCounter_;
    state = (unsigned)State::COMMITTED;
  }

  void Geometry::preCommit()
  {
    if (State::MODIFIED == (State)state)
      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"geometry not committed");
  }

  void Geometry::postCommit()
  {
  }

  void Geometry::enable ()
  {
    if (isEnabled())
      return;

    enabled = true;
    ++modCounter_;
  }

  void Geometry::disable ()
  {
    if (isDisabled())
      return;

    enabled = false;
    ++modCounter_;
  }

  void Geometry::setUserData (void* ptr)
  {
    userPtr = ptr;
  }

  void Geometry::setIntersectionFilterFunctionN (RTCFilterFunctionN filter)
  {
    if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");

    intersectionFilterN = filter;
  }

  void Geometry::setOcclusionFilterFunctionN (RTCFilterFunctionN filter)
  {
    if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");

    occlusionFilterN = filter;
  }

  void Geometry::setPointQueryFunction (RTCPointQueryFunction func)
  {
    pointQueryFunc = func;
  }

  void Geometry::interpolateN(const RTCInterpolateNArguments* const args)
  {
    const void* valid_i = args->valid;
    const unsigned* primIDs = args->primIDs;
    const float* u = args->u;
    const float* v = args->v;
    unsigned int N = args->N;
    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* dPdv = args->dPdv;
    float* ddPdudu = args->ddPdudu;
    float* ddPdvdv = args->ddPdvdv;
    float* ddPdudv = args->ddPdudv;
    unsigned int valueCount = args->valueCount;
|
||||
|
||||
if (valueCount > 256) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximally 256 floating point values can be interpolated per vertex");
|
||||
const int* valid = (const int*) valid_i;
|
||||
|
||||
__aligned(64) float P_tmp[256];
|
||||
__aligned(64) float dPdu_tmp[256];
|
||||
__aligned(64) float dPdv_tmp[256];
|
||||
__aligned(64) float ddPdudu_tmp[256];
|
||||
__aligned(64) float ddPdvdv_tmp[256];
|
||||
__aligned(64) float ddPdudv_tmp[256];
|
||||
|
||||
float* Pt = P ? P_tmp : nullptr;
|
||||
float* dPdut = nullptr, *dPdvt = nullptr;
|
||||
if (dPdu) { dPdut = dPdu_tmp; dPdvt = dPdv_tmp; }
|
||||
float* ddPdudut = nullptr, *ddPdvdvt = nullptr, *ddPdudvt = nullptr;
|
||||
if (ddPdudu) { ddPdudut = ddPdudu_tmp; ddPdvdvt = ddPdvdv_tmp; ddPdudvt = ddPdudv_tmp; }
|
||||
|
||||
for (unsigned int i=0; i<N; i++)
|
||||
{
|
||||
if (valid && !valid[i]) continue;
|
||||
|
||||
RTCInterpolateArguments iargs;
|
||||
iargs.primID = primIDs[i];
|
||||
iargs.u = u[i];
|
||||
iargs.v = v[i];
|
||||
iargs.bufferType = bufferType;
|
||||
iargs.bufferSlot = bufferSlot;
|
||||
iargs.P = Pt;
|
||||
iargs.dPdu = dPdut;
|
||||
iargs.dPdv = dPdvt;
|
||||
iargs.ddPdudu = ddPdudut;
|
||||
iargs.ddPdvdv = ddPdvdvt;
|
||||
iargs.ddPdudv = ddPdudvt;
|
||||
iargs.valueCount = valueCount;
|
||||
interpolate(&iargs);
|
||||
|
||||
if (likely(P)) {
|
||||
for (unsigned int j=0; j<valueCount; j++)
|
||||
P[j*N+i] = Pt[j];
|
||||
}
|
||||
if (likely(dPdu))
|
||||
{
|
||||
for (unsigned int j=0; j<valueCount; j++) {
|
||||
dPdu[j*N+i] = dPdut[j];
|
||||
dPdv[j*N+i] = dPdvt[j];
|
||||
}
|
||||
}
|
||||
if (likely(ddPdudu))
|
||||
{
|
||||
for (unsigned int j=0; j<valueCount; j++) {
|
||||
ddPdudu[j*N+i] = ddPdudut[j];
|
||||
ddPdvdv[j*N+i] = ddPdvdvt[j];
|
||||
ddPdudv[j*N+i] = ddPdudvt[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Geometry::pointQuery(PointQuery* query, PointQueryContext* context)
|
||||
{
|
||||
assert(context->primID < size());
|
||||
|
||||
RTCPointQueryFunctionArguments args;
|
||||
args.query = (RTCPointQuery*)context->query_ws;
|
||||
args.userPtr = context->userPtr;
|
||||
args.primID = context->primID;
|
||||
args.geomID = context->geomID;
|
||||
args.context = context->userContext;
|
||||
args.similarityScale = context->similarityScale;
|
||||
|
||||
bool update = false;
|
||||
if(context->func) update |= context->func(&args);
|
||||
if(pointQueryFunc) update |= pointQueryFunc(&args);
|
||||
|
||||
if (update && context->userContext->instStackSize > 0)
|
||||
{
|
||||
// update point query
|
||||
if (context->query_type == POINT_QUERY_TYPE_AABB) {
|
||||
context->updateAABB();
|
||||
} else {
|
||||
assert(context->similarityScale > 0.f);
|
||||
query->radius = context->query_ws->radius * context->similarityScale;
|
||||
}
|
||||
}
|
||||
return update;
|
||||
}
|
||||
}
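
A small standalone sketch (not part of the vendored file) of the output layout produced by Geometry::interpolateN above: for query i and value channel j, the result is written to P[j*N + i], i.e. channel-major across the N queries.

#include <vector>

// Reads one interpolated value back from the channel-major layout used by
// Geometry::interpolateN (the loop above scatters P[j*N+i] = Pt[j]).
float readInterpolatedValue(const std::vector<float>& P, unsigned int N,
                            unsigned int query_i, unsigned int channel_j)
{
  return P[channel_j * N + query_i];
}
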
663
engine/thirdparty/embree/kernels/common/geometry.h
vendored
Normal file
@@ -0,0 +1,663 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "device.h"
|
||||
#include "buffer.h"
|
||||
#include "../common/point_query.h"
|
||||
#include "../builders/priminfo.h"
|
||||
#include "../builders/priminfo_mb.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
class Scene;
|
||||
class Geometry;
|
||||
|
||||
struct GeometryCounts
|
||||
{
|
||||
__forceinline GeometryCounts()
|
||||
: numFilterFunctions(0),
|
||||
numTriangles(0), numMBTriangles(0),
|
||||
numQuads(0), numMBQuads(0),
|
||||
numBezierCurves(0), numMBBezierCurves(0),
|
||||
numLineSegments(0), numMBLineSegments(0),
|
||||
numSubdivPatches(0), numMBSubdivPatches(0),
|
||||
numUserGeometries(0), numMBUserGeometries(0),
|
||||
numInstancesCheap(0), numMBInstancesCheap(0),
|
||||
numInstancesExpensive(0), numMBInstancesExpensive(0),
|
||||
numInstanceArrays(0), numMBInstanceArrays(0),
|
||||
numGrids(0), numMBGrids(0),
|
||||
numSubGrids(0), numMBSubGrids(0),
|
||||
numPoints(0), numMBPoints(0) {}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return numTriangles + numQuads + numBezierCurves + numLineSegments + numSubdivPatches + numUserGeometries + numInstancesCheap + numInstancesExpensive + numInstanceArrays + numGrids + numPoints
|
||||
+ numMBTriangles + numMBQuads + numMBBezierCurves + numMBLineSegments + numMBSubdivPatches + numMBUserGeometries + numMBInstancesCheap + numMBInstancesExpensive + numMBInstanceArrays + numMBGrids + numMBPoints;
|
||||
}
|
||||
|
||||
__forceinline unsigned int enabledGeometryTypesMask() const
|
||||
{
|
||||
unsigned int mask = 0;
|
||||
if (numTriangles) mask |= 1 << 0;
|
||||
if (numQuads) mask |= 1 << 1;
|
||||
if (numBezierCurves+numLineSegments) mask |= 1 << 2;
|
||||
if (numSubdivPatches) mask |= 1 << 3;
|
||||
if (numUserGeometries) mask |= 1 << 4;
|
||||
if (numInstancesCheap) mask |= 1 << 5;
|
||||
if (numInstancesExpensive) mask |= 1 << 6;
|
||||
if (numInstanceArrays) mask |= 1 << 7;
|
||||
if (numGrids) mask |= 1 << 8;
|
||||
if (numPoints) mask |= 1 << 9;
|
||||
|
||||
unsigned int maskMB = 0;
|
||||
if (numMBTriangles) maskMB |= 1 << 0;
|
||||
if (numMBQuads) maskMB |= 1 << 1;
|
||||
if (numMBBezierCurves+numMBLineSegments) maskMB |= 1 << 2;
|
||||
if (numMBSubdivPatches) maskMB |= 1 << 3;
|
||||
if (numMBUserGeometries) maskMB |= 1 << 4;
|
||||
if (numMBInstancesCheap) maskMB |= 1 << 5;
|
||||
if (numMBInstancesExpensive) maskMB |= 1 << 6;
|
||||
if (numMBInstanceArrays) maskMB |= 1 << 7;
|
||||
if (numMBGrids) maskMB |= 1 << 8;
|
||||
if (numMBPoints) maskMB |= 1 << 9;
|
||||
|
||||
return (mask<<8) + maskMB;
|
||||
}
|
||||
|
||||
__forceinline GeometryCounts operator+ (GeometryCounts const & rhs) const
|
||||
{
|
||||
GeometryCounts ret;
|
||||
ret.numFilterFunctions = numFilterFunctions + rhs.numFilterFunctions;
|
||||
ret.numTriangles = numTriangles + rhs.numTriangles;
|
||||
ret.numMBTriangles = numMBTriangles + rhs.numMBTriangles;
|
||||
ret.numQuads = numQuads + rhs.numQuads;
|
||||
ret.numMBQuads = numMBQuads + rhs.numMBQuads;
|
||||
ret.numBezierCurves = numBezierCurves + rhs.numBezierCurves;
|
||||
ret.numMBBezierCurves = numMBBezierCurves + rhs.numMBBezierCurves;
|
||||
ret.numLineSegments = numLineSegments + rhs.numLineSegments;
|
||||
ret.numMBLineSegments = numMBLineSegments + rhs.numMBLineSegments;
|
||||
ret.numSubdivPatches = numSubdivPatches + rhs.numSubdivPatches;
|
||||
ret.numMBSubdivPatches = numMBSubdivPatches + rhs.numMBSubdivPatches;
|
||||
ret.numUserGeometries = numUserGeometries + rhs.numUserGeometries;
|
||||
ret.numMBUserGeometries = numMBUserGeometries + rhs.numMBUserGeometries;
|
||||
ret.numInstancesCheap = numInstancesCheap + rhs.numInstancesCheap;
|
||||
ret.numMBInstancesCheap = numMBInstancesCheap + rhs.numMBInstancesCheap;
|
||||
ret.numInstancesExpensive = numInstancesExpensive + rhs.numInstancesExpensive;
|
||||
ret.numMBInstancesExpensive = numMBInstancesExpensive + rhs.numMBInstancesExpensive;
|
||||
ret.numInstanceArrays = numInstanceArrays + rhs.numInstanceArrays;
|
||||
ret.numMBInstanceArrays = numMBInstanceArrays + rhs.numMBInstanceArrays;
|
||||
ret.numGrids = numGrids + rhs.numGrids;
|
||||
ret.numMBGrids = numMBGrids + rhs.numMBGrids;
|
||||
ret.numSubGrids = numSubGrids + rhs.numSubGrids;
|
||||
ret.numMBSubGrids = numMBSubGrids + rhs.numMBSubGrids;
|
||||
ret.numPoints = numPoints + rhs.numPoints;
|
||||
ret.numMBPoints = numMBPoints + rhs.numMBPoints;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t numFilterFunctions; //!< number of geometries with filter functions enabled
|
||||
size_t numTriangles; //!< number of enabled triangles
|
||||
size_t numMBTriangles; //!< number of enabled motion blurred triangles
|
||||
size_t numQuads; //!< number of enabled quads
|
||||
size_t numMBQuads; //!< number of enabled motion blurred quads
|
||||
size_t numBezierCurves; //!< number of enabled curves
|
||||
size_t numMBBezierCurves; //!< number of enabled motion blurred curves
|
||||
size_t numLineSegments; //!< number of enabled line segments
|
||||
size_t numMBLineSegments; //!< number of enabled line motion blurred segments
|
||||
size_t numSubdivPatches; //!< number of enabled subdivision patches
|
||||
size_t numMBSubdivPatches; //!< number of enabled motion blurred subdivision patches
|
||||
size_t numUserGeometries; //!< number of enabled user geometries
|
||||
size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries
|
||||
size_t numInstancesCheap; //!< number of enabled cheap instances
|
||||
size_t numMBInstancesCheap; //!< number of enabled motion blurred cheap instances
|
||||
size_t numInstancesExpensive; //!< number of enabled expensive instances
|
||||
size_t numMBInstancesExpensive; //!< number of enabled motion blurred expensive instances
|
||||
size_t numInstanceArrays; //!< number of enabled instance arrays
|
||||
size_t numMBInstanceArrays; //!< number of enabled motion blurred instance arrays
|
||||
size_t numGrids; //!< number of enabled grid geometries
|
||||
size_t numMBGrids; //!< number of enabled motion blurred grid geometries
|
||||
size_t numSubGrids; //!< number of enabled grid geometries
|
||||
size_t numMBSubGrids; //!< number of enabled motion blurred grid geometries
|
||||
size_t numPoints; //!< number of enabled points
|
||||
size_t numMBPoints; //!< number of enabled motion blurred points
|
||||
};
|
||||
|
||||
/*! Base class all geometries are derived from */
|
||||
class Geometry : public RefCount
|
||||
{
|
||||
ALIGNED_CLASS_USM_(16);
|
||||
|
||||
friend class Scene;
|
||||
public:
|
||||
|
||||
/*! type of geometry */
|
||||
enum GType
|
||||
{
|
||||
GTY_FLAT_LINEAR_CURVE = 0,
|
||||
GTY_ROUND_LINEAR_CURVE = 1,
|
||||
GTY_ORIENTED_LINEAR_CURVE = 2,
|
||||
GTY_CONE_LINEAR_CURVE = 3,
|
||||
|
||||
GTY_FLAT_BEZIER_CURVE = 4,
|
||||
GTY_ROUND_BEZIER_CURVE = 5,
|
||||
GTY_ORIENTED_BEZIER_CURVE = 6,
|
||||
|
||||
GTY_FLAT_BSPLINE_CURVE = 8,
|
||||
GTY_ROUND_BSPLINE_CURVE = 9,
|
||||
GTY_ORIENTED_BSPLINE_CURVE = 10,
|
||||
|
||||
GTY_FLAT_HERMITE_CURVE = 12,
|
||||
GTY_ROUND_HERMITE_CURVE = 13,
|
||||
GTY_ORIENTED_HERMITE_CURVE = 14,
|
||||
|
||||
GTY_FLAT_CATMULL_ROM_CURVE = 16,
|
||||
GTY_ROUND_CATMULL_ROM_CURVE = 17,
|
||||
GTY_ORIENTED_CATMULL_ROM_CURVE = 18,
|
||||
|
||||
GTY_TRIANGLE_MESH = 20,
|
||||
GTY_QUAD_MESH = 21,
|
||||
GTY_GRID_MESH = 22,
|
||||
GTY_SUBDIV_MESH = 23,
|
||||
|
||||
GTY_SPHERE_POINT = 25,
|
||||
GTY_DISC_POINT = 26,
|
||||
GTY_ORIENTED_DISC_POINT = 27,
|
||||
|
||||
GTY_USER_GEOMETRY = 29,
|
||||
GTY_INSTANCE_CHEAP = 30,
|
||||
GTY_INSTANCE_EXPENSIVE = 31,
|
||||
GTY_INSTANCE_ARRAY = 24,
|
||||
GTY_END = 32,
|
||||
|
||||
GTY_BASIS_LINEAR = 0,
|
||||
GTY_BASIS_BEZIER = 4,
|
||||
GTY_BASIS_BSPLINE = 8,
|
||||
GTY_BASIS_HERMITE = 12,
|
||||
GTY_BASIS_CATMULL_ROM = 16,
|
||||
GTY_BASIS_MASK = 28,
|
||||
|
||||
GTY_SUBTYPE_FLAT_CURVE = 0,
|
||||
GTY_SUBTYPE_ROUND_CURVE = 1,
|
||||
GTY_SUBTYPE_ORIENTED_CURVE = 2,
|
||||
GTY_SUBTYPE_MASK = 3,
|
||||
};
|
||||
|
||||
enum GSubType
|
||||
{
|
||||
GTY_SUBTYPE_DEFAULT= 0,
|
||||
GTY_SUBTYPE_INSTANCE_LINEAR = 0,
|
||||
GTY_SUBTYPE_INSTANCE_QUATERNION = 1
|
||||
};
|
||||
|
||||
enum GTypeMask
|
||||
{
|
||||
MTY_FLAT_LINEAR_CURVE = 1ul << GTY_FLAT_LINEAR_CURVE,
|
||||
MTY_ROUND_LINEAR_CURVE = 1ul << GTY_ROUND_LINEAR_CURVE,
|
||||
MTY_CONE_LINEAR_CURVE = 1ul << GTY_CONE_LINEAR_CURVE,
|
||||
MTY_ORIENTED_LINEAR_CURVE = 1ul << GTY_ORIENTED_LINEAR_CURVE,
|
||||
|
||||
MTY_FLAT_BEZIER_CURVE = 1ul << GTY_FLAT_BEZIER_CURVE,
|
||||
MTY_ROUND_BEZIER_CURVE = 1ul << GTY_ROUND_BEZIER_CURVE,
|
||||
MTY_ORIENTED_BEZIER_CURVE = 1ul << GTY_ORIENTED_BEZIER_CURVE,
|
||||
|
||||
MTY_FLAT_BSPLINE_CURVE = 1ul << GTY_FLAT_BSPLINE_CURVE,
|
||||
MTY_ROUND_BSPLINE_CURVE = 1ul << GTY_ROUND_BSPLINE_CURVE,
|
||||
MTY_ORIENTED_BSPLINE_CURVE = 1ul << GTY_ORIENTED_BSPLINE_CURVE,
|
||||
|
||||
MTY_FLAT_HERMITE_CURVE = 1ul << GTY_FLAT_HERMITE_CURVE,
|
||||
MTY_ROUND_HERMITE_CURVE = 1ul << GTY_ROUND_HERMITE_CURVE,
|
||||
MTY_ORIENTED_HERMITE_CURVE = 1ul << GTY_ORIENTED_HERMITE_CURVE,
|
||||
|
||||
MTY_FLAT_CATMULL_ROM_CURVE = 1ul << GTY_FLAT_CATMULL_ROM_CURVE,
|
||||
MTY_ROUND_CATMULL_ROM_CURVE = 1ul << GTY_ROUND_CATMULL_ROM_CURVE,
|
||||
MTY_ORIENTED_CATMULL_ROM_CURVE = 1ul << GTY_ORIENTED_CATMULL_ROM_CURVE,
|
||||
|
||||
MTY_CURVE2 = MTY_FLAT_LINEAR_CURVE | MTY_ROUND_LINEAR_CURVE | MTY_CONE_LINEAR_CURVE | MTY_ORIENTED_LINEAR_CURVE,
|
||||
|
||||
MTY_CURVE4 = MTY_FLAT_BEZIER_CURVE | MTY_ROUND_BEZIER_CURVE | MTY_ORIENTED_BEZIER_CURVE |
|
||||
MTY_FLAT_BSPLINE_CURVE | MTY_ROUND_BSPLINE_CURVE | MTY_ORIENTED_BSPLINE_CURVE |
|
||||
MTY_FLAT_HERMITE_CURVE | MTY_ROUND_HERMITE_CURVE | MTY_ORIENTED_HERMITE_CURVE |
|
||||
MTY_FLAT_CATMULL_ROM_CURVE | MTY_ROUND_CATMULL_ROM_CURVE | MTY_ORIENTED_CATMULL_ROM_CURVE,
|
||||
|
||||
MTY_SPHERE_POINT = 1ul << GTY_SPHERE_POINT,
|
||||
MTY_DISC_POINT = 1ul << GTY_DISC_POINT,
|
||||
MTY_ORIENTED_DISC_POINT = 1ul << GTY_ORIENTED_DISC_POINT,
|
||||
|
||||
MTY_POINTS = MTY_SPHERE_POINT | MTY_DISC_POINT | MTY_ORIENTED_DISC_POINT,
|
||||
|
||||
MTY_CURVES = MTY_CURVE2 | MTY_CURVE4 | MTY_POINTS,
|
||||
|
||||
MTY_TRIANGLE_MESH = 1ul << GTY_TRIANGLE_MESH,
|
||||
MTY_QUAD_MESH = 1ul << GTY_QUAD_MESH,
|
||||
MTY_GRID_MESH = 1ul << GTY_GRID_MESH,
|
||||
MTY_SUBDIV_MESH = 1ul << GTY_SUBDIV_MESH,
|
||||
MTY_USER_GEOMETRY = 1ul << GTY_USER_GEOMETRY,
|
||||
|
||||
MTY_INSTANCE_CHEAP = 1ul << GTY_INSTANCE_CHEAP,
|
||||
MTY_INSTANCE_EXPENSIVE = 1ul << GTY_INSTANCE_EXPENSIVE,
|
||||
MTY_INSTANCE = MTY_INSTANCE_CHEAP | MTY_INSTANCE_EXPENSIVE,
|
||||
MTY_INSTANCE_ARRAY = 1ul << GTY_INSTANCE_ARRAY,
|
||||
|
||||
MTY_ALL = -1
|
||||
};
|
||||
|
||||
static const char* gtype_names[GTY_END];
|
||||
|
||||
enum class State : unsigned {
|
||||
MODIFIED = 0,
|
||||
COMMITTED = 1,
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/*! Geometry constructor */
|
||||
Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps);
|
||||
|
||||
/*! Geometry destructor */
|
||||
virtual ~Geometry();
|
||||
|
||||
public:
|
||||
|
||||
/*! tests if geometry is enabled */
|
||||
__forceinline bool isEnabled() const { return enabled; }
|
||||
|
||||
/*! tests if geometry is disabled */
|
||||
__forceinline bool isDisabled() const { return !isEnabled(); }
|
||||
|
||||
/* checks if argument version of filter functions are enabled */
|
||||
__forceinline bool hasArgumentFilterFunctions() const {
|
||||
return argumentFilterEnabled;
|
||||
}
|
||||
|
||||
/*! tests if that geometry has some filter function set */
|
||||
__forceinline bool hasGeometryFilterFunctions () const {
|
||||
return (intersectionFilterN != nullptr) || (occlusionFilterN != nullptr);
|
||||
}
|
||||
|
||||
/*! returns geometry type */
|
||||
__forceinline GType getType() const { return gtype; }
|
||||
|
||||
/*! returns curve type */
|
||||
__forceinline GType getCurveType() const { return (GType)(gtype & GTY_SUBTYPE_MASK); }
|
||||
|
||||
/*! returns curve basis */
|
||||
__forceinline GType getCurveBasis() const { return (GType)(gtype & GTY_BASIS_MASK); }
|
||||
|
||||
/*! returns geometry type mask */
|
||||
__forceinline GTypeMask getTypeMask() const { return (GTypeMask)(1 << gtype); }
|
||||
|
||||
/*! returns true of geometry contains motion blur */
|
||||
__forceinline bool hasMotionBlur () const {
|
||||
return numTimeSteps > 1;
|
||||
}
|
||||
|
||||
/*! returns number of primitives */
|
||||
__forceinline size_t size() const { return numPrimitives; }
|
||||
|
||||
/*! sets the number of primitives */
|
||||
virtual void setNumPrimitives(unsigned int numPrimitives_in);
|
||||
|
||||
/*! sets number of time steps */
|
||||
virtual void setNumTimeSteps (unsigned int numTimeSteps_in);
|
||||
|
||||
/*! sets motion blur time range */
|
||||
void setTimeRange (const BBox1f range);
|
||||
|
||||
/*! gets motion blur time range */
|
||||
BBox1f getTimeRange () const;
|
||||
|
||||
/*! sets number of vertex attributes */
|
||||
virtual void setVertexAttributeCount (unsigned int N) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! sets number of topologies */
|
||||
virtual void setTopologyCount (unsigned int N) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! sets the build quality */
|
||||
void setBuildQuality(RTCBuildQuality quality_in)
|
||||
{
|
||||
this->quality = quality_in;
|
||||
Geometry::update();
|
||||
}
|
||||
|
||||
/* calculate time segment itime and fractional time ftime */
|
||||
__forceinline int timeSegment(float time, float& ftime) const {
|
||||
return getTimeSegment(time,time_range.lower,time_range.upper,fnumTimeSegments,ftime);
|
||||
}
|
||||
|
||||
template<int N>
|
||||
__forceinline vint<N> timeSegment(const vfloat<N>& time, vfloat<N>& ftime) const {
|
||||
return getTimeSegment<N>(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime);
|
||||
}
|
||||
|
||||
/* calculate overlapping time segment range */
|
||||
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
|
||||
return getTimeSegmentRange(range,time_range,fnumTimeSegments);
|
||||
}
|
||||
|
||||
/* returns time that corresponds to time step */
|
||||
__forceinline float timeStep(const int i) const {
|
||||
assert(i>=0 && i<(int)numTimeSteps);
|
||||
return time_range.lower + time_range.size()*float(i)/fnumTimeSegments;
|
||||
}
|
||||
|
||||
/*! for all geometries */
|
||||
public:
|
||||
|
||||
/*! Enable geometry. */
|
||||
virtual void enable();
|
||||
|
||||
/*! Update geometry. */
|
||||
void update();
|
||||
|
||||
/*! commit of geometry */
|
||||
virtual void commit();
|
||||
|
||||
/*! Update geometry buffer. */
|
||||
virtual void updateBuffer(RTCBufferType type, unsigned int slot) {
|
||||
update(); // update everything for geometries not supporting this call
|
||||
}
|
||||
|
||||
/*! Disable geometry. */
|
||||
virtual void disable();
|
||||
|
||||
/*! Verify the geometry */
|
||||
virtual bool verify() { return true; }
|
||||
|
||||
/*! called before every build */
|
||||
virtual void preCommit();
|
||||
|
||||
/*! called after every build */
|
||||
virtual void postCommit();
|
||||
|
||||
virtual void addElementsToCount (GeometryCounts & counts) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
};
|
||||
|
||||
/*! sets constant tessellation rate for the geometry */
|
||||
virtual void setTessellationRate(float N) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Sets the maximal curve radius scale allowed by min-width feature. */
|
||||
virtual void setMaxRadiusScale(float s) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Set user data pointer. */
|
||||
virtual void setUserData(void* ptr);
|
||||
|
||||
/*! Get user data pointer. */
|
||||
__forceinline void* getUserData() const {
|
||||
return userPtr;
|
||||
}
|
||||
|
||||
/*! interpolates user data to the specified u/v location */
|
||||
virtual void interpolate(const RTCInterpolateArguments* const args) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! interpolates user data to the specified u/v locations */
|
||||
virtual void interpolateN(const RTCInterpolateNArguments* const args);
|
||||
|
||||
/* point query api */
|
||||
bool pointQuery(PointQuery* query, PointQueryContext* context);
|
||||
|
||||
/*! for subdivision surfaces only */
|
||||
public:
|
||||
virtual void setSubdivisionMode (unsigned topologyID, RTCSubdivisionMode mode) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
virtual void setVertexAttributeTopology(unsigned int vertexBufferSlot, unsigned int indexBufferSlot) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Set displacement function. */
|
||||
virtual void setDisplacementFunction (RTCDisplacementFunctionN filter) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
virtual unsigned int getFirstHalfEdge(unsigned int faceID) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
virtual unsigned int getFace(unsigned int edgeID) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
virtual unsigned int getNextHalfEdge(unsigned int edgeID) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
virtual unsigned int getPreviousHalfEdge(unsigned int edgeID) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
virtual unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! get fast access to first vertex buffer if applicable */
|
||||
virtual float * getCompactVertexArray () const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/*! Returns the modified counter - how many times the geo has been modified */
|
||||
__forceinline unsigned int getModCounter () const {
|
||||
return modCounter_;
|
||||
}
|
||||
|
||||
/*! for triangle meshes and bezier curves only */
|
||||
public:
|
||||
|
||||
|
||||
/*! Sets ray mask. */
|
||||
virtual void setMask(unsigned mask) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Sets specified buffer. */
|
||||
virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Gets specified buffer. */
|
||||
virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Set intersection filter function for ray packets of size N. */
|
||||
virtual void setIntersectionFilterFunctionN (RTCFilterFunctionN filterN);
|
||||
|
||||
/*! Set occlusion filter function for ray packets of size N. */
|
||||
virtual void setOcclusionFilterFunctionN (RTCFilterFunctionN filterN);
|
||||
|
||||
/* Enables argument version of intersection or occlusion filter function. */
|
||||
virtual void enableFilterFunctionFromArguments (bool enable) {
|
||||
argumentFilterEnabled = enable;
|
||||
}
|
||||
|
||||
/*! for instances only */
|
||||
public:
|
||||
|
||||
/*! Sets the instanced scene */
|
||||
virtual void setInstancedScene(const Ref<Scene>& scene) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Sets the instanced scenes */
|
||||
virtual void setInstancedScenes(const RTCScene* scenes, size_t numScenes) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Sets transformation of the instance */
|
||||
virtual void setTransform(const AffineSpace3fa& transform, unsigned int timeStep) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Sets transformation of the instance */
|
||||
virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Returns the transformation of the instance */
|
||||
virtual AffineSpace3fa getTransform(float time) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Returns the transformation of the instance */
|
||||
virtual AffineSpace3fa getTransform(size_t instance, float time) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! for user geometries only */
|
||||
public:
|
||||
|
||||
/*! Set bounds function. */
|
||||
virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Set intersect function for ray packets of size N. */
|
||||
virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Set occlusion function for ray packets of size N. */
|
||||
virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
|
||||
}
|
||||
|
||||
/*! Set point query function. */
|
||||
void setPointQueryFunction(RTCPointQueryFunction func);
|
||||
|
||||
/*! returns number of time segments */
|
||||
__forceinline unsigned numTimeSegments () const {
|
||||
return numTimeSteps-1;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
virtual PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefArray not implemented for this geometry");
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
return createPrimRefArray(prims.data(),r,k,geomID);
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArray(avector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
return createPrimRefArray(prims.data(),r,k,geomID);
|
||||
}
|
||||
|
||||
virtual PrimInfo createPrimRefArray(mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
return createPrimRefArray(prims,r,k,geomID);
|
||||
}
|
||||
|
||||
virtual PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
|
||||
}
|
||||
|
||||
/*! Calculates the PrimRef over the complete time interval */
|
||||
virtual PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
return createPrimRefArrayMB(prims.data(),t0t1,r,k,geomID);
|
||||
}
|
||||
|
||||
PrimInfo createPrimRefArrayMB(avector<PrimRef>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
return createPrimRefArrayMB(prims.data(),t0t1,r,k,geomID);
|
||||
}
|
||||
|
||||
virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
|
||||
return createPrimRefMBArray(prims,t0t1,r,k,geomID);
|
||||
}
|
||||
|
||||
virtual LinearSpace3fa computeAlignedSpace(const size_t primID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual Vec3fa computeDirection(unsigned int primID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual Vec3fa computeDirection(unsigned int primID, size_t time) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual BBox3fa vbounds(size_t primID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual BBox3fa vbounds(const LinearSpace3fa& space, size_t primID) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range, const SubGridBuildData * const sgrids) const {
|
||||
return vlinearBounds(primID,time_range);
|
||||
}
|
||||
|
||||
virtual LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
|
||||
}
|
||||
|
||||
virtual LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
|
||||
}
|
||||
|
||||
public:
|
||||
__forceinline bool hasIntersectionFilter() const { return intersectionFilterN != nullptr; }
|
||||
__forceinline bool hasOcclusionFilter() const { return occlusionFilterN != nullptr; }
|
||||
|
||||
public:
|
||||
Device* device; //!< device this geometry belongs to
|
||||
|
||||
void* userPtr; //!< user pointer
|
||||
unsigned int numPrimitives; //!< number of primitives of this geometry
|
||||
|
||||
unsigned int numTimeSteps; //!< number of time steps
|
||||
float fnumTimeSegments; //!< number of time segments (precalculation)
|
||||
BBox1f time_range; //!< motion blur time range
|
||||
|
||||
unsigned int mask; //!< for masking out geometry
|
||||
unsigned int modCounter_ = 1; //!< counter for every modification - used to rebuild scenes when geo is modified
|
||||
|
||||
struct {
|
||||
GType gtype : 8; //!< geometry type
|
||||
GSubType gsubtype : 8; //!< geometry subtype
|
||||
RTCBuildQuality quality : 3; //!< build quality for geometry
|
||||
unsigned state : 2;
|
||||
bool enabled : 1; //!< true if geometry is enabled
|
||||
bool argumentFilterEnabled : 1; //!< true if argument filter functions are enabled for this geometry
|
||||
};
|
||||
|
||||
RTCFilterFunctionN intersectionFilterN;
|
||||
RTCFilterFunctionN occlusionFilterN;
|
||||
RTCPointQueryFunction pointQueryFunc;
|
||||
};
|
||||
}
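
For reference, a standalone restatement (not the vendored code) of the time-step placement used by Geometry::timeStep above: with numTimeSteps samples over [lower, upper], sample i lies at lower + (upper - lower) * i / (numTimeSteps - 1).

#include <vector>

// Computes the motion-blur sample times implied by Geometry::timeStep above.
std::vector<float> timeStepSamples(float lower, float upper, unsigned int numTimeSteps)
{
  if (numTimeSteps <= 1) return { lower };               // a single step has no segments
  const float fnumTimeSegments = float(numTimeSteps - 1);
  std::vector<float> t(numTimeSteps);
  for (unsigned int i = 0; i < numTimeSteps; ++i)
    t[i] = lower + (upper - lower) * float(i) / fnumTimeSegments;
  return t;
}
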
153
engine/thirdparty/embree/kernels/common/hit.h
vendored
Normal file
@@ -0,0 +1,153 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "ray.h"
|
||||
#include "instance_stack.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Hit structure for K hits */
|
||||
template<int K>
|
||||
struct HitK
|
||||
{
|
||||
/* Default construction does nothing */
|
||||
__forceinline HitK() {}
|
||||
|
||||
/* Constructs a hit */
|
||||
__forceinline HitK(const RTCRayQueryContext* context, const vuint<K>& geomID, const vuint<K>& primID, const vfloat<K>& u, const vfloat<K>& v, const Vec3vf<K>& Ng)
|
||||
: Ng(Ng), u(u), v(v), primID(primID), geomID(geomID)
|
||||
{
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
instID[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
#endif
|
||||
}
|
||||
|
||||
instance_id_stack::copy_UV<K>(context->instID, instID);
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
instance_id_stack::copy_UV<K>(context->instPrimID, instPrimID);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Constructs a hit */
|
||||
__forceinline HitK(const RTCRayQueryContext* context, const vuint<K>& geomID, const vuint<K>& primID, const Vec2vf<K>& uv, const Vec3vf<K>& Ng)
|
||||
: HitK(context,geomID,primID,uv.x,uv.y,Ng) {}
|
||||
|
||||
/* Returns the size of the hit */
|
||||
static __forceinline size_t size() { return K; }
|
||||
|
||||
public:
|
||||
Vec3vf<K> Ng; // geometry normal
|
||||
vfloat<K> u; // barycentric u coordinate of hit
|
||||
vfloat<K> v; // barycentric v coordinate of hit
|
||||
vuint<K> primID; // primitive ID
|
||||
vuint<K> geomID; // geometry ID
|
||||
vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
vuint<K> instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Specialization for a single hit */
|
||||
template<>
|
||||
struct __aligned(16) HitK<1>
|
||||
{
|
||||
/* Default construction does nothing */
|
||||
__forceinline HitK() {}
|
||||
|
||||
/* Constructs a hit */
|
||||
__forceinline HitK(const RTCRayQueryContext* context, unsigned int geomID, unsigned int primID, float u, float v, const Vec3fa& Ng)
|
||||
: Ng(Ng.x,Ng.y,Ng.z), u(u), v(v), primID(primID), geomID(geomID)
|
||||
{
|
||||
instance_id_stack::copy_UU(context, context->instID, instID);
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
instance_id_stack::copy_UU(context, context->instPrimID, instPrimID);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Constructs a hit */
|
||||
__forceinline HitK(const RTCRayQueryContext* context, unsigned int geomID, unsigned int primID, const Vec2f& uv, const Vec3fa& Ng)
|
||||
: HitK<1>(context,geomID,primID,uv.x,uv.y,Ng) {}
|
||||
|
||||
/* Returns the size of the hit */
|
||||
static __forceinline size_t size() { return 1; }
|
||||
|
||||
public:
|
||||
Vec3<float> Ng; // geometry normal
|
||||
float u; // barycentric u coordinate of hit
|
||||
float v; // barycentric v coordinate of hit
|
||||
unsigned int primID; // primitive ID
|
||||
unsigned int geomID; // geometry ID
|
||||
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Shortcuts */
|
||||
typedef HitK<1> Hit;
|
||||
typedef HitK<4> Hit4;
|
||||
typedef HitK<8> Hit8;
|
||||
typedef HitK<16> Hit16;
|
||||
typedef HitK<VSIZEX> Hitx;
|
||||
|
||||
/* Outputs hit to stream */
|
||||
template<int K>
|
||||
__forceinline embree_ostream operator<<(embree_ostream cout, const HitK<K>& ray)
|
||||
{
|
||||
cout << "{ " << embree_endl
|
||||
<< " Ng = " << ray.Ng << embree_endl
|
||||
<< " u = " << ray.u << embree_endl
|
||||
<< " v = " << ray.v << embree_endl
|
||||
<< " primID = " << ray.primID << embree_endl
|
||||
<< " geomID = " << ray.geomID << embree_endl
|
||||
<< " instID =";
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
|
||||
{
|
||||
cout << " " << ray.instID[l];
|
||||
}
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
cout << " instPrimID =";
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
|
||||
{
|
||||
cout << " " << ray.instPrimID[l];
|
||||
}
|
||||
#endif
|
||||
cout << embree_endl;
|
||||
return cout << "}";
|
||||
}
|
||||
|
||||
template<typename Hit>
|
||||
__forceinline void copyHitToRay(RayHit& ray, const Hit& hit)
|
||||
{
|
||||
ray.Ng = hit.Ng;
|
||||
ray.u = hit.u;
|
||||
ray.v = hit.v;
|
||||
ray.primID = hit.primID;
|
||||
ray.geomID = hit.geomID;
|
||||
instance_id_stack::copy_UU(hit.instID, ray.instID);
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
instance_id_stack::copy_UU(hit.instPrimID, ray.instPrimID);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int K>
|
||||
__forceinline void copyHitToRay(const vbool<K>& mask, RayHitK<K>& ray, const HitK<K>& hit)
|
||||
{
|
||||
vfloat<K>::storeu(mask,&ray.Ng.x, hit.Ng.x);
|
||||
vfloat<K>::storeu(mask,&ray.Ng.y, hit.Ng.y);
|
||||
vfloat<K>::storeu(mask,&ray.Ng.z, hit.Ng.z);
|
||||
vfloat<K>::storeu(mask,&ray.u, hit.u);
|
||||
vfloat<K>::storeu(mask,&ray.v, hit.v);
|
||||
vuint<K>::storeu(mask,&ray.primID, hit.primID);
|
||||
vuint<K>::storeu(mask,&ray.geomID, hit.geomID);
|
||||
instance_id_stack::copy_VV<K>(hit.instID, ray.instID, mask);
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
instance_id_stack::copy_VV<K>(hit.instPrimID, ray.instPrimID, mask);
|
||||
#endif
|
||||
}
|
||||
}
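
A sketch of the commit path the helpers above serve (assumed intersector-side usage, not taken from the vendored file): a candidate intersection is staged in a Hit and, once accepted, copied into the RayHit output. The 'accepted' flag stands in for whatever filter or any-hit logic makes that decision.

#include "hit.h"
#include "ray.h"

// Illustrative only; parameter names are placeholders.
void commitHit(embree::RayHit& ray, const RTCRayQueryContext* context,
               unsigned int geomID, unsigned int primID,
               float u, float v, const embree::Vec3fa& Ng, float t, bool accepted)
{
  embree::Hit hit(context, geomID, primID, u, v, Ng); // HitK<1> constructor above
  if (accepted) {
    ray.tfar = t;                   // commit the hit distance
    embree::copyHitToRay(ray, hit); // copy UV, normal, geometry and instance IDs
  }
}
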
265
engine/thirdparty/embree/kernels/common/instance_stack.h
vendored
Normal file
@@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "rtcore.h"
|
||||
|
||||
namespace embree {
|
||||
namespace instance_id_stack {
|
||||
|
||||
static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0,
|
||||
"RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0.");
|
||||
|
||||
/*******************************************************************************
|
||||
* Instance ID stack manipulation.
|
||||
* This is used from the instance intersector.
|
||||
******************************************************************************/
|
||||
|
||||
/*
|
||||
* Push an instance to the stack.
|
||||
*/
|
||||
template<typename Context>
|
||||
RTC_FORCEINLINE bool push(Context context,
|
||||
unsigned instanceId,
|
||||
unsigned instancePrimId)
|
||||
{
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
const bool spaceAvailable = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT;
|
||||
/* We assert here because instances are silently dropped when the stack is full.
|
||||
This might be quite hard to find in production. */
|
||||
assert(spaceAvailable);
|
||||
if (likely(spaceAvailable)) {
|
||||
context->instID[context->instStackSize] = instanceId;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[context->instStackSize] = instancePrimId;
|
||||
#endif
|
||||
context->instStackSize++;
|
||||
}
|
||||
return spaceAvailable;
|
||||
#else
|
||||
const bool spaceAvailable = (context->instID[0] == RTC_INVALID_GEOMETRY_ID);
|
||||
assert(spaceAvailable);
|
||||
if (likely(spaceAvailable)) {
|
||||
context->instID[0] = instanceId;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[0] = instancePrimId;
|
||||
#endif
|
||||
}
|
||||
return spaceAvailable;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Pop the last instance pushed to the stack.
|
||||
* Do not call on an empty stack.
|
||||
*/
|
||||
template<typename Context>
|
||||
RTC_FORCEINLINE void pop(Context context)
|
||||
{
|
||||
assert(context);
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
assert(context->instStackSize > 0);
|
||||
--context->instStackSize;
|
||||
context->instID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
|
||||
#endif
|
||||
#else
|
||||
assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
|
||||
context->instID[0] = RTC_INVALID_GEOMETRY_ID;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[0] = RTC_INVALID_GEOMETRY_ID;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* Push an instance to the stack. Used for point queries*/
|
||||
RTC_FORCEINLINE bool push(RTCPointQueryContext* context,
|
||||
unsigned int instanceId,
|
||||
unsigned int instancePrimId,
|
||||
AffineSpace3fa const& w2i,
|
||||
AffineSpace3fa const& i2w)
|
||||
{
|
||||
assert(context);
|
||||
const size_t stackSize = context->instStackSize;
|
||||
assert(stackSize < RTC_MAX_INSTANCE_LEVEL_COUNT);
|
||||
context->instID[stackSize] = instanceId;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[stackSize] = instancePrimId;
|
||||
#endif
|
||||
|
||||
AffineSpace3fa_store_unaligned(w2i,(AffineSpace3fa*)context->world2inst[stackSize]);
|
||||
AffineSpace3fa_store_unaligned(i2w,(AffineSpace3fa*)context->inst2world[stackSize]);
|
||||
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
if (unlikely(stackSize > 0))
|
||||
{
|
||||
const AffineSpace3fa world2inst = AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->world2inst[stackSize ])
|
||||
* AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->world2inst[stackSize-1]);
|
||||
const AffineSpace3fa inst2world = AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->inst2world[stackSize-1])
|
||||
* AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->inst2world[stackSize ]);
|
||||
AffineSpace3fa_store_unaligned(world2inst,(AffineSpace3fa*)context->world2inst[stackSize]);
|
||||
AffineSpace3fa_store_unaligned(inst2world,(AffineSpace3fa*)context->inst2world[stackSize]);
|
||||
}
|
||||
#endif
|
||||
context->instStackSize++;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<>
|
||||
RTC_FORCEINLINE void pop(RTCPointQueryContext* context)
|
||||
{
|
||||
assert(context);
|
||||
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
|
||||
assert(context->instStackSize > 0);
|
||||
#else
|
||||
assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
|
||||
#endif
|
||||
--context->instStackSize;
|
||||
context->instID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
|
||||
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
|
||||
context->instPrimID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Optimized instance id stack copy.
|
||||
* The copy() functions will either copy full
|
||||
* stacks or copy only until the last valid element has been copied, depending
|
||||
* on RTC_MAX_INSTANCE_LEVEL_COUNT.
|
||||
*/
|
||||
RTC_FORCEINLINE void copy_UU(const unsigned* src, unsigned* tgt)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
tgt[0] = src[0];
|
||||
|
||||
#else
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
tgt[l] = src[l];
|
||||
if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4)
|
||||
if (src[l] == RTC_INVALID_GEOMETRY_ID)
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
RTC_FORCEINLINE void copy_UU(const RTCRayQueryContext* context, const unsigned* src, unsigned* tgt)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
tgt[0] = src[0];
|
||||
|
||||
#else
|
||||
|
||||
unsigned int depth = context->instStackSize;
|
||||
|
||||
for (unsigned l = 0; l < depth; ++l)
|
||||
tgt[l] = src[l];
|
||||
|
||||
for (unsigned l = depth; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
|
||||
tgt[l] = RTC_INVALID_GEOMETRY_ID;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template <int K>
|
||||
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
tgt[0] = src[0];
|
||||
|
||||
#else
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
tgt[l] = src[l];
|
||||
if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4)
|
||||
if (src[l] == RTC_INVALID_GEOMETRY_ID)
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <int K>
|
||||
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, size_t j)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
tgt[0][j] = src[0];
|
||||
|
||||
#else
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
tgt[l][j] = src[l];
|
||||
if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4)
|
||||
if (src[l] == RTC_INVALID_GEOMETRY_ID)
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <int K>
|
||||
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, const vbool<K>& mask)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
vuint<K>::store(mask, tgt, src[0]);
|
||||
|
||||
#else
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
vuint<K>::store(mask, tgt + l, src[l]);
|
||||
if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4)
|
||||
if (src[l] == RTC_INVALID_GEOMETRY_ID)
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <int K>
|
||||
RTC_FORCEINLINE void copy_VU(const vuint<K>* src, unsigned* tgt, size_t i)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
tgt[0] = src[0][i];
|
||||
|
||||
#else
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
tgt[l] = src[l][i];
|
||||
if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4)
|
||||
if (src[l][i] == RTC_INVALID_GEOMETRY_ID)
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <int K>
|
||||
RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, size_t i, size_t j)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
tgt[0][j] = src[0][i];
|
||||
|
||||
#else
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
tgt[l][j] = src[l][i];
|
||||
if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4)
|
||||
if (src[l][i] == RTC_INVALID_GEOMETRY_ID)
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <int K>
|
||||
RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask)
|
||||
{
|
||||
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
|
||||
vuint<K>::store(mask, tgt, src[0]);
|
||||
|
||||
#else
|
||||
vbool<K> done = !mask;
|
||||
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
|
||||
vuint<K>::store(mask, tgt + l, src[l]);
|
||||
if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) {
|
||||
done |= src[l] == RTC_INVALID_GEOMETRY_ID;
|
||||
if (all(done)) break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace instance_id_stack
|
||||
} // namespace embree
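
A minimal sketch of the pattern these push/pop helpers support in the instance intersector (the traversal callback and the concrete context type are placeholders, not taken from the vendored file):

#include "instance_stack.h"

// Runs 'traverse' (e.g. intersection of the instanced scene in instance space)
// with the given instance pushed onto the context's instance ID stack.
template<typename Context, typename TraverseFn>
void withInstanceOnStack(Context context, unsigned int instanceID,
                         unsigned int instancePrimID, TraverseFn traverse)
{
  if (!embree::instance_id_stack::push(context, instanceID, instancePrimID))
    return;                                 // stack full: the instance is skipped
  traverse();
  embree::instance_id_stack::pop(context);  // restore the stack for the caller
}
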
246
engine/thirdparty/embree/kernels/common/isa.h
vendored
Normal file
@@ -0,0 +1,246 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../common/sys/platform.h"
|
||||
#include "../../common/sys/sysinfo.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
#define DEFINE_SYMBOL2(type,name) \
|
||||
typedef type (*name##Func)(); \
|
||||
name##Func name;
|
||||
|
||||
#define DECLARE_SYMBOL2(type,name) \
|
||||
namespace sse2 { extern type name(); } \
|
||||
namespace sse42 { extern type name(); } \
|
||||
namespace avx { extern type name(); } \
|
||||
namespace avx2 { extern type name(); } \
|
||||
namespace avx512 { extern type name(); } \
|
||||
void name##_error2() { throw_RTCError(RTC_ERROR_UNKNOWN,"internal error in ISA selection for " TOSTRING(name)); } \
|
||||
type name##_error() { return type(name##_error2); } \
|
||||
type name##_zero() { return type(nullptr); }
|
||||
|
||||
#define DECLARE_ISA_FUNCTION(type,symbol,args) \
|
||||
namespace sse2 { extern type symbol(args); } \
|
||||
namespace sse42 { extern type symbol(args); } \
|
||||
namespace avx { extern type symbol(args); } \
|
||||
namespace avx2 { extern type symbol(args); } \
|
||||
namespace avx512 { extern type symbol(args); } \
|
||||
inline type symbol##_error(args) { throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"function " TOSTRING(symbol) " not supported by your CPU"); } \
|
||||
typedef type (*symbol##Ty)(args); \
|
||||
|
||||
#define DEFINE_ISA_FUNCTION(type,symbol,args) \
|
||||
typedef type (*symbol##Func)(args); \
|
||||
symbol##Func symbol;
|
||||
|
||||
#define ZERO_SYMBOL(features,intersector) \
|
||||
intersector = intersector##_zero;
|
||||
|
||||
#define INIT_SYMBOL(features,intersector) \
|
||||
intersector = decltype(intersector)(intersector##_error);
|
||||
|
||||
#define SELECT_SYMBOL_DEFAULT(features,intersector) \
|
||||
intersector = isa::intersector;
|
||||
|
||||
#if defined(__SSE__) || defined(__ARM_NEON)
|
||||
#if !defined(EMBREE_TARGET_SIMD4)
|
||||
#define EMBREE_TARGET_SIMD4
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_TARGET_SSE42)
|
||||
#define SELECT_SYMBOL_SSE42(features,intersector) \
|
||||
if ((features & SSE42) == SSE42) intersector = sse42::intersector;
|
||||
#else
|
||||
#define SELECT_SYMBOL_SSE42(features,intersector)
|
||||
#endif
|
||||
|
||||
#if defined(EMBREE_TARGET_AVX) || defined(__AVX__)
|
||||
#if !defined(EMBREE_TARGET_SIMD8)
|
||||
#define EMBREE_TARGET_SIMD8
|
||||
#endif
|
||||
#if defined(__AVX__) // if default ISA is >= AVX we treat AVX target as default target
|
||||
#define SELECT_SYMBOL_AVX(features,intersector) \
|
||||
if ((features & ISA) == ISA) intersector = isa::intersector;
|
||||
#else
|
||||
#define SELECT_SYMBOL_AVX(features,intersector) \
|
||||
  if ((features & AVX) == AVX) intersector = avx::intersector;
#endif
#else
#define SELECT_SYMBOL_AVX(features,intersector)
#endif

#if defined(EMBREE_TARGET_AVX2)
#if !defined(EMBREE_TARGET_SIMD8)
#define EMBREE_TARGET_SIMD8
#endif
#define SELECT_SYMBOL_AVX2(features,intersector) \
  if ((features & AVX2) == AVX2) intersector = avx2::intersector;
#else
#define SELECT_SYMBOL_AVX2(features,intersector)
#endif

#if defined(EMBREE_TARGET_AVX512)
#if !defined(EMBREE_TARGET_SIMD16)
#define EMBREE_TARGET_SIMD16
#endif
#define SELECT_SYMBOL_AVX512(features,intersector) \
  if ((features & AVX512) == AVX512) intersector = avx512::intersector;
#else
#define SELECT_SYMBOL_AVX512(features,intersector)
#endif

#define SELECT_SYMBOL_DEFAULT_SSE42(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector);

#define SELECT_SYMBOL_DEFAULT_SSE42_AVX(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);

#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);

#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX_AVX2(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_INIT_AVX(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);

#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);

#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);

#define SELECT_SYMBOL_INIT_AVX(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);

#define SELECT_SYMBOL_INIT_AVX_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);

#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512(features,intersector) \
  ZERO_SYMBOL(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_INIT_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

#define SELECT_SYMBOL_SSE42_AVX_AVX2(features,intersector) \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);

struct VerifyMultiTargetLinking {
  static __noinline int getISA(int depth = 5) {
    if (depth == 0) return ISA;
    else return getISA(depth-1);
  }
};
namespace sse2 { int getISA(); };
namespace sse42 { int getISA(); };
namespace avx { int getISA(); };
namespace avx2 { int getISA(); };
namespace avx512 { int getISA(); };
}
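// Editorial aside (not part of the upstream header): the SELECT_SYMBOL_* macros
// above bind a dispatch symbol to the widest ISA variant that was both compiled
// in and is reported by the CPU at runtime. A minimal usage sketch, assuming a
// per-ISA symbol named 'intersector' and a hypothetical feature query:
//
//   int features = getCPUFeatures();                              // assumed helper
//   SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features, intersector); // default, then AVX, AVX2, AVX512
//
// The macro expands to the default assignment followed by the chain of
// "if ((features & AVX*) == AVX*) intersector = avx*::intersector;" tests, so
// the last matching (widest) supported target wins.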
325
engine/thirdparty/embree/kernels/common/motion_derivative.h
vendored
Normal file
@@ -0,0 +1,325 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../../common/math/affinespace.h"
#include "../../common/math/interval.h"

#include <functional>

namespace embree {

#define MOTION_DERIVATIVE_ROOT_EPSILON 1e-4f

static void motion_derivative_coefficients(const float *p, float *coeff);

struct MotionDerivativeCoefficients
{
  float theta;
  float coeffs[3*8*7];

  MotionDerivativeCoefficients() {}

  // xfm0 and xfm1 are interpreted as a quaternion decomposition
  MotionDerivativeCoefficients(AffineSpace3ff const& xfm0, AffineSpace3ff const& xfm1)
  {
    // cosTheta of the two quaternions
    const float cosTheta = min(1.f, max(-1.f,
        xfm0.l.vx.w * xfm1.l.vx.w
      + xfm0.l.vy.w * xfm1.l.vy.w
      + xfm0.l.vz.w * xfm1.l.vz.w
      + xfm0.p.w    * xfm1.p.w));

    theta = std::acos(cosTheta);
    Vec4f qperp(xfm1.p.w, xfm1.l.vx.w, xfm1.l.vy.w, xfm1.l.vz.w);
    if (cosTheta < 0.995f) {
      // compute perpendicular quaternion
      qperp.x = xfm1.p.w    - cosTheta * xfm0.p.w;
      qperp.y = xfm1.l.vx.w - cosTheta * xfm0.l.vx.w;
      qperp.z = xfm1.l.vy.w - cosTheta * xfm0.l.vy.w;
      qperp.w = xfm1.l.vz.w - cosTheta * xfm0.l.vz.w;
      qperp = normalize(qperp);
    }
    const float p[33] = {
      theta,
      xfm0.l.vx.y, xfm0.l.vx.z, xfm0.l.vy.z, // translation component of xfm0
      xfm1.l.vx.y, xfm1.l.vx.z, xfm1.l.vy.z, // translation component of xfm1
      xfm0.p.w, xfm0.l.vx.w, xfm0.l.vy.w, xfm0.l.vz.w, // quaternion of xfm0
      qperp.x, qperp.y, qperp.z, qperp.w,
      xfm0.l.vx.x, xfm0.l.vy.x, xfm0.l.vz.x, xfm0.p.x, // scale/skew component of xfm0
      xfm0.l.vy.y, xfm0.l.vz.y, xfm0.p.y,
      xfm0.l.vz.z, xfm0.p.z,
      xfm1.l.vx.x, xfm1.l.vy.x, xfm1.l.vz.x, xfm1.p.x, // scale/skew component of xfm1
      xfm1.l.vy.y, xfm1.l.vz.y, xfm1.p.y,
      xfm1.l.vz.z, xfm1.p.z
    };
    motion_derivative_coefficients(p, coeffs);
  }
};

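// Editorial aside (not part of the upstream header): the constructor above sets
// up the standard slerp decomposition. With cosTheta = <q0, q1> and qperp the
// normalized component of q1 orthogonal to q0, the interpolated rotation is
//
//   q(t) = q0 * cos(theta * t) + qperp * sin(theta * t),
//
// which is why only theta, qperp, and the translation/scale/skew entries of the
// two decomposed transforms feed the generated coefficient table used below.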
struct MotionDerivative
{
  float twoTheta;
  float c[8];

  MotionDerivative(MotionDerivativeCoefficients const& mdc,
                   int dim, Vec3fa const& p0, Vec3fa const& p1)
    : twoTheta(2.f*mdc.theta)
  {
    const float p[7] = { 1, p0.x, p0.y, p0.z, p1.x, p1.y, p1.z };
    for (int i = 0; i < 8; ++i) {
      c[i] = 0;
      for (int j = 0; j < 7; ++j) {
        c[i] += mdc.coeffs[8*7*dim + i*7 + j] * p[j];
      }
    }
  }

  template<typename T>
  struct EvalMotionDerivative
  {
    MotionDerivative const& md;
    float offset;

    EvalMotionDerivative(MotionDerivative const& md, float offset) : md(md), offset(offset) {}

    T operator()(T const& time) const {
      return md.c[0] + md.c[1] * time
        + (md.c[2] + md.c[3] * time + md.c[4] * time * time) * cos(md.twoTheta * time)
        + (md.c[5] + md.c[6] * time + md.c[7] * time * time) * sin(md.twoTheta * time)
        + offset;
    }
  };

  unsigned int findRoots(
    Interval1f const& interval,
    float offset,
    float* roots,
    unsigned int maxNumRoots)
  {
    unsigned int numRoots = 0;
    EvalMotionDerivative<Interval1f> eval(*this, offset);
    findRoots(eval, interval, numRoots, roots, maxNumRoots);
    return numRoots;
  }

  template<typename Eval>
  static void findRoots(
    Eval const& eval,
    Interval1f const& interval,
    unsigned int& numRoots,
    float* roots,
    unsigned int maxNumRoots)
  {
    Interval1f range = eval(interval);
    if (range.lower > 0 || range.upper < 0 || range.lower >= range.upper) return;

    const float split = 0.5f * (interval.upper + interval.lower);
    if (interval.upper-interval.lower < 1e-7f || abs(split-interval.lower) < 1e-7f || abs(split-interval.upper) < 1e-7f)
    {
      // check if the root already exists
      for (unsigned int k = 0; k < numRoots && k < maxNumRoots; ++k) {
        if (abs(roots[k]-split) < MOTION_DERIVATIVE_ROOT_EPSILON)
          return;
      }
      if (numRoots < maxNumRoots) {
        roots[numRoots++] = split;
      }
      if (numRoots > maxNumRoots) {
        printf("error: more roots than expected\n"); // FIXME: workaround for ICC2019.4 compiler bug under macOS
        return;
      }
      return;
    }

    findRoots(eval, Interval1f(interval.lower, split), numRoots, roots, maxNumRoots);
    findRoots(eval, Interval1f(split, interval.upper), numRoots, roots, maxNumRoots);
  }
};

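// Editorial aside (not part of the upstream header): findRoots() brackets the
// zeros of the derivative with interval arithmetic: eval(interval) returns an
// Interval1f enclosing all values over that time range, sub-intervals whose
// enclosure excludes zero are discarded, and the rest are bisected until they
// shrink below ~1e-7. A usage sketch under assumed inputs (xfm0/xfm1, p0/p1
// and 'bound' are hypothetical):
//
//   MotionDerivativeCoefficients mdc(xfm0, xfm1);
//   MotionDerivative md(mdc, /*dim=*/0, p0, p1);
//   float roots[32];
//   unsigned int n = md.findRoots(Interval1f(0.0f, 1.0f), /*offset=*/-bound, roots, 32);
//
// Each returned root is a time in [0,1] at which the derivative (shifted by
// 'offset') crosses zero, i.e. a candidate extremum of the moving bound.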
|
||||
/******************************************************************************
|
||||
* Code generated with sympy 1.4 *
|
||||
* See http://www.sympy.org/ for more information. *
|
||||
* *
|
||||
* see *
|
||||
* *
|
||||
* scripts/generate_motion_derivative_coefficients.py *
|
||||
* *
|
||||
* for how this code is generated *
|
||||
* *
|
||||
******************************************************************************/
|
||||
static void motion_derivative_coefficients(const float *p, float *coeff)
|
||||
{
|
||||
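// Layout note (editorial, not upstream): 'coeff' is filled as 3 (x/y/z
// dimensions) x 8 (terms of the derivative model: constant, linear, and the
// {1, t, t^2} factors of the cos and sin terms) x 7 (inputs 1, p0.x..p1.z)
// values, matching coeffs[3*8*7] above and the lookup
// coeffs[8*7*dim + i*7 + j] in the MotionDerivative constructor.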
coeff[0] = -p[1] + p[4] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27] - p[18] + p[27];
coeff[1] = 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - p[14]*p[14]*p[24] - 2*p[15] + p[24];
coeff[2] = 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - p[14]*p[14]*p[25] - 2*p[16] + p[25];
coeff[3] = -2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - p[14]*p[14]*p[26] - 2*p[17] + p[26];
coeff[4] = (-p[9]*p[9] - p[10]*p[10] - p[13]*p[13] - p[14]*p[14] + 1)*p[15];
coeff[5] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] - p[11]*p[14]*p[19] + p[12]*p[13]*p[19] - p[13]*p[13]*p[16] - p[14]*p[14]*p[16] + p[16];
coeff[6] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] + p[11]*p[13]*p[22] - p[11]*p[14]*p[20] + p[12]*p[13]*p[20] + p[12]*p[14]*p[22] - p[13]*p[13]*p[17] - p[14]*p[14]*p[17] + p[17];
coeff[7] = 0;
coeff[8] = -2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24] + 2*p[15] - 2*p[24];
coeff[9] = -2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25] + 2*p[16] - 2*p[25];
coeff[10] = 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26] + 2*p[17] - 2*p[26];
coeff[11] = 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24] - 2*p[15] + 2*p[24];
coeff[12] = 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25] - 2*p[16] + 2*p[25];
coeff[13] = -2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26] - 2*p[17] + 2*p[26];
coeff[14] = 2*p[0]*p[7]*p[11]*p[18] + 2*p[0]*p[7]*p[13]*p[23] - 2*p[0]*p[7]*p[14]*p[21] + 2*p[0]*p[8]*p[12]*p[18] + 2*p[0]*p[8]*p[13]*p[21] + 2*p[0]*p[8]*p[14]*p[23] + 2*p[0]*p[9]*p[11]*p[23] + 2*p[0]*p[9]*p[12]*p[21] - 2*p[0]*p[9]*p[13]*p[18] - 2*p[0]*p[10]*p[11]*p[21] + 2*p[0]*p[10]*p[12]*p[23] - 2*p[0]*p[10]*p[14]*p[18] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] + p[11]*p[13]*p[23] - p[11]*p[13]*p[32] - p[11]*p[14]*p[21] + p[11]*p[14]*p[30] + p[12]*p[13]*p[21] - p[12]*p[13]*p[30] + p[12]*p[14]*p[23] - p[12]*p[14]*p[32] - p[13]*p[13]*p[18] + p[13]*p[13]*p[27] - p[14]*p[14]*p[18] + p[14]*p[14]*p[27];
coeff[15] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + p[14]*p[14]*p[24];
coeff[16] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + p[14]*p[14]*p[25];
coeff[17] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + p[14]*p[14]*p[26];
coeff[18] = (-p[9]*p[9] - p[10]*p[10] + p[13]*p[13] + p[14]*p[14])*p[15];
coeff[19] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] + p[11]*p[14]*p[19] - p[12]*p[13]*p[19] + p[13]*p[13]*p[16] + p[14]*p[14]*p[16];
coeff[20] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] - p[11]*p[13]*p[22] + p[11]*p[14]*p[20] - p[12]*p[13]*p[20] - p[12]*p[14]*p[22] + p[13]*p[13]*p[17] + p[14]*p[14]*p[17];
coeff[21] = 2*(-p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27])*p[0];
coeff[22] = -4*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[7]*p[11]*p[24] - 4*p[0]*p[8]*p[12]*p[15] + 2*p[0]*p[8]*p[12]*p[24] + 4*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[9]*p[13]*p[24] + 4*p[0]*p[10]*p[14]*p[15] - 2*p[0]*p[10]*p[14]*p[24] - 2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24];
coeff[23] = -4*p[0]*p[7]*p[11]*p[16] + 2*p[0]*p[7]*p[11]*p[25] + 4*p[0]*p[7]*p[14]*p[19] - 2*p[0]*p[7]*p[14]*p[28] - 4*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[12]*p[25] - 4*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[8]*p[13]*p[28] - 4*p[0]*p[9]*p[12]*p[19] + 2*p[0]*p[9]*p[12]*p[28] + 4*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[9]*p[13]*p[25] + 4*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[11]*p[28] + 4*p[0]*p[10]*p[14]*p[16] - 2*p[0]*p[10]*p[14]*p[25] - 2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25];
coeff[24] = -4*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[11]*p[26] - 4*p[0]*p[7]*p[13]*p[22] + 2*p[0]*p[7]*p[13]*p[31] + 4*p[0]*p[7]*p[14]*p[20] - 2*p[0]*p[7]*p[14]*p[29] - 4*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[12]*p[26] - 4*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[13]*p[29] - 4*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[8]*p[14]*p[31] - 4*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[11]*p[31] - 4*p[0]*p[9]*p[12]*p[20] + 2*p[0]*p[9]*p[12]*p[29] + 4*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[9]*p[13]*p[26] + 4*p[0]*p[10]*p[11]*p[20] - 2*p[0]*p[10]*p[11]*p[29] - 4*p[0]*p[10]*p[12]*p[22] + 2*p[0]*p[10]*p[12]*p[31] + 4*p[0]*p[10]*p[14]*p[17] - 2*p[0]*p[10]*p[14]*p[26] + 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26];
coeff[25] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24];
coeff[26] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25];
coeff[27] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26];
coeff[28] = 0;
coeff[29] = 2*(p[7]*p[11]*p[15] - p[7]*p[11]*p[24] + p[8]*p[12]*p[15] - p[8]*p[12]*p[24] - p[9]*p[13]*p[15] + p[9]*p[13]*p[24] - p[10]*p[14]*p[15] + p[10]*p[14]*p[24])*p[0];
coeff[30] = 2*(p[7]*p[11]*p[16] - p[7]*p[11]*p[25] - p[7]*p[14]*p[19] + p[7]*p[14]*p[28] + p[8]*p[12]*p[16] - p[8]*p[12]*p[25] + p[8]*p[13]*p[19] - p[8]*p[13]*p[28] + p[9]*p[12]*p[19] - p[9]*p[12]*p[28] - p[9]*p[13]*p[16] + p[9]*p[13]*p[25] - p[10]*p[11]*p[19] + p[10]*p[11]*p[28] - p[10]*p[14]*p[16] + p[10]*p[14]*p[25])*p[0];
coeff[31] = 2*(p[7]*p[11]*p[17] - p[7]*p[11]*p[26] + p[7]*p[13]*p[22] - p[7]*p[13]*p[31] - p[7]*p[14]*p[20] + p[7]*p[14]*p[29] + p[8]*p[12]*p[17] - p[8]*p[12]*p[26] + p[8]*p[13]*p[20] - p[8]*p[13]*p[29] + p[8]*p[14]*p[22] - p[8]*p[14]*p[31] + p[9]*p[11]*p[22] - p[9]*p[11]*p[31] + p[9]*p[12]*p[20] - p[9]*p[12]*p[29] - p[9]*p[13]*p[17] + p[9]*p[13]*p[26] - p[10]*p[11]*p[20] + p[10]*p[11]*p[29] + p[10]*p[12]*p[22] - p[10]*p[12]*p[31] - p[10]*p[14]*p[17] + p[10]*p[14]*p[26])*p[0];
coeff[32] = 2*(-p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + p[10]*p[14]*p[15] - p[10]*p[14]*p[24])*p[0];
coeff[33] = 2*(-p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + p[10]*p[14]*p[16] - p[10]*p[14]*p[25])*p[0];
coeff[34] = 2*(-p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + p[10]*p[14]*p[17] - p[10]*p[14]*p[26])*p[0];
coeff[35] = -2*p[0]*p[7]*p[9]*p[23] + 2*p[0]*p[7]*p[10]*p[21] - 2*p[0]*p[8]*p[9]*p[21] - 2*p[0]*p[8]*p[10]*p[23] + 2*p[0]*p[9]*p[9]*p[18] + 2*p[0]*p[10]*p[10]*p[18] + 2*p[0]*p[11]*p[13]*p[23] - 2*p[0]*p[11]*p[14]*p[21] + 2*p[0]*p[12]*p[13]*p[21] + 2*p[0]*p[12]*p[14]*p[23] - 2*p[0]*p[13]*p[13]*p[18] - 2*p[0]*p[14]*p[14]*p[18] - p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27];
coeff[36] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - p[10]*p[14]*p[24];
coeff[37] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - p[10]*p[14]*p[25];
coeff[38] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - p[10]*p[14]*p[26];
coeff[39] = (p[7]*p[11] + p[8]*p[12] - p[9]*p[13] - p[10]*p[14])*p[15];
coeff[40] = p[7]*p[11]*p[16] - p[7]*p[14]*p[19] + p[8]*p[12]*p[16] + p[8]*p[13]*p[19] + p[9]*p[12]*p[19] - p[9]*p[13]*p[16] - p[10]*p[11]*p[19] - p[10]*p[14]*p[16];
coeff[41] = p[7]*p[11]*p[17] + p[7]*p[13]*p[22] - p[7]*p[14]*p[20] + p[8]*p[12]*p[17] + p[8]*p[13]*p[20] + p[8]*p[14]*p[22] + p[9]*p[11]*p[22] + p[9]*p[12]*p[20] - p[9]*p[13]*p[17] - p[10]*p[11]*p[20] + p[10]*p[12]*p[22] - p[10]*p[14]*p[17];
coeff[42] = 2*(p[7]*p[9]*p[23] - p[7]*p[9]*p[32] - p[7]*p[10]*p[21] + p[7]*p[10]*p[30] + p[8]*p[9]*p[21] - p[8]*p[9]*p[30] + p[8]*p[10]*p[23] - p[8]*p[10]*p[32] - p[9]*p[9]*p[18] + p[9]*p[9]*p[27] - p[10]*p[10]*p[18] + p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27])*p[0];
coeff[43] = -4*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[9]*p[9]*p[24] - 4*p[0]*p[10]*p[10]*p[15] + 2*p[0]*p[10]*p[10]*p[24] + 4*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[13]*p[13]*p[24] + 4*p[0]*p[14]*p[14]*p[15] - 2*p[0]*p[14]*p[14]*p[24] + 2*p[7]*p[11]*p[15] - 2*p[7]*p[11]*p[24] + 2*p[8]*p[12]*p[15] - 2*p[8]*p[12]*p[24] - 2*p[9]*p[13]*p[15] + 2*p[9]*p[13]*p[24] - 2*p[10]*p[14]*p[15] + 2*p[10]*p[14]*p[24];
coeff[44] = -4*p[0]*p[7]*p[10]*p[19] + 2*p[0]*p[7]*p[10]*p[28] + 4*p[0]*p[8]*p[9]*p[19] - 2*p[0]*p[8]*p[9]*p[28] - 4*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[9]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[16] + 2*p[0]*p[10]*p[10]*p[25] + 4*p[0]*p[11]*p[14]*p[19] - 2*p[0]*p[11]*p[14]*p[28] - 4*p[0]*p[12]*p[13]*p[19] + 2*p[0]*p[12]*p[13]*p[28] + 4*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[13]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[16] - 2*p[0]*p[14]*p[14]*p[25] + 2*p[7]*p[11]*p[16] - 2*p[7]*p[11]*p[25] - 2*p[7]*p[14]*p[19] + 2*p[7]*p[14]*p[28] + 2*p[8]*p[12]*p[16] - 2*p[8]*p[12]*p[25] + 2*p[8]*p[13]*p[19] - 2*p[8]*p[13]*p[28] + 2*p[9]*p[12]*p[19] - 2*p[9]*p[12]*p[28] - 2*p[9]*p[13]*p[16] + 2*p[9]*p[13]*p[25] - 2*p[10]*p[11]*p[19] + 2*p[10]*p[11]*p[28] - 2*p[10]*p[14]*p[16] + 2*p[10]*p[14]*p[25];
coeff[45] = 4*p[0]*p[7]*p[9]*p[22] - 2*p[0]*p[7]*p[9]*p[31] - 4*p[0]*p[7]*p[10]*p[20] + 2*p[0]*p[7]*p[10]*p[29] + 4*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[9]*p[29] + 4*p[0]*p[8]*p[10]*p[22] - 2*p[0]*p[8]*p[10]*p[31] - 4*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[9]*p[9]*p[26] - 4*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[10]*p[10]*p[26] - 4*p[0]*p[11]*p[13]*p[22] + 2*p[0]*p[11]*p[13]*p[31] + 4*p[0]*p[11]*p[14]*p[20] - 2*p[0]*p[11]*p[14]*p[29] - 4*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[13]*p[29] - 4*p[0]*p[12]*p[14]*p[22] + 2*p[0]*p[12]*p[14]*p[31] + 4*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[13]*p[13]*p[26] + 4*p[0]*p[14]*p[14]*p[17] - 2*p[0]*p[14]*p[14]*p[26] + 2*p[7]*p[11]*p[17] - 2*p[7]*p[11]*p[26] + 2*p[7]*p[13]*p[22] - 2*p[7]*p[13]*p[31] - 2*p[7]*p[14]*p[20] + 2*p[7]*p[14]*p[29] + 2*p[8]*p[12]*p[17] - 2*p[8]*p[12]*p[26] + 2*p[8]*p[13]*p[20] - 2*p[8]*p[13]*p[29] + 2*p[8]*p[14]*p[22] - 2*p[8]*p[14]*p[31] + 2*p[9]*p[11]*p[22] - 2*p[9]*p[11]*p[31] + 2*p[9]*p[12]*p[20] - 2*p[9]*p[12]*p[29] - 2*p[9]*p[13]*p[17] + 2*p[9]*p[13]*p[26] - 2*p[10]*p[11]*p[20] + 2*p[10]*p[11]*p[29] + 2*p[10]*p[12]*p[22] - 2*p[10]*p[12]*p[31] - 2*p[10]*p[14]*p[17] + 2*p[10]*p[14]*p[26];
coeff[46] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + 2*p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + 2*p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - 2*p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - 2*p[10]*p[14]*p[24];
coeff[47] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + 2*p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - 2*p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + 2*p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + 2*p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + 2*p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - 2*p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - 2*p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - 2*p[10]*p[14]*p[25];
coeff[48] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + 2*p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + 2*p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - 2*p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + 2*p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + 2*p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + 2*p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + 2*p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + 2*p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - 2*p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - 2*p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + 2*p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - 2*p[10]*p[14]*p[26];
coeff[49] = 0;
coeff[50] = 2*(p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - p[14]*p[14]*p[15] + p[14]*p[14]*p[24])*p[0];
coeff[51] = 2*(p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - p[14]*p[14]*p[16] + p[14]*p[14]*p[25])*p[0];
coeff[52] = 2*(-p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - p[14]*p[14]*p[17] + p[14]*p[14]*p[26])*p[0];
coeff[53] = 2*(-p[9]*p[9]*p[15] + p[9]*p[9]*p[24] - p[10]*p[10]*p[15] + p[10]*p[10]*p[24] + p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + p[14]*p[14]*p[15] - p[14]*p[14]*p[24])*p[0];
coeff[54] = 2*(-p[7]*p[10]*p[19] + p[7]*p[10]*p[28] + p[8]*p[9]*p[19] - p[8]*p[9]*p[28] - p[9]*p[9]*p[16] + p[9]*p[9]*p[25] - p[10]*p[10]*p[16] + p[10]*p[10]*p[25] + p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + p[14]*p[14]*p[16] - p[14]*p[14]*p[25])*p[0];
coeff[55] = 2*(p[7]*p[9]*p[22] - p[7]*p[9]*p[31] - p[7]*p[10]*p[20] + p[7]*p[10]*p[29] + p[8]*p[9]*p[20] - p[8]*p[9]*p[29] + p[8]*p[10]*p[22] - p[8]*p[10]*p[31] - p[9]*p[9]*p[17] + p[9]*p[9]*p[26] - p[10]*p[10]*p[17] + p[10]*p[10]*p[26] - p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + p[14]*p[14]*p[17] - p[14]*p[14]*p[26])*p[0];
coeff[56] = -p[2] + p[5] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30] - p[21] + p[30];
coeff[57] = -2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + p[12]*p[13]*p[24];
coeff[58] = -2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - p[14]*p[14]*p[28] - 2*p[19] + p[28];
coeff[59] = 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - p[14]*p[14]*p[29] - 2*p[20] + p[29];
coeff[60] = (p[7]*p[10] + p[8]*p[9] + p[11]*p[14] + p[12]*p[13])*p[15];
coeff[61] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] + p[11]*p[14]*p[16] - p[12]*p[12]*p[19] + p[12]*p[13]*p[16] - p[14]*p[14]*p[19] + p[19];
coeff[62] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] - p[11]*p[12]*p[22] + p[11]*p[14]*p[17] - p[12]*p[12]*p[20] + p[12]*p[13]*p[17] + p[13]*p[14]*p[22] - p[14]*p[14]*p[20] + p[20];
coeff[63] = 0;
coeff[64] = 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
coeff[65] = 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28] + 2*p[19] - 2*p[28];
coeff[66] = -2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29] + 2*p[20] - 2*p[29];
coeff[67] = -2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
coeff[68] = -2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28] - 2*p[19] + 2*p[28];
coeff[69] = 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29] - 2*p[20] + 2*p[29];
coeff[70] = 2*p[0]*p[7]*p[11]*p[21] - 2*p[0]*p[7]*p[12]*p[23] + 2*p[0]*p[7]*p[14]*p[18] - 2*p[0]*p[8]*p[11]*p[23] - 2*p[0]*p[8]*p[12]*p[21] + 2*p[0]*p[8]*p[13]*p[18] + 2*p[0]*p[9]*p[12]*p[18] + 2*p[0]*p[9]*p[13]*p[21] + 2*p[0]*p[9]*p[14]*p[23] + 2*p[0]*p[10]*p[11]*p[18] + 2*p[0]*p[10]*p[13]*p[23] - 2*p[0]*p[10]*p[14]*p[21] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] - p[11]*p[12]*p[23] + p[11]*p[12]*p[32] + p[11]*p[14]*p[18] - p[11]*p[14]*p[27] - p[12]*p[12]*p[21] + p[12]*p[12]*p[30] + p[12]*p[13]*p[18] - p[12]*p[13]*p[27] + p[13]*p[14]*p[23] - p[13]*p[14]*p[32] - p[14]*p[14]*p[21] + p[14]*p[14]*p[30];
coeff[71] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - p[12]*p[13]*p[24];
coeff[72] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + p[14]*p[14]*p[28];
coeff[73] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + p[14]*p[14]*p[29];
coeff[74] = (p[7]*p[10] + p[8]*p[9] - p[11]*p[14] - p[12]*p[13])*p[15];
coeff[75] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] - p[11]*p[14]*p[16] + p[12]*p[12]*p[19] - p[12]*p[13]*p[16] + p[14]*p[14]*p[19];
coeff[76] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] + p[11]*p[12]*p[22] - p[11]*p[14]*p[17] + p[12]*p[12]*p[20] - p[12]*p[13]*p[17] - p[13]*p[14]*p[22] + p[14]*p[14]*p[20];
coeff[77] = 2*(-p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30])*p[0];
coeff[78] = -4*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[7]*p[14]*p[24] - 4*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[8]*p[13]*p[24] - 4*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[9]*p[12]*p[24] - 4*p[0]*p[10]*p[11]*p[15] + 2*p[0]*p[10]*p[11]*p[24] + 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
coeff[79] = -4*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[11]*p[28] - 4*p[0]*p[7]*p[14]*p[16] + 2*p[0]*p[7]*p[14]*p[25] + 4*p[0]*p[8]*p[12]*p[19] - 2*p[0]*p[8]*p[12]*p[28] - 4*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[8]*p[13]*p[25] - 4*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[12]*p[25] - 4*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[9]*p[13]*p[28] - 4*p[0]*p[10]*p[11]*p[16] + 2*p[0]*p[10]*p[11]*p[25] + 4*p[0]*p[10]*p[14]*p[19] - 2*p[0]*p[10]*p[14]*p[28] + 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28];
coeff[80] = -4*p[0]*p[7]*p[11]*p[20] + 2*p[0]*p[7]*p[11]*p[29] + 4*p[0]*p[7]*p[12]*p[22] - 2*p[0]*p[7]*p[12]*p[31] - 4*p[0]*p[7]*p[14]*p[17] + 2*p[0]*p[7]*p[14]*p[26] + 4*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[11]*p[31] + 4*p[0]*p[8]*p[12]*p[20] - 2*p[0]*p[8]*p[12]*p[29] - 4*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[8]*p[13]*p[26] - 4*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[12]*p[26] - 4*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[13]*p[29] - 4*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[9]*p[14]*p[31] - 4*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[11]*p[26] - 4*p[0]*p[10]*p[13]*p[22] + 2*p[0]*p[10]*p[13]*p[31] + 4*p[0]*p[10]*p[14]*p[20] - 2*p[0]*p[10]*p[14]*p[29] - 2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29];
coeff[81] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
coeff[82] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28];
coeff[83] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29];
coeff[84] = 0;
coeff[85] = 2*(p[7]*p[14]*p[15] - p[7]*p[14]*p[24] + p[8]*p[13]*p[15] - p[8]*p[13]*p[24] + p[9]*p[12]*p[15] - p[9]*p[12]*p[24] + p[10]*p[11]*p[15] - p[10]*p[11]*p[24])*p[0];
coeff[86] = 2*(p[7]*p[11]*p[19] - p[7]*p[11]*p[28] + p[7]*p[14]*p[16] - p[7]*p[14]*p[25] - p[8]*p[12]*p[19] + p[8]*p[12]*p[28] + p[8]*p[13]*p[16] - p[8]*p[13]*p[25] + p[9]*p[12]*p[16] - p[9]*p[12]*p[25] + p[9]*p[13]*p[19] - p[9]*p[13]*p[28] + p[10]*p[11]*p[16] - p[10]*p[11]*p[25] - p[10]*p[14]*p[19] + p[10]*p[14]*p[28])*p[0];
coeff[87] = 2*(p[7]*p[11]*p[20] - p[7]*p[11]*p[29] - p[7]*p[12]*p[22] + p[7]*p[12]*p[31] + p[7]*p[14]*p[17] - p[7]*p[14]*p[26] - p[8]*p[11]*p[22] + p[8]*p[11]*p[31] - p[8]*p[12]*p[20] + p[8]*p[12]*p[29] + p[8]*p[13]*p[17] - p[8]*p[13]*p[26] + p[9]*p[12]*p[17] - p[9]*p[12]*p[26] + p[9]*p[13]*p[20] - p[9]*p[13]*p[29] + p[9]*p[14]*p[22] - p[9]*p[14]*p[31] + p[10]*p[11]*p[17] - p[10]*p[11]*p[26] + p[10]*p[13]*p[22] - p[10]*p[13]*p[31] - p[10]*p[14]*p[20] + p[10]*p[14]*p[29])*p[0];
coeff[88] = 2*(-p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - p[10]*p[11]*p[15] + p[10]*p[11]*p[24])*p[0];
coeff[89] = 2*(-p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + p[10]*p[14]*p[19] - p[10]*p[14]*p[28])*p[0];
coeff[90] = 2*(-p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + p[10]*p[14]*p[20] - p[10]*p[14]*p[29])*p[0];
coeff[91] = 2*p[0]*p[7]*p[8]*p[23] - 2*p[0]*p[7]*p[10]*p[18] + 2*p[0]*p[8]*p[8]*p[21] - 2*p[0]*p[8]*p[9]*p[18] - 2*p[0]*p[9]*p[10]*p[23] + 2*p[0]*p[10]*p[10]*p[21] - 2*p[0]*p[11]*p[12]*p[23] + 2*p[0]*p[11]*p[14]*p[18] - 2*p[0]*p[12]*p[12]*p[21] + 2*p[0]*p[12]*p[13]*p[18] + 2*p[0]*p[13]*p[14]*p[23] - 2*p[0]*p[14]*p[14]*p[21] - p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30];
coeff[92] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + p[10]*p[11]*p[24];
coeff[93] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - p[10]*p[14]*p[28];
coeff[94] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - p[10]*p[14]*p[29];
coeff[95] = (p[7]*p[14] + p[8]*p[13] + p[9]*p[12] + p[10]*p[11])*p[15];
coeff[96] = p[7]*p[11]*p[19] + p[7]*p[14]*p[16] - p[8]*p[12]*p[19] + p[8]*p[13]*p[16] + p[9]*p[12]*p[16] + p[9]*p[13]*p[19] + p[10]*p[11]*p[16] - p[10]*p[14]*p[19];
coeff[97] = p[7]*p[11]*p[20] - p[7]*p[12]*p[22] + p[7]*p[14]*p[17] - p[8]*p[11]*p[22] - p[8]*p[12]*p[20] + p[8]*p[13]*p[17] + p[9]*p[12]*p[17] + p[9]*p[13]*p[20] + p[9]*p[14]*p[22] + p[10]*p[11]*p[17] + p[10]*p[13]*p[22] - p[10]*p[14]*p[20];
coeff[98] = 2*(-p[7]*p[8]*p[23] + p[7]*p[8]*p[32] + p[7]*p[10]*p[18] - p[7]*p[10]*p[27] - p[8]*p[8]*p[21] + p[8]*p[8]*p[30] + p[8]*p[9]*p[18] - p[8]*p[9]*p[27] + p[9]*p[10]*p[23] - p[9]*p[10]*p[32] - p[10]*p[10]*p[21] + p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30])*p[0];
coeff[99] = 4*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[7]*p[10]*p[24] + 4*p[0]*p[8]*p[9]*p[15] - 2*p[0]*p[8]*p[9]*p[24] - 4*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[11]*p[14]*p[24] - 4*p[0]*p[12]*p[13]*p[15] + 2*p[0]*p[12]*p[13]*p[24] + 2*p[7]*p[14]*p[15] - 2*p[7]*p[14]*p[24] + 2*p[8]*p[13]*p[15] - 2*p[8]*p[13]*p[24] + 2*p[9]*p[12]*p[15] - 2*p[9]*p[12]*p[24] + 2*p[10]*p[11]*p[15] - 2*p[10]*p[11]*p[24];
coeff[100] = 4*p[0]*p[7]*p[10]*p[16] - 2*p[0]*p[7]*p[10]*p[25] - 4*p[0]*p[8]*p[8]*p[19] + 2*p[0]*p[8]*p[8]*p[28] + 4*p[0]*p[8]*p[9]*p[16] - 2*p[0]*p[8]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[10]*p[10]*p[28] - 4*p[0]*p[11]*p[14]*p[16] + 2*p[0]*p[11]*p[14]*p[25] + 4*p[0]*p[12]*p[12]*p[19] - 2*p[0]*p[12]*p[12]*p[28] - 4*p[0]*p[12]*p[13]*p[16] + 2*p[0]*p[12]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[19] - 2*p[0]*p[14]*p[14]*p[28] + 2*p[7]*p[11]*p[19] - 2*p[7]*p[11]*p[28] + 2*p[7]*p[14]*p[16] - 2*p[7]*p[14]*p[25] - 2*p[8]*p[12]*p[19] + 2*p[8]*p[12]*p[28] + 2*p[8]*p[13]*p[16] - 2*p[8]*p[13]*p[25] + 2*p[9]*p[12]*p[16] - 2*p[9]*p[12]*p[25] + 2*p[9]*p[13]*p[19] - 2*p[9]*p[13]*p[28] + 2*p[10]*p[11]*p[16] - 2*p[10]*p[11]*p[25] - 2*p[10]*p[14]*p[19] + 2*p[10]*p[14]*p[28];
coeff[101] = -4*p[0]*p[7]*p[8]*p[22] + 2*p[0]*p[7]*p[8]*p[31] + 4*p[0]*p[7]*p[10]*p[17] - 2*p[0]*p[7]*p[10]*p[26] - 4*p[0]*p[8]*p[8]*p[20] + 2*p[0]*p[8]*p[8]*p[29] + 4*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[8]*p[9]*p[26] + 4*p[0]*p[9]*p[10]*p[22] - 2*p[0]*p[9]*p[10]*p[31] - 4*p[0]*p[10]*p[10]*p[20] + 2*p[0]*p[10]*p[10]*p[29] + 4*p[0]*p[11]*p[12]*p[22] - 2*p[0]*p[11]*p[12]*p[31] - 4*p[0]*p[11]*p[14]*p[17] + 2*p[0]*p[11]*p[14]*p[26] + 4*p[0]*p[12]*p[12]*p[20] - 2*p[0]*p[12]*p[12]*p[29] - 4*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[12]*p[13]*p[26] - 4*p[0]*p[13]*p[14]*p[22] + 2*p[0]*p[13]*p[14]*p[31] + 4*p[0]*p[14]*p[14]*p[20] - 2*p[0]*p[14]*p[14]*p[29] + 2*p[7]*p[11]*p[20] - 2*p[7]*p[11]*p[29] - 2*p[7]*p[12]*p[22] + 2*p[7]*p[12]*p[31] + 2*p[7]*p[14]*p[17] - 2*p[7]*p[14]*p[26] - 2*p[8]*p[11]*p[22] + 2*p[8]*p[11]*p[31] - 2*p[8]*p[12]*p[20] + 2*p[8]*p[12]*p[29] + 2*p[8]*p[13]*p[17] - 2*p[8]*p[13]*p[26] + 2*p[9]*p[12]*p[17] - 2*p[9]*p[12]*p[26] + 2*p[9]*p[13]*p[20] - 2*p[9]*p[13]*p[29] + 2*p[9]*p[14]*p[22] - 2*p[9]*p[14]*p[31] + 2*p[10]*p[11]*p[17] - 2*p[10]*p[11]*p[26] + 2*p[10]*p[13]*p[22] - 2*p[10]*p[13]*p[31] - 2*p[10]*p[14]*p[20] + 2*p[10]*p[14]*p[29];
coeff[102] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + 2*p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + 2*p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + 2*p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + 2*p[10]*p[11]*p[24];
coeff[103] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + 2*p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + 2*p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - 2*p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + 2*p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + 2*p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + 2*p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + 2*p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - 2*p[10]*p[14]*p[28];
coeff[104] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + 2*p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - 2*p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + 2*p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - 2*p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - 2*p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + 2*p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + 2*p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + 2*p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + 2*p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + 2*p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + 2*p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - 2*p[10]*p[14]*p[29];
coeff[105] = 0;
coeff[106] = 2*(-p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + p[12]*p[13]*p[15] - p[12]*p[13]*p[24])*p[0];
coeff[107] = 2*(-p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - p[14]*p[14]*p[19] + p[14]*p[14]*p[28])*p[0];
coeff[108] = 2*(p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - p[14]*p[14]*p[20] + p[14]*p[14]*p[29])*p[0];
coeff[109] = 2*(p[7]*p[10]*p[15] - p[7]*p[10]*p[24] + p[8]*p[9]*p[15] - p[8]*p[9]*p[24] - p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - p[12]*p[13]*p[15] + p[12]*p[13]*p[24])*p[0];
coeff[110] = 2*(p[7]*p[10]*p[16] - p[7]*p[10]*p[25] - p[8]*p[8]*p[19] + p[8]*p[8]*p[28] + p[8]*p[9]*p[16] - p[8]*p[9]*p[25] - p[10]*p[10]*p[19] + p[10]*p[10]*p[28] - p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + p[14]*p[14]*p[19] - p[14]*p[14]*p[28])*p[0];
coeff[111] = 2*(-p[7]*p[8]*p[22] + p[7]*p[8]*p[31] + p[7]*p[10]*p[17] - p[7]*p[10]*p[26] - p[8]*p[8]*p[20] + p[8]*p[8]*p[29] + p[8]*p[9]*p[17] - p[8]*p[9]*p[26] + p[9]*p[10]*p[22] - p[9]*p[10]*p[31] - p[10]*p[10]*p[20] + p[10]*p[10]*p[29] + p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + p[14]*p[14]*p[20] - p[14]*p[14]*p[29])*p[0];
coeff[112] = -p[3] + p[6] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30] - p[23] + p[32];
|
||||
coeff[113] = 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + p[12]*p[14]*p[24];
|
||||
coeff[114] = -2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + p[13]*p[14]*p[28];
|
||||
coeff[115] = -2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + p[13]*p[14]*p[29] - 2*p[22] + p[31];
|
||||
coeff[116] = (-p[7]*p[9] + p[8]*p[10] - p[11]*p[13] + p[12]*p[14])*p[15];
|
||||
coeff[117] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] + p[11]*p[12]*p[19] - p[11]*p[13]*p[16] + p[12]*p[14]*p[16] + p[13]*p[14]*p[19];
|
||||
coeff[118] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] + p[11]*p[12]*p[20] - p[11]*p[13]*p[17] - p[12]*p[12]*p[22] + p[12]*p[14]*p[17] - p[13]*p[13]*p[22] + p[13]*p[14]*p[20] + p[22];
|
||||
coeff[119] = 0;
|
||||
coeff[120] = -2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
|
||||
coeff[121] = 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
|
||||
coeff[122] = 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29] + 2*p[22] - 2*p[31];
|
||||
coeff[123] = 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
|
||||
coeff[124] = -2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
|
||||
coeff[125] = -2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29] - 2*p[22] + 2*p[31];
|
||||
coeff[126] = 2*p[0]*p[7]*p[11]*p[23] + 2*p[0]*p[7]*p[12]*p[21] - 2*p[0]*p[7]*p[13]*p[18] + 2*p[0]*p[8]*p[11]*p[21] - 2*p[0]*p[8]*p[12]*p[23] + 2*p[0]*p[8]*p[14]*p[18] - 2*p[0]*p[9]*p[11]*p[18] - 2*p[0]*p[9]*p[13]*p[23] + 2*p[0]*p[9]*p[14]*p[21] + 2*p[0]*p[10]*p[12]*p[18] + 2*p[0]*p[10]*p[13]*p[21] + 2*p[0]*p[10]*p[14]*p[23] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] + p[11]*p[12]*p[21] - p[11]*p[12]*p[30] - p[11]*p[13]*p[18] + p[11]*p[13]*p[27] - p[12]*p[12]*p[23] + p[12]*p[12]*p[32] + p[12]*p[14]*p[18] - p[12]*p[14]*p[27] - p[13]*p[13]*p[23] + p[13]*p[13]*p[32] + p[13]*p[14]*p[21] - p[13]*p[14]*p[30];
|
||||
coeff[127] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - p[12]*p[14]*p[24];
|
||||
coeff[128] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - p[13]*p[14]*p[28];
|
||||
coeff[129] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - p[13]*p[14]*p[29];
|
||||
coeff[130] = (-p[7]*p[9] + p[8]*p[10] + p[11]*p[13] - p[12]*p[14])*p[15];
|
||||
coeff[131] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] - p[11]*p[12]*p[19] + p[11]*p[13]*p[16] - p[12]*p[14]*p[16] - p[13]*p[14]*p[19];
|
||||
coeff[132] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] - p[11]*p[12]*p[20] + p[11]*p[13]*p[17] + p[12]*p[12]*p[22] - p[12]*p[14]*p[17] + p[13]*p[13]*p[22] - p[13]*p[14]*p[20];
|
||||
coeff[133] = 2*(-p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32])*p[0];
|
||||
coeff[134] = 4*p[0]*p[7]*p[13]*p[15] - 2*p[0]*p[7]*p[13]*p[24] - 4*p[0]*p[8]*p[14]*p[15] + 2*p[0]*p[8]*p[14]*p[24] + 4*p[0]*p[9]*p[11]*p[15] - 2*p[0]*p[9]*p[11]*p[24] - 4*p[0]*p[10]*p[12]*p[15] + 2*p[0]*p[10]*p[12]*p[24] - 2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
|
||||
coeff[135] = -4*p[0]*p[7]*p[12]*p[19] + 2*p[0]*p[7]*p[12]*p[28] + 4*p[0]*p[7]*p[13]*p[16] - 2*p[0]*p[7]*p[13]*p[25] - 4*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[11]*p[28] - 4*p[0]*p[8]*p[14]*p[16] + 2*p[0]*p[8]*p[14]*p[25] + 4*p[0]*p[9]*p[11]*p[16] - 2*p[0]*p[9]*p[11]*p[25] - 4*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[9]*p[14]*p[28] - 4*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[12]*p[25] - 4*p[0]*p[10]*p[13]*p[19] + 2*p[0]*p[10]*p[13]*p[28] + 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
|
||||
coeff[136] = -4*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[11]*p[31] - 4*p[0]*p[7]*p[12]*p[20] + 2*p[0]*p[7]*p[12]*p[29] + 4*p[0]*p[7]*p[13]*p[17] - 2*p[0]*p[7]*p[13]*p[26] - 4*p[0]*p[8]*p[11]*p[20] + 2*p[0]*p[8]*p[11]*p[29] + 4*p[0]*p[8]*p[12]*p[22] - 2*p[0]*p[8]*p[12]*p[31] - 4*p[0]*p[8]*p[14]*p[17] + 2*p[0]*p[8]*p[14]*p[26] + 4*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[11]*p[26] + 4*p[0]*p[9]*p[13]*p[22] - 2*p[0]*p[9]*p[13]*p[31] - 4*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[9]*p[14]*p[29] - 4*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[12]*p[26] - 4*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[13]*p[29] - 4*p[0]*p[10]*p[14]*p[22] + 2*p[0]*p[10]*p[14]*p[31] + 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29];
|
||||
coeff[137] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
|
||||
coeff[138] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
|
||||
coeff[139] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29];
|
||||
coeff[140] = 0;
|
||||
coeff[141] = 2*(-p[7]*p[13]*p[15] + p[7]*p[13]*p[24] + p[8]*p[14]*p[15] - p[8]*p[14]*p[24] - p[9]*p[11]*p[15] + p[9]*p[11]*p[24] + p[10]*p[12]*p[15] - p[10]*p[12]*p[24])*p[0];
|
||||
coeff[142] = 2*(p[7]*p[12]*p[19] - p[7]*p[12]*p[28] - p[7]*p[13]*p[16] + p[7]*p[13]*p[25] + p[8]*p[11]*p[19] - p[8]*p[11]*p[28] + p[8]*p[14]*p[16] - p[8]*p[14]*p[25] - p[9]*p[11]*p[16] + p[9]*p[11]*p[25] + p[9]*p[14]*p[19] - p[9]*p[14]*p[28] + p[10]*p[12]*p[16] - p[10]*p[12]*p[25] + p[10]*p[13]*p[19] - p[10]*p[13]*p[28])*p[0];
|
||||
coeff[143] = 2*(p[7]*p[11]*p[22] - p[7]*p[11]*p[31] + p[7]*p[12]*p[20] - p[7]*p[12]*p[29] - p[7]*p[13]*p[17] + p[7]*p[13]*p[26] + p[8]*p[11]*p[20] - p[8]*p[11]*p[29] - p[8]*p[12]*p[22] + p[8]*p[12]*p[31] + p[8]*p[14]*p[17] - p[8]*p[14]*p[26] - p[9]*p[11]*p[17] + p[9]*p[11]*p[26] - p[9]*p[13]*p[22] + p[9]*p[13]*p[31] + p[9]*p[14]*p[20] - p[9]*p[14]*p[29] + p[10]*p[12]*p[17] - p[10]*p[12]*p[26] + p[10]*p[13]*p[20] - p[10]*p[13]*p[29] + p[10]*p[14]*p[22] - p[10]*p[14]*p[31])*p[0];
|
||||
coeff[144] = 2*(p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - p[10]*p[12]*p[15] + p[10]*p[12]*p[24])*p[0];
|
||||
coeff[145] = 2*(-p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - p[10]*p[13]*p[19] + p[10]*p[13]*p[28])*p[0];
|
||||
coeff[146] = 2*(-p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - p[10]*p[14]*p[22] + p[10]*p[14]*p[31])*p[0];
|
||||
coeff[147] = -2*p[0]*p[7]*p[8]*p[21] + 2*p[0]*p[7]*p[9]*p[18] + 2*p[0]*p[8]*p[8]*p[23] - 2*p[0]*p[8]*p[10]*p[18] + 2*p[0]*p[9]*p[9]*p[23] - 2*p[0]*p[9]*p[10]*p[21] + 2*p[0]*p[11]*p[12]*p[21] - 2*p[0]*p[11]*p[13]*p[18] - 2*p[0]*p[12]*p[12]*p[23] + 2*p[0]*p[12]*p[14]*p[18] - 2*p[0]*p[13]*p[13]*p[23] + 2*p[0]*p[13]*p[14]*p[21] - p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32];
|
||||
coeff[148] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + p[10]*p[12]*p[24];
|
||||
coeff[149] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + p[10]*p[13]*p[28];
|
||||
coeff[150] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + p[10]*p[14]*p[31];
|
||||
coeff[151] = (-p[7]*p[13] + p[8]*p[14] - p[9]*p[11] + p[10]*p[12])*p[15];
|
||||
coeff[152] = p[7]*p[12]*p[19] - p[7]*p[13]*p[16] + p[8]*p[11]*p[19] + p[8]*p[14]*p[16] - p[9]*p[11]*p[16] + p[9]*p[14]*p[19] + p[10]*p[12]*p[16] + p[10]*p[13]*p[19];
|
||||
coeff[153] = p[7]*p[11]*p[22] + p[7]*p[12]*p[20] - p[7]*p[13]*p[17] + p[8]*p[11]*p[20] - p[8]*p[12]*p[22] + p[8]*p[14]*p[17] - p[9]*p[11]*p[17] - p[9]*p[13]*p[22] + p[9]*p[14]*p[20] + p[10]*p[12]*p[17] + p[10]*p[13]*p[20] + p[10]*p[14]*p[22];
|
||||
coeff[154] = 2*(p[7]*p[8]*p[21] - p[7]*p[8]*p[30] - p[7]*p[9]*p[18] + p[7]*p[9]*p[27] - p[8]*p[8]*p[23] + p[8]*p[8]*p[32] + p[8]*p[10]*p[18] - p[8]*p[10]*p[27] - p[9]*p[9]*p[23] + p[9]*p[9]*p[32] + p[9]*p[10]*p[21] - p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30])*p[0];
|
||||
coeff[155] = -4*p[0]*p[7]*p[9]*p[15] + 2*p[0]*p[7]*p[9]*p[24] + 4*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[8]*p[10]*p[24] + 4*p[0]*p[11]*p[13]*p[15] - 2*p[0]*p[11]*p[13]*p[24] - 4*p[0]*p[12]*p[14]*p[15] + 2*p[0]*p[12]*p[14]*p[24] - 2*p[7]*p[13]*p[15] + 2*p[7]*p[13]*p[24] + 2*p[8]*p[14]*p[15] - 2*p[8]*p[14]*p[24] - 2*p[9]*p[11]*p[15] + 2*p[9]*p[11]*p[24] + 2*p[10]*p[12]*p[15] - 2*p[10]*p[12]*p[24];
|
||||
coeff[156] = 4*p[0]*p[7]*p[8]*p[19] - 2*p[0]*p[7]*p[8]*p[28] - 4*p[0]*p[7]*p[9]*p[16] + 2*p[0]*p[7]*p[9]*p[25] + 4*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[8]*p[10]*p[25] + 4*p[0]*p[9]*p[10]*p[19] - 2*p[0]*p[9]*p[10]*p[28] - 4*p[0]*p[11]*p[12]*p[19] + 2*p[0]*p[11]*p[12]*p[28] + 4*p[0]*p[11]*p[13]*p[16] - 2*p[0]*p[11]*p[13]*p[25] - 4*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[12]*p[14]*p[25] - 4*p[0]*p[13]*p[14]*p[19] + 2*p[0]*p[13]*p[14]*p[28] + 2*p[7]*p[12]*p[19] - 2*p[7]*p[12]*p[28] - 2*p[7]*p[13]*p[16] + 2*p[7]*p[13]*p[25] + 2*p[8]*p[11]*p[19] - 2*p[8]*p[11]*p[28] + 2*p[8]*p[14]*p[16] - 2*p[8]*p[14]*p[25] - 2*p[9]*p[11]*p[16] + 2*p[9]*p[11]*p[25] + 2*p[9]*p[14]*p[19] - 2*p[9]*p[14]*p[28] + 2*p[10]*p[12]*p[16] - 2*p[10]*p[12]*p[25] + 2*p[10]*p[13]*p[19] - 2*p[10]*p[13]*p[28];
|
||||
coeff[157] = 4*p[0]*p[7]*p[8]*p[20] - 2*p[0]*p[7]*p[8]*p[29] - 4*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[7]*p[9]*p[26] - 4*p[0]*p[8]*p[8]*p[22] + 2*p[0]*p[8]*p[8]*p[31] + 4*p[0]*p[8]*p[10]*p[17] - 2*p[0]*p[8]*p[10]*p[26] - 4*p[0]*p[9]*p[9]*p[22] + 2*p[0]*p[9]*p[9]*p[31] + 4*p[0]*p[9]*p[10]*p[20] - 2*p[0]*p[9]*p[10]*p[29] - 4*p[0]*p[11]*p[12]*p[20] + 2*p[0]*p[11]*p[12]*p[29] + 4*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[11]*p[13]*p[26] + 4*p[0]*p[12]*p[12]*p[22] - 2*p[0]*p[12]*p[12]*p[31] - 4*p[0]*p[12]*p[14]*p[17] + 2*p[0]*p[12]*p[14]*p[26] + 4*p[0]*p[13]*p[13]*p[22] - 2*p[0]*p[13]*p[13]*p[31] - 4*p[0]*p[13]*p[14]*p[20] + 2*p[0]*p[13]*p[14]*p[29] + 2*p[7]*p[11]*p[22] - 2*p[7]*p[11]*p[31] + 2*p[7]*p[12]*p[20] - 2*p[7]*p[12]*p[29] - 2*p[7]*p[13]*p[17] + 2*p[7]*p[13]*p[26] + 2*p[8]*p[11]*p[20] - 2*p[8]*p[11]*p[29] - 2*p[8]*p[12]*p[22] + 2*p[8]*p[12]*p[31] + 2*p[8]*p[14]*p[17] - 2*p[8]*p[14]*p[26] - 2*p[9]*p[11]*p[17] + 2*p[9]*p[11]*p[26] - 2*p[9]*p[13]*p[22] + 2*p[9]*p[13]*p[31] + 2*p[9]*p[14]*p[20] - 2*p[9]*p[14]*p[29] + 2*p[10]*p[12]*p[17] - 2*p[10]*p[12]*p[26] + 2*p[10]*p[13]*p[20] - 2*p[10]*p[13]*p[29] + 2*p[10]*p[14]*p[22] - 2*p[10]*p[14]*p[31];
|
||||
coeff[158] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - 2*p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + 2*p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - 2*p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + 2*p[10]*p[12]*p[24];
|
||||
coeff[159] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + 2*p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - 2*p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + 2*p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + 2*p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - 2*p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + 2*p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + 2*p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + 2*p[10]*p[13]*p[28];
|
||||
coeff[160] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + 2*p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + 2*p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - 2*p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + 2*p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - 2*p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + 2*p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - 2*p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - 2*p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + 2*p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + 2*p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + 2*p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + 2*p[10]*p[14]*p[31];
|
||||
coeff[161] = 0;
|
||||
coeff[162] = 2*(p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + p[12]*p[14]*p[15] - p[12]*p[14]*p[24])*p[0];
|
||||
coeff[163] = 2*(-p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + p[13]*p[14]*p[19] - p[13]*p[14]*p[28])*p[0];
|
||||
coeff[164] = 2*(-p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + p[13]*p[14]*p[20] - p[13]*p[14]*p[29])*p[0];
|
||||
coeff[165] = 2*(-p[7]*p[9]*p[15] + p[7]*p[9]*p[24] + p[8]*p[10]*p[15] - p[8]*p[10]*p[24] + p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - p[12]*p[14]*p[15] + p[12]*p[14]*p[24])*p[0];
|
||||
coeff[166] = 2*(p[7]*p[8]*p[19] - p[7]*p[8]*p[28] - p[7]*p[9]*p[16] + p[7]*p[9]*p[25] + p[8]*p[10]*p[16] - p[8]*p[10]*p[25] + p[9]*p[10]*p[19] - p[9]*p[10]*p[28] - p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - p[13]*p[14]*p[19] + p[13]*p[14]*p[28])*p[0];
|
||||
coeff[167] = 2*(p[7]*p[8]*p[20] - p[7]*p[8]*p[29] - p[7]*p[9]*p[17] + p[7]*p[9]*p[26] - p[8]*p[8]*p[22] + p[8]*p[8]*p[31] + p[8]*p[10]*p[17] - p[8]*p[10]*p[26] - p[9]*p[9]*p[22] + p[9]*p[9]*p[31] + p[9]*p[10]*p[20] - p[9]*p[10]*p[29] - p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - p[13]*p[14]*p[20] + p[13]*p[14]*p[29])*p[0];
}

} // namespace embree
137
engine/thirdparty/embree/kernels/common/point_query.h
vendored
Normal file
@@ -0,0 +1,137 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Point query structure for closest point query */
|
||||
template<int K>
|
||||
struct RTC_ALIGN(16) PointQueryK
|
||||
{
|
||||
/* Default construction does nothing */
|
||||
__forceinline PointQueryK() {}
|
||||
|
||||
/* Constructs a point query from the query point, query radius, and time value */
|
||||
__forceinline PointQueryK(const Vec3vf<K>& p, const vfloat<K>& radius = inf, const vfloat<K>& time = zero)
|
||||
: p(p), time(time), radius(radius) {}
|
||||
|
||||
/* Returns the size of the query packet */
|
||||
static __forceinline size_t size() { return K; }
|
||||
|
||||
/* Calculates if this is a valid query that does not cause issues during traversal */
|
||||
__forceinline vbool<K> valid() const
|
||||
{
|
||||
const vbool<K> vx = (abs(p.x) <= vfloat<K>(FLT_LARGE));
|
||||
const vbool<K> vy = (abs(p.y) <= vfloat<K>(FLT_LARGE));
|
||||
const vbool<K> vz = (abs(p.z) <= vfloat<K>(FLT_LARGE));
|
||||
const vbool<K> vn = radius >= vfloat<K>(0);
|
||||
const vbool<K> vf = abs(time) < vfloat<K>(inf);
|
||||
return vx & vy & vz & vn & vf;
|
||||
}
|
||||
|
||||
__forceinline void get(PointQueryK<1>* ray) const;
|
||||
__forceinline void get(size_t i, PointQueryK<1>& ray) const;
|
||||
__forceinline void set(const PointQueryK<1>* ray);
|
||||
__forceinline void set(size_t i, const PointQueryK<1>& ray);
|
||||
|
||||
Vec3vf<K> p; // location of the query point
|
||||
vfloat<K> time; // time for motion blur
|
||||
vfloat<K> radius; // radius for the point query
|
||||
};
|
||||
|
||||
/* Specialization for a single point query */
|
||||
template<>
|
||||
struct RTC_ALIGN(16) PointQueryK<1>
|
||||
{
|
||||
/* Default construction does nothing */
|
||||
__forceinline PointQueryK() {}
|
||||
|
||||
/* Constructs a point query from the query point, query radius, and time value */
|
||||
__forceinline PointQueryK(const Vec3fa& p, float radius = inf, float time = zero)
|
||||
: p(p), time(time), radius(radius) {}
|
||||
|
||||
/* Calculates if this is a valid query that does not cause issues during traversal */
|
||||
__forceinline bool valid() const {
|
||||
return all(le_mask(abs(Vec3fa(p)), Vec3fa(FLT_LARGE)) & le_mask(Vec3fa(0.f), Vec3fa(radius))) && abs(time) < float(inf);
|
||||
}
|
||||
|
||||
Vec3f p;
|
||||
float time;
|
||||
float radius;
|
||||
};
|
||||
|
||||
/* Converts point query packet to single point query */
|
||||
template<int K>
|
||||
__forceinline void PointQueryK<K>::get(PointQueryK<1>* query) const
|
||||
{
|
||||
for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
|
||||
{
|
||||
query[i].p.x = p.x[i];
|
||||
query[i].p.y = p.y[i];
|
||||
query[i].p.z = p.z[i];
|
||||
query[i].time = time[i];
|
||||
query[i].radius = radius[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Extracts a single point query out of a point query packet */
|
||||
template<int K>
|
||||
__forceinline void PointQueryK<K>::get(size_t i, PointQueryK<1>& query) const
|
||||
{
|
||||
query.p.x = p.x[i];
|
||||
query.p.y = p.y[i];
|
||||
query.p.z = p.z[i];
|
||||
query.radius = radius[i];
|
||||
query.time = time[i];
|
||||
}
|
||||
|
||||
/* Converts single point query to point query packet */
|
||||
template<int K>
|
||||
__forceinline void PointQueryK<K>::set(const PointQueryK<1>* query)
|
||||
{
|
||||
for (size_t i = 0; i < K; i++)
|
||||
{
|
||||
p.x[i] = query[i].p.x;
|
||||
p.y[i] = query[i].p.y;
|
||||
p.z[i] = query[i].p.z;
|
||||
radius[i] = query[i].radius;
|
||||
time[i] = query[i].time;
|
||||
}
|
||||
}
|
||||
|
||||
/* inserts a single point query into a point query packet element */
|
||||
template<int K>
|
||||
__forceinline void PointQueryK<K>::set(size_t i, const PointQueryK<1>& query)
|
||||
{
|
||||
p.x[i] = query.p.x;
|
||||
p.y[i] = query.p.y;
|
||||
p.z[i] = query.p.z;
|
||||
radius[i] = query.radius;
|
||||
time[i] = query.time;
|
||||
}
|
||||
|
||||
/* Shortcuts */
|
||||
typedef PointQueryK<1> PointQuery;
|
||||
typedef PointQueryK<4> PointQuery4;
|
||||
typedef PointQueryK<8> PointQuery8;
|
||||
typedef PointQueryK<16> PointQuery16;
|
||||
typedef PointQueryK<VSIZEX> PointQueryx;
|
||||
struct PointQueryN;
|
||||
|
||||
/* Outputs point query to stream */
|
||||
template<int K>
|
||||
__forceinline embree_ostream operator <<(embree_ostream cout, const PointQueryK<K>& query)
|
||||
{
|
||||
cout << "{ " << embree_endl
|
||||
<< " p = " << query.p << embree_endl
|
||||
<< " r = " << query.radius << embree_endl
|
||||
<< " time = " << query.time << embree_endl
|
||||
<< "}";
|
||||
return cout;
|
||||
}
|
||||
}
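Below is a minimal, hypothetical usage sketch for the PointQueryK helpers defined in point_query.h above. It shows how a query packet is filled, how a single lane is extracted and validated, and how a modified scalar query is written back. The position, radius, and time values are made up, and the sketch assumes it is compiled inside the embree kernels where Vec3vf<K> and vfloat<K> are visible.

// Sketch only: exercises the PointQueryK helpers declared in point_query.h above.
#include "point_query.h"

namespace embree
{
  inline void pointQueryPacketExample()
  {
    // Build a packet of four point queries, all at position (0,1,2), radius 1, time 0.
    PointQuery4 packet(Vec3vf<4>(vfloat<4>(0.0f), vfloat<4>(1.0f), vfloat<4>(2.0f)),
                       vfloat<4>(1.0f), vfloat<4>(0.0f));

    // Extract lane 2 as a scalar PointQuery (PointQueryK<1>).
    PointQuery q;
    packet.get(2, q);

    // valid() rejects huge/NaN positions and negative radii before traversal.
    if (q.valid()) {
      // ... run a closest-point traversal with q ...
    }

    // Write a modified scalar query back into lane 2 of the packet.
    q.radius = 0.5f;
    packet.set(2, q);
  }
}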
159
engine/thirdparty/embree/kernels/common/profile.h
vendored
Normal file
@@ -0,0 +1,159 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! helper structure for the implementation of the profile functions below */
|
||||
struct ProfileTimer
|
||||
{
|
||||
static const size_t N = 20;
|
||||
|
||||
ProfileTimer () {}
|
||||
|
||||
ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0)
|
||||
{
|
||||
for (size_t i=0; i<N; i++) names[i] = nullptr;
|
||||
for (size_t i=0; i<N; i++) dt_fst[i] = 0.0;
|
||||
for (size_t i=0; i<N; i++) dt_min[i] = pos_inf;
|
||||
for (size_t i=0; i<N; i++) dt_avg[i] = 0.0;
|
||||
for (size_t i=0; i<N; i++) dt_max[i] = neg_inf;
|
||||
}
|
||||
|
||||
__forceinline void begin()
|
||||
{
|
||||
j=0;
|
||||
t0 = tj = getSeconds();
|
||||
}
|
||||
|
||||
__forceinline void end() {
|
||||
absolute("total");
|
||||
i++;
|
||||
}
|
||||
|
||||
__forceinline void operator() (const char* name) {
|
||||
relative(name);
|
||||
}
|
||||
|
||||
__forceinline void absolute (const char* name)
|
||||
{
|
||||
const double t1 = getSeconds();
|
||||
const double dt = t1-t0;
|
||||
assert(names[j] == nullptr || names[j] == name);
|
||||
names[j] = name;
|
||||
if (i == 0) dt_fst[j] = dt;
|
||||
if (i>=numSkip) {
|
||||
dt_min[j] = min(dt_min[j],dt);
|
||||
dt_avg[j] = dt_avg[j] + dt;
|
||||
dt_max[j] = max(dt_max[j],dt);
|
||||
}
|
||||
j++;
|
||||
maxJ = max(maxJ,j);
|
||||
}
|
||||
|
||||
__forceinline void relative (const char* name)
|
||||
{
|
||||
const double t1 = getSeconds();
|
||||
const double dt = t1-tj;
|
||||
tj = t1;
|
||||
assert(names[j] == nullptr || names[j] == name);
|
||||
names[j] = name;
|
||||
if (i == 0) dt_fst[j] = dt;
|
||||
if (i>=numSkip) {
|
||||
dt_min[j] = min(dt_min[j],dt);
|
||||
dt_avg[j] = dt_avg[j] + dt;
|
||||
dt_max[j] = max(dt_max[j],dt);
|
||||
}
|
||||
j++;
|
||||
maxJ = max(maxJ,j);
|
||||
}
|
||||
|
||||
void print(size_t numElements)
|
||||
{
|
||||
for (size_t k=0; k<N; k++)
|
||||
dt_avg[k] /= double(i-numSkip);
|
||||
|
||||
printf(" profile [M/s]:\n");
|
||||
for (size_t j=0; j<maxJ; j++)
|
||||
printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
|
||||
names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6);
|
||||
|
||||
printf(" profile [ms]:\n");
|
||||
for (size_t j=0; j<maxJ; j++)
|
||||
printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
|
||||
names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
|
||||
}
|
||||
|
||||
void print()
|
||||
{
|
||||
printf(" profile:\n");
|
||||
|
||||
for (size_t k=0; k<N; k++)
|
||||
dt_avg[k] /= double(i-numSkip);
|
||||
|
||||
for (size_t j=0; j<maxJ; j++) {
|
||||
printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
|
||||
names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
|
||||
}
|
||||
}
|
||||
|
||||
double avg() {
|
||||
return dt_avg[maxJ-1]/double(i-numSkip);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t i;
|
||||
size_t j;
|
||||
size_t maxJ;
|
||||
size_t numSkip;
|
||||
double t0;
|
||||
double tj;
|
||||
const char* names[N];
|
||||
double dt_fst[N];
|
||||
double dt_min[N];
|
||||
double dt_avg[N];
|
||||
double dt_max[N];
|
||||
};
|
||||
|
||||
/*! This function executes some code block multiple times and measures sections of it.
    Use it the following way:
|
||||
|
||||
profile(1,10,1000,[&](ProfileTimer& timer) {
|
||||
// code
|
||||
timer("A");
|
||||
// code
|
||||
timer("B");
|
||||
});
|
||||
*/
|
||||
template<typename Closure>
|
||||
void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
|
||||
{
|
||||
ProfileTimer timer(numSkip);
|
||||
|
||||
for (size_t i=0; i<numSkip+numIter; i++)
|
||||
{
|
||||
timer.begin();
|
||||
closure(timer);
|
||||
timer.end();
|
||||
}
|
||||
timer.print(numElements);
|
||||
}
|
||||
|
||||
/*! similar to the function above, but the timer object is provided externally */
|
||||
template<typename Closure>
|
||||
void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
|
||||
{
|
||||
timer = ProfileTimer(numSkip);
|
||||
|
||||
for (size_t i=0; i<numSkip+numIter; i++)
|
||||
{
|
||||
timer.begin();
|
||||
closure(timer);
|
||||
timer.end();
|
||||
}
|
||||
timer.print(numElements);
|
||||
}
|
||||
}
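A minimal, hypothetical sketch of how the profile() helper above is meant to be driven follows, matching the usage comment in profile.h. The vector workload and the section names "sum" and "scale" are made up for illustration; the sketch assumes it is compiled inside the embree kernels so that profile.h and getSeconds() are available.

// Sketch only: drives ProfileTimer/profile() from profile.h above.
#include "profile.h"
#include <vector>

namespace embree
{
  inline void profileExample()
  {
    const size_t N = 1000000;          // number of elements processed per iteration
    std::vector<float> data(N, 1.0f);  // made-up workload

    // Skip 1 warm-up iteration, measure 10 iterations, report rates relative to N.
    profile(1, 10, N, [&](ProfileTimer& timer)
    {
      float sum = 0.0f;
      for (size_t i = 0; i < N; i++) sum += data[i];
      timer("sum");                    // first measured section

      for (size_t i = 0; i < N; i++) data[i] *= 0.5f;
      timer("scale");                  // second measured section

      (void)sum;
    });
  }
}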
1635
engine/thirdparty/embree/kernels/common/ray.h
vendored
Normal file
File diff suppressed because it is too large
2067
engine/thirdparty/embree/kernels/common/rtcore.cpp
vendored
Normal file
File diff suppressed because it is too large
162
engine/thirdparty/embree/kernels/common/rtcore.h
vendored
Normal file
@@ -0,0 +1,162 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../../include/embree4/rtcore.h"
|
||||
RTC_NAMESPACE_USE
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! decoding of intersection flags */
|
||||
__forceinline bool isCoherent (RTCRayQueryFlags flags) { return (flags & RTC_RAY_QUERY_FLAG_COHERENT) == RTC_RAY_QUERY_FLAG_COHERENT; }
|
||||
__forceinline bool isIncoherent(RTCRayQueryFlags flags) { return (flags & RTC_RAY_QUERY_FLAG_COHERENT) == RTC_RAY_QUERY_FLAG_INCOHERENT; }
|
||||
|
||||
/*! Macros used in the rtcore API implementation */
|
||||
// -- GODOT start --
|
||||
#define RTC_CATCH_BEGIN
|
||||
#define RTC_CATCH_END(device)
|
||||
#define RTC_CATCH_END2(scene)
|
||||
#define RTC_CATCH_END2_FALSE(scene) return false;
|
||||
#if 0
|
||||
// -- GODOT end --
|
||||
#define RTC_CATCH_BEGIN try {
|
||||
|
||||
#define RTC_CATCH_END(device) \
|
||||
} catch (std::bad_alloc&) { \
|
||||
Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
} catch (rtcore_error& e) { \
|
||||
Device::process_error(device,e.error,e.what()); \
|
||||
} catch (std::exception& e) { \
|
||||
Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
} catch (...) { \
|
||||
Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
}
|
||||
|
||||
#define RTC_CATCH_END2(scene) \
|
||||
} catch (std::bad_alloc&) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
} catch (rtcore_error& e) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,e.error,e.what()); \
|
||||
} catch (std::exception& e) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
} catch (...) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
}
|
||||
|
||||
#define RTC_CATCH_END2_FALSE(scene) \
|
||||
} catch (std::bad_alloc&) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
return false; \
|
||||
} catch (rtcore_error& e) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,e.error,e.what()); \
|
||||
return false; \
|
||||
} catch (std::exception& e) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
return false; \
|
||||
} catch (...) { \
|
||||
Device* device = scene ? scene->device : nullptr; \
|
||||
Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
return false; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define RTC_VERIFY_HANDLE(handle) \
|
||||
if (handle == nullptr) { \
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
|
||||
}
|
||||
|
||||
#define RTC_VERIFY_GEOMID(id) \
|
||||
if (id == RTC_INVALID_GEOMETRY_ID) { \
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
|
||||
}
|
||||
|
||||
#define RTC_VERIFY_UPPER(id,upper) \
|
||||
if (id > upper) { \
|
||||
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
|
||||
}
|
||||
|
||||
#define RTC_VERIFY_RANGE(id,lower,upper) \
|
||||
if (id < lower || id > upper) \
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"argument out of bounds");
|
||||
|
||||
#if 0 // enable to debug print all API calls
|
||||
#define RTC_TRACE(x) std::cout << #x << std::endl;
|
||||
#else
|
||||
#define RTC_TRACE(x)
|
||||
#endif
|
||||
|
||||
// -- GODOT start --
|
||||
#if 0
|
||||
/*! used to throw embree API errors */
|
||||
struct rtcore_error : public std::exception
|
||||
{
|
||||
__forceinline rtcore_error(RTCError error, const std::string& str)
|
||||
: error(error), str(str) {}
|
||||
|
||||
~rtcore_error() throw() {}
|
||||
|
||||
const char* what () const throw () {
|
||||
return str.c_str();
|
||||
}
|
||||
|
||||
RTCError error;
|
||||
std::string str;
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(DEBUG) // only report file and line in debug mode
|
||||
#define throw_RTCError(error,str) \
|
||||
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
|
||||
// throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
|
||||
#else
|
||||
#define throw_RTCError(error,str) \
|
||||
abort();
|
||||
// throw rtcore_error(error,str);
|
||||
#endif
|
||||
// -- GODOT end --
|
||||
|
||||
#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
|
||||
(settings.byteSize > (offsetof(RTCBuildArguments,member)+sizeof(settings.member)))
|
||||
|
||||
|
||||
inline void storeTransform(const AffineSpace3fa& space, RTCFormat format, float* xfm)
|
||||
{
|
||||
switch (format)
|
||||
{
|
||||
case RTC_FORMAT_FLOAT3X4_ROW_MAJOR:
|
||||
xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vy.x; xfm[ 2] = space.l.vz.x; xfm[ 3] = space.p.x;
|
||||
xfm[ 4] = space.l.vx.y; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vz.y; xfm[ 7] = space.p.y;
|
||||
xfm[ 8] = space.l.vx.z; xfm[ 9] = space.l.vy.z; xfm[10] = space.l.vz.z; xfm[11] = space.p.z;
|
||||
break;
|
||||
|
||||
case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR:
|
||||
xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z;
|
||||
xfm[ 3] = space.l.vy.x; xfm[ 4] = space.l.vy.y; xfm[ 5] = space.l.vy.z;
|
||||
xfm[ 6] = space.l.vz.x; xfm[ 7] = space.l.vz.y; xfm[ 8] = space.l.vz.z;
|
||||
xfm[ 9] = space.p.x; xfm[10] = space.p.y; xfm[11] = space.p.z;
|
||||
break;
|
||||
|
||||
case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR:
|
||||
xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z; xfm[ 3] = 0.f;
|
||||
xfm[ 4] = space.l.vy.x; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vy.z; xfm[ 7] = 0.f;
|
||||
xfm[ 8] = space.l.vz.x; xfm[ 9] = space.l.vz.y; xfm[10] = space.l.vz.z; xfm[11] = 0.f;
|
||||
xfm[12] = space.p.x; xfm[13] = space.p.y; xfm[14] = space.p.z; xfm[15] = 1.f;
|
||||
break;
|
||||
|
||||
default:
|
||||
#if !defined(__SYCL_DEVICE_ONLY__)
|
||||
throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
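storeTransform() above serializes an affine transform into one of three RTCFormat layouts. The following standalone sketch (made-up identity transform, no embree dependency) reproduces the two 3x4 index layouts from the switch statement, so the row-major versus column-major ordering is explicit.

// Sketch only: illustrates the RTC_FORMAT_FLOAT3X4_* index layouts written by storeTransform().
#include <cstdio>

int main()
{
  // Made-up transform: columns vx, vy, vz of the linear part, plus translation p.
  const float vx[3] = {1, 0, 0};
  const float vy[3] = {0, 1, 0};
  const float vz[3] = {0, 0, 1};
  const float p [3] = {5, 6, 7};

  // RTC_FORMAT_FLOAT3X4_ROW_MAJOR: xfm[row*4 + col], translation in column 3.
  float row_major[12];
  for (int r = 0; r < 3; r++) {
    row_major[r*4 + 0] = vx[r];
    row_major[r*4 + 1] = vy[r];
    row_major[r*4 + 2] = vz[r];
    row_major[r*4 + 3] = p [r];
  }

  // RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR: xfm[col*3 + row], translation as the 4th column.
  float col_major[12];
  for (int r = 0; r < 3; r++) {
    col_major[0*3 + r] = vx[r];
    col_major[1*3 + r] = vy[r];
    col_major[2*3 + r] = vz[r];
    col_major[3*3 + r] = p [r];
  }

  std::printf("row major [3] = %g (p.x), column major [9] = %g (p.x)\n",
              row_major[3], col_major[9]);
  return 0;
}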
442
engine/thirdparty/embree/kernels/common/rtcore_builder.cpp
vendored
Normal file
@@ -0,0 +1,442 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#define RTC_EXPORT_API
|
||||
|
||||
#include "default.h"
|
||||
#include "device.h"
|
||||
#include "scene.h"
|
||||
#include "context.h"
|
||||
#include "alloc.h"
|
||||
|
||||
#include "../builders/bvh_builder_sah.h"
|
||||
#include "../builders/bvh_builder_morton.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa // FIXME: support more ISAs for builders
|
||||
{
|
||||
struct BVH : public RefCount
|
||||
{
|
||||
BVH (Device* device)
|
||||
: device(device), allocator(device,true), morton_src(device,0), morton_tmp(device,0)
|
||||
{
|
||||
device->refInc();
|
||||
}
|
||||
|
||||
~BVH() {
|
||||
device->refDec();
|
||||
}
|
||||
|
||||
public:
|
||||
Device* device;
|
||||
FastAllocator allocator;
|
||||
mvector<BVHBuilderMorton::BuildPrim> morton_src;
|
||||
mvector<BVHBuilderMorton::BuildPrim> morton_tmp;
|
||||
};
|
||||
|
||||
void* rtcBuildBVHMorton(const RTCBuildArguments* arguments)
|
||||
{
|
||||
BVH* bvh = (BVH*) arguments->bvh;
|
||||
RTCBuildPrimitive* prims_i = arguments->primitives;
|
||||
size_t primitiveCount = arguments->primitiveCount;
|
||||
RTCCreateNodeFunction createNode = arguments->createNode;
|
||||
RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
|
||||
RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
|
||||
RTCCreateLeafFunction createLeaf = arguments->createLeaf;
|
||||
RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
|
||||
void* userPtr = arguments->userPtr;
|
||||
|
||||
std::atomic<size_t> progress(0);
|
||||
|
||||
/* initialize temporary arrays for morton builder */
|
||||
PrimRef* prims = (PrimRef*) prims_i;
|
||||
mvector<BVHBuilderMorton::BuildPrim>& morton_src = bvh->morton_src;
|
||||
mvector<BVHBuilderMorton::BuildPrim>& morton_tmp = bvh->morton_tmp;
|
||||
morton_src.resize(primitiveCount);
|
||||
morton_tmp.resize(primitiveCount);
|
||||
|
||||
/* compute centroid bounds */
|
||||
const BBox3fa centBounds = parallel_reduce ( size_t(0), primitiveCount, BBox3fa(empty), [&](const range<size_t>& r) -> BBox3fa {
|
||||
|
||||
BBox3fa bounds(empty);
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
bounds.extend(prims[i].bounds().center2());
|
||||
return bounds;
|
||||
}, BBox3fa::merge);
|
||||
|
||||
/* compute morton codes */
|
||||
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
|
||||
parallel_for ( size_t(0), primitiveCount, [&](const range<size_t>& r) {
|
||||
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton_src[r.begin()]);
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
generator(prims[i].bounds(),(unsigned) i);
|
||||
}
|
||||
});
|
||||
|
||||
/* start morton build */
|
||||
std::pair<void*,BBox3fa> root = BVHBuilderMorton::build<std::pair<void*,BBox3fa>>(
|
||||
|
||||
/* thread local allocator for fast allocations */
|
||||
[&] () -> FastAllocator::CachedAllocator {
|
||||
return bvh->allocator.getCachedAllocator();
|
||||
},
|
||||
|
||||
/* lambda function that allocates BVH nodes */
|
||||
[&] ( const FastAllocator::CachedAllocator& alloc, size_t N ) -> void* {
|
||||
return createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
|
||||
},
|
||||
|
||||
/* lambda function that sets bounds */
|
||||
[&] (void* node, const std::pair<void*,BBox3fa>* children, size_t N) -> std::pair<void*,BBox3fa>
|
||||
{
|
||||
BBox3fa bounds = empty;
|
||||
void* childptrs[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
|
||||
const RTCBounds* cbounds[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
|
||||
for (size_t i=0; i<N; i++) {
|
||||
bounds.extend(children[i].second);
|
||||
childptrs[i] = children[i].first;
|
||||
cbounds[i] = (const RTCBounds*)&children[i].second;
|
||||
}
|
||||
setNodeBounds(node,cbounds,(unsigned int)N,userPtr);
|
||||
setNodeChildren(node,childptrs, (unsigned int)N,userPtr);
|
||||
return std::make_pair(node,bounds);
|
||||
},
|
||||
|
||||
/* lambda function that creates BVH leaves */
|
||||
[&]( const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) -> std::pair<void*,BBox3fa>
|
||||
{
|
||||
RTCBuildPrimitive localBuildPrims[RTC_BUILD_MAX_PRIMITIVES_PER_LEAF];
|
||||
BBox3fa bounds = empty;
|
||||
for (size_t i=0;i<current.size();i++)
|
||||
{
|
||||
const size_t id = morton_src[current.begin()+i].index;
|
||||
bounds.extend(prims[id].bounds());
|
||||
localBuildPrims[i] = prims_i[id];
|
||||
}
|
||||
void* node = createLeaf((RTCThreadLocalAllocator)&alloc,localBuildPrims,current.size(),userPtr);
|
||||
return std::make_pair(node,bounds);
|
||||
},
|
||||
|
||||
/* lambda that calculates the bounds for some primitive */
|
||||
[&] (const BVHBuilderMorton::BuildPrim& morton) -> BBox3fa {
|
||||
return prims[morton.index].bounds();
|
||||
},
|
||||
|
||||
/* progress monitor function */
|
||||
[&] (size_t dn) {
|
||||
if (!buildProgress) return true;
|
||||
const size_t n = progress.fetch_add(dn)+dn;
|
||||
const double f = std::min(1.0,double(n)/double(primitiveCount));
|
||||
return buildProgress(userPtr,f);
|
||||
},
|
||||
|
||||
morton_src.data(),morton_tmp.data(),primitiveCount,
|
||||
*arguments);
|
||||
|
||||
bvh->allocator.cleanup();
|
||||
return root.first;
|
||||
}
|
||||
|
||||
void* rtcBuildBVHBinnedSAH(const RTCBuildArguments* arguments)
|
||||
{
|
||||
BVH* bvh = (BVH*) arguments->bvh;
|
||||
RTCBuildPrimitive* prims = arguments->primitives;
|
||||
size_t primitiveCount = arguments->primitiveCount;
|
||||
RTCCreateNodeFunction createNode = arguments->createNode;
|
||||
RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
|
||||
RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
|
||||
RTCCreateLeafFunction createLeaf = arguments->createLeaf;
|
||||
RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
|
||||
void* userPtr = arguments->userPtr;
|
||||
|
||||
std::atomic<size_t> progress(0);
|
||||
|
||||
/* calculate priminfo */
|
||||
auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
|
||||
{
|
||||
CentGeomBBox3fa bounds(empty);
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
bounds.extend((BBox3fa&)prims[j]);
|
||||
return bounds;
|
||||
};
|
||||
const CentGeomBBox3fa bounds =
|
||||
parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
|
||||
|
||||
const PrimInfo pinfo(0,primitiveCount,bounds);
|
||||
|
||||
/* build BVH */
|
||||
void* root = BVHBuilderBinnedSAH::build<void*>(
|
||||
|
||||
/* thread local allocator for fast allocations */
|
||||
[&] () -> FastAllocator::CachedAllocator {
|
||||
return bvh->allocator.getCachedAllocator();
|
||||
},
|
||||
|
||||
/* lambda function that creates BVH nodes */
|
||||
[&](BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
|
||||
{
|
||||
void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
|
||||
const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
|
||||
for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
|
||||
setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
|
||||
return node;
|
||||
},
|
||||
|
||||
/* lambda function that updates BVH nodes */
|
||||
[&](const BVHBuilderBinnedSAH::BuildRecord& precord, const BVHBuilderBinnedSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
|
||||
setNodeChildren(node,children, (unsigned int)N,userPtr);
|
||||
return node;
|
||||
},
|
||||
|
||||
/* lambda function that creates BVH leaves */
|
||||
[&](const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
|
||||
return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
|
||||
},
|
||||
|
||||
/* progress monitor function */
|
||||
[&] (size_t dn) {
|
||||
if (!buildProgress) return true;
|
||||
const size_t n = progress.fetch_add(dn)+dn;
|
||||
const double f = std::min(1.0,double(n)/double(primitiveCount));
|
||||
return buildProgress(userPtr,f);
|
||||
},
|
||||
|
||||
(PrimRef*)prims,pinfo,*arguments);
|
||||
|
||||
bvh->allocator.cleanup();
|
||||
return root;
|
||||
}
|
||||
|
||||
static __forceinline const std::pair<CentGeomBBox3fa,unsigned int> mergePair(const std::pair<CentGeomBBox3fa,unsigned int>& a, const std::pair<CentGeomBBox3fa,unsigned int>& b) {
|
||||
CentGeomBBox3fa centBounds = CentGeomBBox3fa::merge2(a.first,b.first);
|
||||
unsigned int maxGeomID = max(a.second,b.second);
|
||||
return std::pair<CentGeomBBox3fa,unsigned int>(centBounds,maxGeomID);
|
||||
}
|
||||
|
||||
void* rtcBuildBVHSpatialSAH(const RTCBuildArguments* arguments)
|
||||
{
|
||||
BVH* bvh = (BVH*) arguments->bvh;
|
||||
RTCBuildPrimitive* prims = arguments->primitives;
|
||||
size_t primitiveCount = arguments->primitiveCount;
|
||||
RTCCreateNodeFunction createNode = arguments->createNode;
|
||||
RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
|
||||
RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
|
||||
RTCCreateLeafFunction createLeaf = arguments->createLeaf;
|
||||
RTCSplitPrimitiveFunction splitPrimitive = arguments->splitPrimitive;
|
||||
RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
|
||||
void* userPtr = arguments->userPtr;
|
||||
|
||||
std::atomic<size_t> progress(0);
|
||||
|
||||
/* calculate priminfo */
|
||||
|
||||
auto computeBounds = [&](const range<size_t>& r) -> std::pair<CentGeomBBox3fa,unsigned int>
|
||||
{
|
||||
CentGeomBBox3fa bounds(empty);
|
||||
unsigned maxGeomID = 0;
|
||||
for (size_t j=r.begin(); j<r.end(); j++)
|
||||
{
|
||||
bounds.extend((BBox3fa&)prims[j]);
|
||||
maxGeomID = max(maxGeomID,prims[j].geomID);
|
||||
}
|
||||
return std::pair<CentGeomBBox3fa,unsigned int>(bounds,maxGeomID);
|
||||
};
|
||||
|
||||
|
||||
const std::pair<CentGeomBBox3fa,unsigned int> pair =
|
||||
parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),std::pair<CentGeomBBox3fa,unsigned int>(CentGeomBBox3fa(empty),0), computeBounds, mergePair);
|
||||
|
||||
CentGeomBBox3fa bounds = pair.first;
|
||||
const unsigned int maxGeomID = pair.second;
|
||||
|
||||
if (unlikely(maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS))))
|
||||
{
|
||||
/* fallback code for max geomID larger than threshold */
|
||||
return rtcBuildBVHBinnedSAH(arguments);
|
||||
}
|
||||
|
||||
const PrimInfo pinfo(0,primitiveCount,bounds);
|
||||
|
||||
/* function that splits a build primitive */
|
||||
struct Splitter
|
||||
{
|
||||
Splitter (RTCSplitPrimitiveFunction splitPrimitive, unsigned geomID, unsigned primID, void* userPtr)
|
||||
: splitPrimitive(splitPrimitive), geomID(geomID), primID(primID), userPtr(userPtr) {}
|
||||
|
||||
__forceinline void operator() (PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const
|
||||
{
|
||||
prim.geomIDref() &= BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK;
|
||||
splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
|
||||
left_o.geomIDref() = geomID; left_o.primIDref() = primID;
|
||||
right_o.geomIDref() = geomID; right_o.primIDref() = primID;
|
||||
}
|
||||
|
||||
__forceinline void operator() (const BBox3fa& box, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const
|
||||
{
|
||||
PrimRef prim(box,geomID & BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK,primID);
|
||||
splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
|
||||
}
|
||||
|
||||
RTCSplitPrimitiveFunction splitPrimitive;
|
||||
unsigned geomID;
|
||||
unsigned primID;
|
||||
void* userPtr;
|
||||
};
|
||||
|
||||
/* build BVH */
|
||||
void* root = BVHBuilderBinnedFastSpatialSAH::build<void*>(
|
||||
|
||||
/* thread local allocator for fast allocations */
|
||||
[&] () -> FastAllocator::CachedAllocator {
|
||||
return bvh->allocator.getCachedAllocator();
|
||||
},
|
||||
|
||||
/* lambda function that creates BVH nodes */
|
||||
[&] (BVHBuilderBinnedFastSpatialSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
|
||||
{
|
||||
void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
|
||||
const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
|
||||
for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
|
||||
setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
|
||||
return node;
|
||||
},
|
||||
|
||||
/* lambda function that updates BVH nodes */
|
||||
[&] (const BVHBuilderBinnedFastSpatialSAH::BuildRecord& precord, const BVHBuilderBinnedFastSpatialSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
|
||||
setNodeChildren(node,children, (unsigned int)N,userPtr);
|
||||
return node;
|
||||
},
|
||||
|
||||
/* lambda function that creates BVH leaves */
|
||||
[&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
|
||||
return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
|
||||
},
|
||||
|
||||
/* returns the splitter */
|
||||
[&] ( const PrimRef& prim ) -> Splitter {
|
||||
return Splitter(splitPrimitive,prim.geomID(),prim.primID(),userPtr);
|
||||
},
|
||||
|
||||
/* progress monitor function */
|
||||
[&] (size_t dn) {
|
||||
        if (!buildProgress) return true;
        const size_t n = progress.fetch_add(dn)+dn;
        const double f = std::min(1.0,double(n)/double(primitiveCount));
        return buildProgress(userPtr,f);
      },

      (PrimRef*)prims,
      arguments->primitiveArrayCapacity,
      pinfo,*arguments);

      bvh->allocator.cleanup();
      return root;
    }
  }
}

using namespace embree;
using namespace embree::isa;

RTC_NAMESPACE_BEGIN

  RTC_API RTCBVH rtcNewBVH(RTCDevice device)
  {
    RTC_CATCH_BEGIN;
    RTC_TRACE(rtcNewAllocator);
    RTC_VERIFY_HANDLE(device);
    BVH* bvh = new BVH((Device*)device);
    return (RTCBVH) bvh->refInc();
    RTC_CATCH_END((Device*)device);
    return nullptr;
  }

  RTC_API void* rtcBuildBVH(const RTCBuildArguments* arguments)
  {
    BVH* bvh = (BVH*) arguments->bvh;
    RTC_CATCH_BEGIN;
    RTC_TRACE(rtcBuildBVH);
    RTC_VERIFY_HANDLE(bvh);
    RTC_VERIFY_HANDLE(arguments);
    RTC_VERIFY_HANDLE(arguments->createNode);
    RTC_VERIFY_HANDLE(arguments->setNodeChildren);
    RTC_VERIFY_HANDLE(arguments->setNodeBounds);
    RTC_VERIFY_HANDLE(arguments->createLeaf);

    if (arguments->primitiveArrayCapacity < arguments->primitiveCount)
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"primitiveArrayCapacity must be greater or equal to primitiveCount");

    /* initialize the allocator */
    bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa));
    bvh->allocator.reset();

    /* switch between different builders based on quality level */
    if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW)
      return rtcBuildBVHMorton(arguments);
    else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM)
      return rtcBuildBVHBinnedSAH(arguments);
    else if (arguments->buildQuality == RTC_BUILD_QUALITY_HIGH) {
      if (arguments->splitPrimitive == nullptr || arguments->primitiveArrayCapacity <= arguments->primitiveCount)
        return rtcBuildBVHBinnedSAH(arguments);
      else
        return rtcBuildBVHSpatialSAH(arguments);
    }
    else
      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid build quality");

    /* if we are in dynamic mode, then do not clear temporary data */
    if (!(arguments->buildFlags & RTC_BUILD_FLAG_DYNAMIC))
    {
      bvh->morton_src.clear();
      bvh->morton_tmp.clear();
    }

    RTC_CATCH_END(bvh->device);
    return nullptr;
  }

  RTC_API void* rtcThreadLocalAlloc(RTCThreadLocalAllocator localAllocator, size_t bytes, size_t align)
  {
    FastAllocator::CachedAllocator* alloc = (FastAllocator::CachedAllocator*) localAllocator;
    RTC_CATCH_BEGIN;
    RTC_TRACE(rtcThreadLocalAlloc);
    return alloc->malloc0(bytes,align);
    RTC_CATCH_END(alloc->alloc->getDevice());
    return nullptr;
  }

  RTC_API void rtcMakeStaticBVH(RTCBVH hbvh)
  {
    BVH* bvh = (BVH*) hbvh;
    RTC_CATCH_BEGIN;
    RTC_TRACE(rtcStaticBVH);
    RTC_VERIFY_HANDLE(hbvh);
    bvh->morton_src.clear();
    bvh->morton_tmp.clear();
    RTC_CATCH_END(bvh->device);
  }

  RTC_API void rtcRetainBVH(RTCBVH hbvh)
  {
    BVH* bvh = (BVH*) hbvh;
    Device* device = bvh ? bvh->device : nullptr;
    RTC_CATCH_BEGIN;
    RTC_TRACE(rtcRetainBVH);
    RTC_VERIFY_HANDLE(hbvh);
    bvh->refInc();
    RTC_CATCH_END(device);
  }

  RTC_API void rtcReleaseBVH(RTCBVH hbvh)
  {
    BVH* bvh = (BVH*) hbvh;
    Device* device = bvh ? bvh->device : nullptr;
    RTC_CATCH_BEGIN;
    RTC_TRACE(rtcReleaseBVH);
    RTC_VERIFY_HANDLE(hbvh);
    bvh->refDec();
    RTC_CATCH_END(device);
  }

RTC_NAMESPACE_END
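The functions above are the public entry points of Embree's user BVH builder: rtcNewBVH creates the handle, rtcBuildBVH validates the arguments and dispatches to the Morton, binned-SAH, or spatial-SAH builder depending on the requested quality, and rtcThreadLocalAlloc is what the user callbacks call to allocate node memory from the BVH's allocator. The following is a minimal, hedged usage sketch of that API; InnerNode, LeafNode and the callback bodies are illustrative placeholders, not part of the vendored sources.

// Sketch only: build a BVH over user primitives with the API above (assumes Embree 4 headers).
#include <embree4/rtcore.h>
#include <new>
#include <vector>

struct InnerNode { RTCBounds bounds[2]; void* children[2]; };   // illustrative node layouts
struct LeafNode  { unsigned int primID; };

static void* createNode(RTCThreadLocalAllocator alloc, unsigned int numChildren, void* userPtr) {
  void* ptr = rtcThreadLocalAlloc(alloc, sizeof(InnerNode), 16);  // served by the allocator above
  return new (ptr) InnerNode();
}
static void setNodeChildren(void* nodePtr, void** children, unsigned int numChildren, void* userPtr) {
  for (unsigned int i = 0; i < numChildren; i++)
    ((InnerNode*)nodePtr)->children[i] = children[i];
}
static void setNodeBounds(void* nodePtr, const RTCBounds** bounds, unsigned int numChildren, void* userPtr) {
  for (unsigned int i = 0; i < numChildren; i++)
    ((InnerNode*)nodePtr)->bounds[i] = *bounds[i];
}
static void* createLeaf(RTCThreadLocalAllocator alloc, const RTCBuildPrimitive* prims, size_t numPrims, void* userPtr) {
  void* ptr = rtcThreadLocalAlloc(alloc, sizeof(LeafNode), 16);
  ((LeafNode*)ptr)->primID = prims[0].primID;   // a real leaf would store all numPrims entries
  return ptr;
}

void* buildUserBVH(RTCDevice device, std::vector<RTCBuildPrimitive>& prims) {
  RTCBVH bvh = rtcNewBVH(device);
  RTCBuildArguments args = rtcDefaultBuildArguments();
  args.bvh                    = bvh;
  args.buildQuality           = RTC_BUILD_QUALITY_MEDIUM;   // takes the binned-SAH path above
  args.maxLeafSize            = 1;                          // so the single-primID leaf is enough
  args.primitives             = prims.data();
  args.primitiveCount         = prims.size();
  args.primitiveArrayCapacity = prims.capacity();           // must be >= primitiveCount
  args.createNode             = createNode;
  args.setNodeChildren        = setNodeChildren;
  args.setNodeBounds          = setNodeBounds;
  args.createLeaf             = createLeaf;
  void* root = rtcBuildBVH(&args);   // node memory is owned by 'bvh' until rtcReleaseBVH(bvh)
  return root;
}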
1034
engine/thirdparty/embree/kernels/common/scene.cpp
vendored
Normal file
File diff suppressed because it is too large
400
engine/thirdparty/embree/kernels/common/scene.h
vendored
Normal file
@@ -0,0 +1,400 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "device.h"
|
||||
#include "builder.h"
|
||||
#include "scene_triangle_mesh.h"
|
||||
#include "scene_quad_mesh.h"
|
||||
#include "scene_user_geometry.h"
|
||||
#include "scene_instance.h"
|
||||
#include "scene_instance_array.h"
|
||||
#include "scene_curves.h"
|
||||
#include "scene_line_segments.h"
|
||||
#include "scene_subdiv_mesh.h"
|
||||
#include "scene_grid_mesh.h"
|
||||
#include "scene_points.h"
|
||||
#include "../subdiv/tessellation_cache.h"
|
||||
|
||||
#include "acceln.h"
|
||||
#include "geometry.h"
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
#include "../sycl/rthwif_embree_builder.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct TaskGroup;
|
||||
|
||||
/*! Base class all scenes are derived from */
|
||||
class Scene : public AccelN
|
||||
{
|
||||
ALIGNED_CLASS_USM_(std::alignment_of<Scene>::value);
|
||||
|
||||
public:
|
||||
template<typename Ty, bool mblur = false>
|
||||
class Iterator
|
||||
{
|
||||
public:
|
||||
Iterator () {}
|
||||
|
||||
Iterator (Scene* scene, bool all = false)
|
||||
: scene(scene), all(all) {}
|
||||
|
||||
__forceinline Ty* at(const size_t i)
|
||||
{
|
||||
Geometry* geom = scene->geometries[i].ptr;
|
||||
if (geom == nullptr) return nullptr;
|
||||
if (!all && !geom->isEnabled()) return nullptr;
|
||||
const size_t mask = geom->getTypeMask() & Ty::geom_type;
|
||||
if (!(mask)) return nullptr;
|
||||
if ((geom->numTimeSteps != 1) != mblur) return nullptr;
|
||||
return (Ty*) geom;
|
||||
}
|
||||
|
||||
__forceinline Ty* operator[] (const size_t i) {
|
||||
return at(i);
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return scene->size();
|
||||
}
|
||||
|
||||
__forceinline size_t numPrimitives() const {
|
||||
return scene->getNumPrimitives(Ty::geom_type,mblur);
|
||||
}
|
||||
|
||||
__forceinline size_t maxPrimitivesPerGeometry()
|
||||
{
|
||||
size_t ret = 0;
|
||||
for (size_t i=0; i<scene->size(); i++) {
|
||||
Ty* mesh = at(i);
|
||||
if (mesh == nullptr) continue;
|
||||
ret = max(ret,mesh->size());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__forceinline unsigned int maxGeomID()
|
||||
{
|
||||
unsigned int ret = 0;
|
||||
for (size_t i=0; i<scene->size(); i++) {
|
||||
Ty* mesh = at(i);
|
||||
if (mesh == nullptr) continue;
|
||||
ret = max(ret,(unsigned int)i);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__forceinline unsigned maxTimeStepsPerGeometry()
|
||||
{
|
||||
unsigned ret = 0;
|
||||
for (size_t i=0; i<scene->size(); i++) {
|
||||
Ty* mesh = at(i);
|
||||
if (mesh == nullptr) continue;
|
||||
ret = max(ret,mesh->numTimeSteps);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
bool all;
|
||||
};
|
||||
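The Iterator template above is how the builders enumerate geometries of one type: at() filters by type mask, enabled state, and whether the geometry has motion blur. A hedged sketch of the typical loop, assuming 'scene' is a valid embree::Scene*:

// Sketch: visit all enabled, non-motion-blurred triangle meshes (illustrative loop body).
Scene::Iterator<TriangleMesh, /*mblur=*/false> iter(scene);
size_t totalTriangles = 0;
for (size_t geomID = 0; geomID < iter.size(); geomID++) {
  TriangleMesh* mesh = iter[geomID];      // nullptr if disabled, wrong type, or mblur mismatch
  if (mesh == nullptr) continue;
  totalTriangles += mesh->size();
}
// iter.numPrimitives() reports the same quantity from the scene's cached GeometryCounts.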
|
||||
class Iterator2
|
||||
{
|
||||
public:
|
||||
Iterator2 () {}
|
||||
|
||||
Iterator2 (Scene* scene, Geometry::GTypeMask typemask, bool mblur)
|
||||
: scene(scene), typemask(typemask), mblur(mblur) {}
|
||||
|
||||
__forceinline Geometry* at(const size_t i)
|
||||
{
|
||||
Geometry* geom = scene->geometries[i].ptr;
|
||||
if (geom == nullptr) return nullptr;
|
||||
if (!geom->isEnabled()) return nullptr;
|
||||
if (!(geom->getTypeMask() & typemask)) return nullptr;
|
||||
if ((geom->numTimeSteps != 1) != mblur) return nullptr;
|
||||
return geom;
|
||||
}
|
||||
|
||||
__forceinline Geometry* operator[] (const size_t i) {
|
||||
return at(i);
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return scene->size();
|
||||
}
|
||||
|
||||
private:
|
||||
Scene* scene;
|
||||
Geometry::GTypeMask typemask;
|
||||
bool mblur;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/*! Scene construction */
|
||||
Scene (Device* device);
|
||||
|
||||
/*! Scene destruction */
|
||||
~Scene () noexcept;
|
||||
|
||||
private:
|
||||
|
||||
/*! class is non-copyable */
|
||||
Scene (const Scene& other) DELETED; // do not implement
|
||||
Scene& operator= (const Scene& other) DELETED; // do not implement
|
||||
|
||||
public:
|
||||
void createTriangleAccel();
|
||||
void createTriangleMBAccel();
|
||||
void createQuadAccel();
|
||||
void createQuadMBAccel();
|
||||
void createHairAccel();
|
||||
void createHairMBAccel();
|
||||
void createSubdivAccel();
|
||||
void createSubdivMBAccel();
|
||||
void createUserGeometryAccel();
|
||||
void createUserGeometryMBAccel();
|
||||
void createInstanceAccel();
|
||||
void createInstanceMBAccel();
|
||||
void createInstanceExpensiveAccel();
|
||||
void createInstanceExpensiveMBAccel();
|
||||
void createInstanceArrayAccel();
|
||||
void createInstanceArrayMBAccel();
|
||||
void createGridAccel();
|
||||
void createGridMBAccel();
|
||||
|
||||
/*! prints statistics about the scene */
|
||||
void printStatistics();
|
||||
|
||||
/*! clears the scene */
|
||||
void clear();
|
||||
|
||||
/*! detaches some geometry */
|
||||
void detachGeometry(size_t geomID);
|
||||
|
||||
void setBuildQuality(RTCBuildQuality quality_flags);
|
||||
RTCBuildQuality getBuildQuality() const;
|
||||
|
||||
void setSceneFlags(RTCSceneFlags scene_flags);
|
||||
RTCSceneFlags getSceneFlags() const;
|
||||
|
||||
void build_cpu_accels();
|
||||
void build_gpu_accels();
|
||||
void commit (bool join);
|
||||
void commit_task ();
|
||||
void build () {}
|
||||
|
||||
/* return number of geometries */
|
||||
__forceinline size_t size() const { return geometries.size(); }
|
||||
|
||||
/* bind geometry to the scene */
|
||||
unsigned int bind (unsigned geomID, Ref<Geometry> geometry);
|
||||
|
||||
/* determines if scene is modified */
|
||||
__forceinline bool isModified() const { return modified; }
|
||||
|
||||
/* sets modified flag */
|
||||
__forceinline void setModified(bool f = true) {
|
||||
modified = f;
|
||||
}
|
||||
|
||||
__forceinline bool isGeometryModified(size_t geomID)
|
||||
{
|
||||
Ref<Geometry>& g = geometries[geomID];
|
||||
if (!g) return false;
|
||||
return g->getModCounter() > geometryModCounters_[geomID];
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
void checkIfModifiedAndSet ();
|
||||
|
||||
public:
|
||||
|
||||
/* get mesh by ID */
|
||||
__forceinline Geometry* get(size_t i) { assert(i < geometries.size()); return geometries[i].ptr; }
|
||||
__forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; }
|
||||
|
||||
template<typename Mesh>
|
||||
__forceinline Mesh* get(size_t i) {
|
||||
assert(i < geometries.size());
|
||||
assert(geometries[i]->getTypeMask() & Mesh::geom_type);
|
||||
return (Mesh*)geometries[i].ptr;
|
||||
}
|
||||
template<typename Mesh>
|
||||
__forceinline const Mesh* get(size_t i) const {
|
||||
assert(i < geometries.size());
|
||||
assert(geometries[i]->getTypeMask() & Mesh::geom_type);
|
||||
return (Mesh*)geometries[i].ptr;
|
||||
}
|
||||
|
||||
template<typename Mesh>
|
||||
__forceinline Mesh* getSafe(size_t i) {
|
||||
assert(i < geometries.size());
|
||||
if (geometries[i] == null) return nullptr;
|
||||
if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
|
||||
else return (Mesh*) geometries[i].ptr;
|
||||
}
|
||||
|
||||
__forceinline Ref<Geometry> get_locked(size_t i) {
|
||||
Lock<MutexSys> lock(geometriesMutex);
|
||||
assert(i < geometries.size());
|
||||
return geometries[i];
|
||||
}
|
||||
|
||||
/* flag decoding */
|
||||
__forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); }
|
||||
__forceinline bool isCompactAccel() const { return scene_flags & RTC_SCENE_FLAG_COMPACT; }
|
||||
__forceinline bool isRobustAccel() const { return scene_flags & RTC_SCENE_FLAG_ROBUST; }
|
||||
__forceinline bool isStaticAccel() const { return !(scene_flags & RTC_SCENE_FLAG_DYNAMIC); }
|
||||
__forceinline bool isDynamicAccel() const { return scene_flags & RTC_SCENE_FLAG_DYNAMIC; }
|
||||
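The accessors above decode the RTCSceneFlags and build quality that the application sets through the public API; a short hedged sketch of the corresponding application-side calls, with 'device' assumed to exist:

// Sketch (application side): the flags read back by isCompactAccel()/isRobustAccel()/isDynamicAccel().
RTCScene scene = rtcNewScene(device);
rtcSetSceneFlags(scene, (RTCSceneFlags)(RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST | RTC_SCENE_FLAG_DYNAMIC));
rtcSetSceneBuildQuality(scene, RTC_BUILD_QUALITY_LOW);   // read back via getBuildQuality()
rtcCommitScene(scene);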
|
||||
__forceinline bool hasArgumentFilterFunction() const {
|
||||
return scene_flags & RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS;
|
||||
}
|
||||
|
||||
__forceinline bool hasGeometryFilterFunction() {
|
||||
return world.numFilterFunctions != 0;
|
||||
}
|
||||
|
||||
__forceinline bool hasFilterFunction() {
|
||||
return hasArgumentFilterFunction() || hasGeometryFilterFunction();
|
||||
}
|
||||
|
||||
void* createQBVH6Accel();
|
||||
|
||||
public:
|
||||
Device* device;
|
||||
|
||||
public:
|
||||
IDPool<unsigned,0xFFFFFFFE> id_pool;
|
||||
Device::vector<Ref<Geometry>> geometries = device; //!< list of all user geometries
|
||||
avector<unsigned int> geometryModCounters_;
|
||||
Device::vector<float*> vertices = device;
|
||||
|
||||
public:
|
||||
/* these are to detect if we need to recreate the acceleration structures */
|
||||
bool flags_modified;
|
||||
unsigned int enabled_geometry_types;
|
||||
|
||||
RTCSceneFlags scene_flags;
|
||||
RTCBuildQuality quality_flags;
|
||||
MutexSys buildMutex;
|
||||
MutexSys geometriesMutex;
|
||||
|
||||
#if defined(EMBREE_SYCL_SUPPORT)
|
||||
public:
|
||||
BBox3f hwaccel_bounds = empty;
|
||||
AccelBuffer hwaccel;
|
||||
#endif
|
||||
|
||||
private:
|
||||
bool modified; //!< true if scene got modified
|
||||
|
||||
public:
|
||||
|
||||
std::unique_ptr<TaskGroup> taskGroup;
|
||||
|
||||
public:
|
||||
struct BuildProgressMonitorInterface : public BuildProgressMonitor {
|
||||
BuildProgressMonitorInterface(Scene* scene)
|
||||
: scene(scene) {}
|
||||
void operator() (size_t dn) const { scene->progressMonitor(double(dn)); }
|
||||
private:
|
||||
Scene* scene;
|
||||
};
|
||||
BuildProgressMonitorInterface progressInterface;
|
||||
RTCProgressMonitorFunction progress_monitor_function;
|
||||
void* progress_monitor_ptr;
|
||||
std::atomic<size_t> progress_monitor_counter;
|
||||
void progressMonitor(double nprims);
|
||||
void setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr);
|
||||
|
||||
private:
|
||||
GeometryCounts world; //!< counts for geometry
|
||||
|
||||
public:
|
||||
|
||||
__forceinline size_t numPrimitives() const {
|
||||
return world.size();
|
||||
}
|
||||
|
||||
__forceinline size_t getNumPrimitives(Geometry::GTypeMask mask, bool mblur) const
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
if (mask & Geometry::MTY_TRIANGLE_MESH)
|
||||
count += mblur ? world.numMBTriangles : world.numTriangles;
|
||||
|
||||
if (mask & Geometry::MTY_QUAD_MESH)
|
||||
count += mblur ? world.numMBQuads : world.numQuads;
|
||||
|
||||
if (mask & Geometry::MTY_CURVE2)
|
||||
count += mblur ? world.numMBLineSegments : world.numLineSegments;
|
||||
|
||||
if (mask & Geometry::MTY_CURVE4)
|
||||
count += mblur ? world.numMBBezierCurves : world.numBezierCurves;
|
||||
|
||||
if (mask & Geometry::MTY_POINTS)
|
||||
count += mblur ? world.numMBPoints : world.numPoints;
|
||||
|
||||
if (mask & Geometry::MTY_SUBDIV_MESH)
|
||||
count += mblur ? world.numMBSubdivPatches : world.numSubdivPatches;
|
||||
|
||||
if (mask & Geometry::MTY_USER_GEOMETRY)
|
||||
count += mblur ? world.numMBUserGeometries : world.numUserGeometries;
|
||||
|
||||
if (mask & Geometry::MTY_INSTANCE_CHEAP)
|
||||
count += mblur ? world.numMBInstancesCheap : world.numInstancesCheap;
|
||||
|
||||
if (mask & Geometry::MTY_INSTANCE_EXPENSIVE)
|
||||
count += mblur ? world.numMBInstancesExpensive : world.numInstancesExpensive;
|
||||
|
||||
if (mask & Geometry::MTY_INSTANCE_ARRAY)
|
||||
count += mblur ? world.numMBInstanceArrays : world.numInstanceArrays;
|
||||
|
||||
if (mask & Geometry::MTY_GRID_MESH)
|
||||
count += mblur ? world.numMBGrids : world.numGrids;
|
||||
|
||||
return count;
|
||||
}
|
||||
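getNumPrimitives above sums the cached GeometryCounts for every geometry type selected by the mask, split by motion blur. A small hedged example of how acceleration-structure creation code typically consults it, with 'scene' assumed valid:

// Sketch: query the cached counts the way the accel factories do.
const size_t numTris   = scene->getNumPrimitives(TriangleMesh::geom_type, /*mblur=*/false);
const size_t numMBTris = scene->getNumPrimitives(TriangleMesh::geom_type, /*mblur=*/true);
if (numTris == 0 && numMBTris == 0) {
  /* nothing of this type to build */
}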
|
||||
__forceinline size_t getNumSubPrimitives(Geometry::GTypeMask mask, bool mblur) const
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
if (mask & Geometry::MTY_GRID_MESH)
|
||||
count += mblur ? world.numMBSubGrids : world.numSubGrids;
|
||||
|
||||
Geometry::GTypeMask new_mask = (Geometry::GTypeMask)(mask & ~Geometry::MTY_GRID_MESH);
|
||||
count += getNumPrimitives(new_mask, mblur);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
template<typename Mesh, bool mblur>
|
||||
__forceinline unsigned getNumTimeSteps()
|
||||
{
|
||||
if (!mblur)
|
||||
return 1;
|
||||
|
||||
Scene::Iterator<Mesh,mblur> iter(this);
|
||||
return iter.maxTimeStepsPerGeometry();
|
||||
}
|
||||
|
||||
template<typename Mesh, bool mblur>
|
||||
__forceinline unsigned int getMaxGeomID()
|
||||
{
|
||||
Scene::Iterator<Mesh,mblur> iter(this);
|
||||
return iter.maxGeomID();
|
||||
}
|
||||
};
|
||||
}
|
||||
764
engine/thirdparty/embree/kernels/common/scene_curves.h
vendored
Normal file
@@ -0,0 +1,764 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "default.h"
|
||||
#include "geometry.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#include "../subdiv/bezier_curve.h"
|
||||
#include "../subdiv/hermite_curve.h"
|
||||
#include "../subdiv/bspline_curve.h"
|
||||
#include "../subdiv/catmullrom_curve.h"
|
||||
#include "../subdiv/linear_bezier_patch.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! represents an array of bicubic bezier curves */
|
||||
struct CurveGeometry : public Geometry
|
||||
{
|
||||
/*! type of this geometry */
|
||||
static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4;
|
||||
|
||||
public:
|
||||
|
||||
/*! bezier curve construction */
|
||||
CurveGeometry (Device* device, Geometry::GType gtype);
|
||||
|
||||
public:
|
||||
void setMask(unsigned mask);
|
||||
void setNumTimeSteps (unsigned int numTimeSteps);
|
||||
void setVertexAttributeCount (unsigned int N);
|
||||
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
|
||||
void* getBuffer(RTCBufferType type, unsigned int slot);
|
||||
void updateBuffer(RTCBufferType type, unsigned int slot);
|
||||
void commit();
|
||||
bool verify();
|
||||
void setTessellationRate(float N);
|
||||
void setMaxRadiusScale(float s);
|
||||
void addElementsToCount (GeometryCounts & counts) const;
|
||||
|
||||
public:
|
||||
|
||||
/*! returns the number of vertices */
|
||||
__forceinline size_t numVertices() const {
|
||||
return vertices[0].size();
|
||||
}
|
||||
|
||||
/*! returns the i'th curve */
|
||||
__forceinline const unsigned int& curve(size_t i) const {
|
||||
return curves[i];
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of the first time step */
|
||||
__forceinline Vec3ff vertex(size_t i) const {
|
||||
return vertices0[i];
|
||||
}
|
||||
|
||||
/*! returns i'th normal of the first time step */
|
||||
__forceinline Vec3fa normal(size_t i) const {
|
||||
return normals0[i];
|
||||
}
|
||||
|
||||
/*! returns i'th tangent of the first time step */
|
||||
__forceinline Vec3ff tangent(size_t i) const {
|
||||
return tangents0[i];
|
||||
}
|
||||
|
||||
/*! returns i'th normal derivative of the first time step */
|
||||
__forceinline Vec3fa dnormal(size_t i) const {
|
||||
return dnormals0[i];
|
||||
}
|
||||
|
||||
/*! returns i'th radius of the first time step */
|
||||
__forceinline float radius(size_t i) const {
|
||||
return vertices0[i].w;
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of itime'th timestep */
|
||||
__forceinline Vec3ff vertex(size_t i, size_t itime) const {
|
||||
return vertices[itime][i];
|
||||
}
|
||||
|
||||
/*! returns i'th normal of itime'th timestep */
|
||||
__forceinline Vec3fa normal(size_t i, size_t itime) const {
|
||||
return normals[itime][i];
|
||||
}
|
||||
|
||||
/*! returns i'th tangent of itime'th timestep */
|
||||
__forceinline Vec3ff tangent(size_t i, size_t itime) const {
|
||||
return tangents[itime][i];
|
||||
}
|
||||
|
||||
/*! returns i'th normal derivative of itime'th timestep */
|
||||
__forceinline Vec3fa dnormal(size_t i, size_t itime) const {
|
||||
return dnormals[itime][i];
|
||||
}
|
||||
|
||||
/*! returns i'th radius of itime'th timestep */
|
||||
__forceinline float radius(size_t i, size_t itime) const {
|
||||
return vertices[itime][i].w;
|
||||
}
|
||||
|
||||
/*! gathers the curve starting with i'th vertex */
|
||||
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const
|
||||
{
|
||||
p0 = vertex(i+0);
|
||||
p1 = vertex(i+1);
|
||||
p2 = vertex(i+2);
|
||||
p3 = vertex(i+3);
|
||||
}
|
||||
|
||||
/*! gathers the curve starting with i'th vertex of itime'th timestep */
|
||||
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const
|
||||
{
|
||||
p0 = vertex(i+0,itime);
|
||||
p1 = vertex(i+1,itime);
|
||||
p2 = vertex(i+2,itime);
|
||||
p3 = vertex(i+3,itime);
|
||||
}
|
||||
|
||||
/*! gathers the curve normals starting with i'th vertex */
|
||||
__forceinline void gather_normals(Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
|
||||
{
|
||||
n0 = normal(i+0);
|
||||
n1 = normal(i+1);
|
||||
n2 = normal(i+2);
|
||||
n3 = normal(i+3);
|
||||
}
|
||||
|
||||
/*! gathers the curve starting with i'th vertex */
|
||||
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
|
||||
{
|
||||
p0 = vertex(i+0);
|
||||
p1 = vertex(i+1);
|
||||
p2 = vertex(i+2);
|
||||
p3 = vertex(i+3);
|
||||
n0 = normal(i+0);
|
||||
n1 = normal(i+1);
|
||||
n2 = normal(i+2);
|
||||
n3 = normal(i+3);
|
||||
}
|
||||
|
||||
/*! gathers the curve starting with i'th vertex of itime'th timestep */
|
||||
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const
|
||||
{
|
||||
p0 = vertex(i+0,itime);
|
||||
p1 = vertex(i+1,itime);
|
||||
p2 = vertex(i+2,itime);
|
||||
p3 = vertex(i+3,itime);
|
||||
n0 = normal(i+0,itime);
|
||||
n1 = normal(i+1,itime);
|
||||
n2 = normal(i+2,itime);
|
||||
n3 = normal(i+3,itime);
|
||||
}
|
||||
|
||||
/*! prefetches the curve starting with i'th vertex of itime'th timestep */
|
||||
__forceinline void prefetchL1_vertices(size_t i) const
|
||||
{
|
||||
prefetchL1(vertices0.getPtr(i)+0);
|
||||
prefetchL1(vertices0.getPtr(i)+64);
|
||||
}
|
||||
|
||||
/*! prefetches the curve starting with i'th vertex of itime'th timestep */
|
||||
__forceinline void prefetchL2_vertices(size_t i) const
|
||||
{
|
||||
prefetchL2(vertices0.getPtr(i)+0);
|
||||
prefetchL2(vertices0.getPtr(i)+64);
|
||||
}
|
||||
|
||||
/*! loads curve vertices for specified time */
|
||||
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
|
||||
{
|
||||
float ftime;
|
||||
const size_t itime = timeSegment(time, ftime);
|
||||
|
||||
const float t0 = 1.0f - ftime;
|
||||
const float t1 = ftime;
|
||||
Vec3ff a0,a1,a2,a3;
|
||||
gather(a0,a1,a2,a3,i,itime);
|
||||
Vec3ff b0,b1,b2,b3;
|
||||
gather(b0,b1,b2,b3,i,itime+1);
|
||||
p0 = madd(Vec3ff(t0),a0,t1*b0);
|
||||
p1 = madd(Vec3ff(t0),a1,t1*b1);
|
||||
p2 = madd(Vec3ff(t0),a2,t1*b2);
|
||||
p3 = madd(Vec3ff(t0),a3,t1*b3);
|
||||
}
|
||||
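The time-dependent gather above decomposes the query time into a segment index itime and a fractional weight ftime, then linearly blends the control points of the two bracketing time steps. The same blend for a single control point, written out as a hedged sketch:

// Sketch of the per-control-point blend used by gather(...,time):
//   p(time) = (1 - ftime) * p[itime] + ftime * p[itime+1]
// madd(a,b,c) computes a*b + c, matching p0 = madd(Vec3ff(t0),a0,t1*b0) above.
Vec3ff blendControlPoint(const Vec3ff& a, const Vec3ff& b, float ftime) {
  const float t0 = 1.0f - ftime;
  const float t1 = ftime;
  return madd(Vec3ff(t0), a, t1 * b);
}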
|
||||
/*! loads curve vertices for specified time */
|
||||
__forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
|
||||
{
|
||||
if (hasMotionBlur()) gather(p0,p1,p2,p3,i,time);
|
||||
else gather(p0,p1,p2,p3,i);
|
||||
}
|
||||
|
||||
/*! loads curve vertices for specified time */
|
||||
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
|
||||
{
|
||||
float ftime;
|
||||
const size_t itime = timeSegment(time, ftime);
|
||||
|
||||
const float t0 = 1.0f - ftime;
|
||||
const float t1 = ftime;
|
||||
Vec3ff a0,a1,a2,a3; Vec3fa an0,an1,an2,an3;
|
||||
gather(a0,a1,a2,a3,an0,an1,an2,an3,i,itime);
|
||||
Vec3ff b0,b1,b2,b3; Vec3fa bn0,bn1,bn2,bn3;
|
||||
gather(b0,b1,b2,b3,bn0,bn1,bn2,bn3,i,itime+1);
|
||||
p0 = madd(Vec3ff(t0),a0,t1*b0);
|
||||
p1 = madd(Vec3ff(t0),a1,t1*b1);
|
||||
p2 = madd(Vec3ff(t0),a2,t1*b2);
|
||||
p3 = madd(Vec3ff(t0),a3,t1*b3);
|
||||
n0 = madd(Vec3ff(t0),an0,t1*bn0);
|
||||
n1 = madd(Vec3ff(t0),an1,t1*bn1);
|
||||
n2 = madd(Vec3ff(t0),an2,t1*bn2);
|
||||
n3 = madd(Vec3ff(t0),an3,t1*bn3);
|
||||
}
|
||||
|
||||
/*! loads curve vertices for specified time for mblur and non-mblur case */
|
||||
__forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
|
||||
{
|
||||
if (hasMotionBlur()) gather(p0,p1,p2,p3,n0,n1,n2,n3,i,time);
|
||||
else gather(p0,p1,p2,p3,n0,n1,n2,n3,i);
|
||||
}
|
||||
|
||||
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
|
||||
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
|
||||
{
|
||||
Vec3ff v0,v1,v2,v3; Vec3fa n0,n1,n2,n3;
|
||||
unsigned int vertexID = curve(primID);
|
||||
gather(v0,v1,v2,v3,n0,n1,n2,n3,vertexID,itime);
|
||||
SourceCurve3ff ccurve(v0,v1,v2,v3);
|
||||
SourceCurve3fa ncurve(n0,n1,n2,n3);
|
||||
ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
|
||||
return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
|
||||
}
|
||||
|
||||
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
|
||||
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
|
||||
{
|
||||
float ftime;
|
||||
const size_t itime = timeSegment(time, ftime);
|
||||
const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0);
|
||||
const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1);
|
||||
return clerp(curve0,curve1,ftime);
|
||||
}
|
||||
|
||||
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
|
||||
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurveSafe(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
|
||||
{
|
||||
float ftime = 0.0f;
|
||||
const size_t itime = hasMotionBlur() ? timeSegment(time, ftime) : 0;
|
||||
const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0);
|
||||
if (hasMotionBlur()) {
|
||||
const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1);
|
||||
return clerp(curve0,curve1,ftime);
|
||||
}
|
||||
return curve0;
|
||||
}
|
||||
|
||||
/*! gathers the hermite curve starting with i'th vertex */
|
||||
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const
|
||||
{
|
||||
p0 = vertex (i+0);
|
||||
p1 = vertex (i+1);
|
||||
t0 = tangent(i+0);
|
||||
t1 = tangent(i+1);
|
||||
}
|
||||
|
||||
/*! gathers the hermite curve starting with i'th vertex of itime'th timestep */
|
||||
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const
|
||||
{
|
||||
p0 = vertex (i+0,itime);
|
||||
p1 = vertex (i+1,itime);
|
||||
t0 = tangent(i+0,itime);
|
||||
t1 = tangent(i+1,itime);
|
||||
}
|
||||
|
||||
/*! loads curve vertices for specified time */
|
||||
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
|
||||
{
|
||||
float ftime;
|
||||
const size_t itime = timeSegment(time, ftime);
|
||||
const float f0 = 1.0f - ftime, f1 = ftime;
|
||||
Vec3ff ap0,at0,ap1,at1;
|
||||
gather_hermite(ap0,at0,ap1,at1,i,itime);
|
||||
Vec3ff bp0,bt0,bp1,bt1;
|
||||
gather_hermite(bp0,bt0,bp1,bt1,i,itime+1);
|
||||
p0 = madd(Vec3ff(f0),ap0,f1*bp0);
|
||||
t0 = madd(Vec3ff(f0),at0,f1*bt0);
|
||||
p1 = madd(Vec3ff(f0),ap1,f1*bp1);
|
||||
t1 = madd(Vec3ff(f0),at1,f1*bt1);
|
||||
}
|
||||
|
||||
/*! loads curve vertices for specified time for mblur and non-mblur geometry */
|
||||
__forceinline void gather_hermite_safe(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
|
||||
{
|
||||
if (hasMotionBlur()) gather_hermite(p0,t0,p1,t1,i,time);
|
||||
else gather_hermite(p0,t0,p1,t1,i);
|
||||
}
|
||||
|
||||
/*! gathers the hermite curve starting with i'th vertex */
|
||||
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const
|
||||
{
|
||||
p0 = vertex (i+0);
|
||||
p1 = vertex (i+1);
|
||||
t0 = tangent(i+0);
|
||||
t1 = tangent(i+1);
|
||||
n0 = normal(i+0);
|
||||
n1 = normal(i+1);
|
||||
dn0 = dnormal(i+0);
|
||||
dn1 = dnormal(i+1);
|
||||
}
|
||||
|
||||
/*! gathers the hermite curve starting with i'th vertex of itime'th timestep */
|
||||
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const
|
||||
{
|
||||
p0 = vertex (i+0,itime);
|
||||
p1 = vertex (i+1,itime);
|
||||
t0 = tangent(i+0,itime);
|
||||
t1 = tangent(i+1,itime);
|
||||
n0 = normal(i+0,itime);
|
||||
n1 = normal(i+1,itime);
|
||||
dn0 = dnormal(i+0,itime);
|
||||
dn1 = dnormal(i+1,itime);
|
||||
}
|
||||
|
||||
/*! loads curve vertices for specified time */
|
||||
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
|
||||
{
|
||||
float ftime;
|
||||
const size_t itime = timeSegment(time, ftime);
|
||||
const float f0 = 1.0f - ftime, f1 = ftime;
|
||||
Vec3ff ap0,at0,ap1,at1; Vec3fa an0,adn0,an1,adn1;
|
||||
gather_hermite(ap0,at0,an0,adn0,ap1,at1,an1,adn1,i,itime);
|
||||
Vec3ff bp0,bt0,bp1,bt1; Vec3fa bn0,bdn0,bn1,bdn1;
|
||||
gather_hermite(bp0,bt0,bn0,bdn0,bp1,bt1,bn1,bdn1,i,itime+1);
|
||||
p0 = madd(Vec3ff(f0),ap0,f1*bp0);
|
||||
t0 = madd(Vec3ff(f0),at0,f1*bt0);
|
||||
n0 = madd(Vec3ff(f0),an0,f1*bn0);
|
||||
dn0= madd(Vec3ff(f0),adn0,f1*bdn0);
|
||||
p1 = madd(Vec3ff(f0),ap1,f1*bp1);
|
||||
t1 = madd(Vec3ff(f0),at1,f1*bt1);
|
||||
n1 = madd(Vec3ff(f0),an1,f1*bn1);
|
||||
dn1= madd(Vec3ff(f0),adn1,f1*bdn1);
|
||||
}
|
||||
|
||||
/*! loads curve vertices for specified time */
|
||||
__forceinline void gather_hermite_safe(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
|
||||
{
|
||||
if (hasMotionBlur()) gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,i,time);
|
||||
else gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,i);
|
||||
}
|
||||
|
||||
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
|
||||
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
|
||||
{
|
||||
Vec3ff v0,t0,v1,t1; Vec3fa n0,dn0,n1,dn1;
|
||||
unsigned int vertexID = curve(primID);
|
||||
gather_hermite(v0,t0,n0,dn0,v1,t1,n1,dn1,vertexID,itime);
|
||||
|
||||
SourceCurve3ff ccurve(v0,t0,v1,t1);
|
||||
SourceCurve3fa ncurve(n0,dn0,n1,dn1);
|
||||
ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
|
||||
return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
|
||||
}
|
||||
|
||||
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
|
||||
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
|
||||
{
|
||||
float ftime;
|
||||
const size_t itime = timeSegment(time, ftime);
|
||||
const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0);
|
||||
const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1);
|
||||
return clerp(curve0,curve1,ftime);
|
||||
}
|
||||
|
||||
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
|
||||
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurveSafe(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
|
||||
{
|
||||
float ftime = 0.0f;
|
||||
const size_t itime = hasMotionBlur() ? timeSegment(time, ftime) : 0;
|
||||
const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0);
|
||||
if (hasMotionBlur()) {
|
||||
const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1);
|
||||
return clerp(curve0,curve1,ftime);
|
||||
}
|
||||
return curve0;
|
||||
}
|
||||
|
||||
/* returns the projected area */
|
||||
__forceinline float projectedPrimitiveArea(const size_t i) const {
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
private:
|
||||
void resizeBuffers(unsigned int numSteps);
|
||||
|
||||
public:
|
||||
BufferView<unsigned int> curves; //!< array of curve indices
|
||||
BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
|
||||
BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
|
||||
BufferView<Vec3ff> tangents0; //!< fast access to first tangent buffer
|
||||
BufferView<Vec3fa> dnormals0; //!< fast access to first normal derivative buffer
|
||||
Device::vector<BufferView<Vec3ff>> vertices = device; //!< vertex array for each timestep
|
||||
Device::vector<BufferView<Vec3fa>> normals = device; //!< normal array for each timestep
|
||||
Device::vector<BufferView<Vec3ff>> tangents = device; //!< tangent array for each timestep
|
||||
Device::vector<BufferView<Vec3fa>> dnormals = device; //!< normal derivative array for each timestep
|
||||
BufferView<char> flags; //!< start, end flag per segment
|
||||
Device::vector<BufferView<char>> vertexAttribs = device; //!< user buffers
|
||||
int tessellationRate; //!< tessellation rate for flat curve
|
||||
float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
|
||||
};
|
||||
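The buffer views declared above (curves, vertices0, normals0, tangents0 and the per-timestep vectors) are populated through the public buffer API. A hedged, application-side sketch of the setup this class consumes; device, scene, numVertices and numCurves are assumed to exist:

// Sketch: create a curve geometry whose buffers feed the views above (illustrative values).
struct CurveVertex { float x, y, z, r; };   // the .w/.r component is what radius(i) returns
RTCGeometry geom = rtcNewGeometry(device, RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE);
CurveVertex* verts = (CurveVertex*) rtcSetNewGeometryBuffer(
    geom, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT4, sizeof(CurveVertex), numVertices);
unsigned int* index = (unsigned int*) rtcSetNewGeometryBuffer(
    geom, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(unsigned int), numCurves);
/* ... fill verts and index, one index per curve segment as read by curve(i) ... */
rtcCommitGeometry(geom);
rtcAttachGeometry(scene, geom);
rtcReleaseGeometry(geom);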
|
||||
namespace isa
|
||||
{
|
||||
|
||||
template<template<typename Ty> class Curve>
|
||||
struct CurveGeometryInterface : public CurveGeometry
|
||||
{
|
||||
typedef Curve<Vec3ff> Curve3ff;
|
||||
typedef Curve<Vec3fa> Curve3fa;
|
||||
|
||||
CurveGeometryInterface (Device* device, Geometry::GType gtype)
|
||||
: CurveGeometry(device,gtype) {}
|
||||
|
||||
__forceinline const Curve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
Vec3ff v0 = vertex(index+0,itime);
|
||||
Vec3ff v1 = vertex(index+1,itime);
|
||||
Vec3ff v2 = vertex(index+2,itime);
|
||||
Vec3ff v3 = vertex(index+3,itime);
|
||||
v0.w *= maxRadiusScale;
|
||||
v1.w *= maxRadiusScale;
|
||||
v2.w *= maxRadiusScale;
|
||||
v3.w *= maxRadiusScale;
|
||||
return Curve3ff (v0,v1,v2,v3);
|
||||
}
|
||||
|
||||
__forceinline const Curve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
const Vec3ff v0 = vertex(index+0,itime);
|
||||
const Vec3ff v1 = vertex(index+1,itime);
|
||||
const Vec3ff v2 = vertex(index+2,itime);
|
||||
const Vec3ff v3 = vertex(index+3,itime);
|
||||
const Vec3ff w0(xfmPoint(space,(Vec3fa)v0), maxRadiusScale*v0.w);
|
||||
const Vec3ff w1(xfmPoint(space,(Vec3fa)v1), maxRadiusScale*v1.w);
|
||||
const Vec3ff w2(xfmPoint(space,(Vec3fa)v2), maxRadiusScale*v2.w);
|
||||
const Vec3ff w3(xfmPoint(space,(Vec3fa)v3), maxRadiusScale*v3.w);
|
||||
return Curve3ff(w0,w1,w2,w3);
|
||||
}
|
||||
|
||||
__forceinline const Curve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
|
||||
{
|
||||
const float r_scale = r_scale0*scale;
|
||||
const unsigned int index = curve(i);
|
||||
const Vec3ff v0 = vertex(index+0,itime);
|
||||
const Vec3ff v1 = vertex(index+1,itime);
|
||||
const Vec3ff v2 = vertex(index+2,itime);
|
||||
const Vec3ff v3 = vertex(index+3,itime);
|
||||
const Vec3ff w0(xfmPoint(space,((Vec3fa)v0-ofs)*Vec3fa(scale)), maxRadiusScale*v0.w*r_scale);
|
||||
const Vec3ff w1(xfmPoint(space,((Vec3fa)v1-ofs)*Vec3fa(scale)), maxRadiusScale*v1.w*r_scale);
|
||||
const Vec3ff w2(xfmPoint(space,((Vec3fa)v2-ofs)*Vec3fa(scale)), maxRadiusScale*v2.w*r_scale);
|
||||
const Vec3ff w3(xfmPoint(space,((Vec3fa)v3-ofs)*Vec3fa(scale)), maxRadiusScale*v3.w*r_scale);
|
||||
return Curve3ff(w0,w1,w2,w3);
|
||||
}
|
||||
|
||||
__forceinline const Curve3fa getNormalCurve(size_t i, size_t itime = 0) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
const Vec3fa n0 = normal(index+0,itime);
|
||||
const Vec3fa n1 = normal(index+1,itime);
|
||||
const Vec3fa n2 = normal(index+2,itime);
|
||||
const Vec3fa n3 = normal(index+3,itime);
|
||||
return Curve3fa (n0,n1,n2,n3);
|
||||
}
|
||||
|
||||
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const
|
||||
{
|
||||
const Curve3ff center = getCurveScaledRadius(i,itime);
|
||||
const Curve3fa normal = getNormalCurve(i,itime);
|
||||
const TensorLinearCubicBezierSurface3fa ocurve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normal);
|
||||
return ocurve;
|
||||
}
|
||||
|
||||
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
|
||||
return getOrientedCurveScaledRadius(i,itime).xfm(space);
|
||||
}
|
||||
|
||||
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
|
||||
return getOrientedCurveScaledRadius(i,itime).xfm(space,ofs,scale);
|
||||
}
|
||||
|
||||
/*! check if the i'th primitive is valid at the itime'th time step */
|
||||
__forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
if (index+3 >= numVertices()) return false;
|
||||
|
||||
for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
|
||||
{
|
||||
const float r0 = radius(index+0,itime);
|
||||
const float r1 = radius(index+1,itime);
|
||||
const float r2 = radius(index+2,itime);
|
||||
const float r3 = radius(index+3,itime);
|
||||
if (!isvalid(r0) || !isvalid(r1) || !isvalid(r2) || !isvalid(r3))
|
||||
return false;
|
||||
|
||||
const Vec3fa v0 = vertex(index+0,itime);
|
||||
const Vec3fa v1 = vertex(index+1,itime);
|
||||
const Vec3fa v2 = vertex(index+2,itime);
|
||||
const Vec3fa v3 = vertex(index+3,itime);
|
||||
if (!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3))
|
||||
return false;
|
||||
|
||||
if (ctype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE)
|
||||
{
|
||||
const Vec3fa n0 = normal(index+0,itime);
|
||||
const Vec3fa n1 = normal(index+1,itime);
|
||||
if (!isvalid(n0) || !isvalid(n1))
|
||||
return false;
|
||||
|
||||
const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds();
|
||||
if (!isvalid(b))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void interpolate_impl(const RTCInterpolateArguments* const args)
|
||||
{
|
||||
unsigned int primID = args->primID;
|
||||
float u = args->u;
|
||||
RTCBufferType bufferType = args->bufferType;
|
||||
unsigned int bufferSlot = args->bufferSlot;
|
||||
float* P = args->P;
|
||||
float* dPdu = args->dPdu;
|
||||
float* ddPdudu = args->ddPdudu;
|
||||
unsigned int valueCount = args->valueCount;
|
||||
|
||||
/* calculate base pointer and stride */
|
||||
assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
|
||||
(bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
|
||||
const char* src = nullptr;
|
||||
size_t stride = 0;
|
||||
if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
|
||||
src = vertexAttribs[bufferSlot].getPtr();
|
||||
stride = vertexAttribs[bufferSlot].getStride();
|
||||
} else {
|
||||
src = vertices[bufferSlot].getPtr();
|
||||
stride = vertices[bufferSlot].getStride();
|
||||
}
|
||||
|
||||
for (unsigned int i=0; i<valueCount; i+=N)
|
||||
{
|
||||
size_t ofs = i*sizeof(float);
|
||||
const size_t index = curves[primID];
|
||||
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
|
||||
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+0)*stride+ofs]);
|
||||
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+1)*stride+ofs]);
|
||||
const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+2)*stride+ofs]);
|
||||
const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+3)*stride+ofs]);
|
||||
|
||||
const Curve<vfloat<N>> curve(p0,p1,p2,p3);
|
||||
if (P ) mem<vfloat<N>>::storeu(valid,P+i, curve.eval(u));
|
||||
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, curve.eval_du(u));
|
||||
if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u));
|
||||
}
|
||||
}
|
||||
|
||||
void interpolate(const RTCInterpolateArguments* const args) {
|
||||
interpolate_impl<4>(args);
|
||||
}
|
||||
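interpolate_impl above evaluates the curve basis at parameter u over SIMD-wide slices of the selected vertex or vertex-attribute buffer. On the application side this code is reached through rtcInterpolate; a hedged sketch, with geom and primID assumed to exist:

// Sketch: query interpolated position and first derivative of one curve segment.
float P[4], dPdu[4];                     // x,y,z,radius as stored in the float4 vertex
RTCInterpolateArguments iargs = {};
iargs.geometry   = geom;
iargs.primID     = primID;
iargs.u          = 0.5f;                 // curve parameter
iargs.bufferType = RTC_BUFFER_TYPE_VERTEX;
iargs.bufferSlot = 0;
iargs.P          = P;
iargs.dPdu       = dPdu;
iargs.valueCount = 4;
rtcInterpolate(&iargs);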
};
|
||||
|
||||
template<template<typename Ty> class Curve>
|
||||
struct HermiteCurveGeometryInterface : public CurveGeometry
|
||||
{
|
||||
typedef Curve<Vec3ff> HermiteCurve3ff;
|
||||
typedef Curve<Vec3fa> HermiteCurve3fa;
|
||||
|
||||
HermiteCurveGeometryInterface (Device* device, Geometry::GType gtype)
|
||||
: CurveGeometry(device,gtype) {}
|
||||
|
||||
__forceinline const HermiteCurve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
Vec3ff v0 = vertex(index+0,itime);
|
||||
Vec3ff v1 = vertex(index+1,itime);
|
||||
Vec3ff t0 = tangent(index+0,itime);
|
||||
Vec3ff t1 = tangent(index+1,itime);
|
||||
v0.w *= maxRadiusScale;
|
||||
v1.w *= maxRadiusScale;
|
||||
t0.w *= maxRadiusScale;
|
||||
t1.w *= maxRadiusScale;
|
||||
return HermiteCurve3ff (v0,t0,v1,t1);
|
||||
}
|
||||
|
||||
__forceinline const HermiteCurve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
const Vec3ff v0 = vertex(index+0,itime);
|
||||
const Vec3ff v1 = vertex(index+1,itime);
|
||||
const Vec3ff t0 = tangent(index+0,itime);
|
||||
const Vec3ff t1 = tangent(index+1,itime);
|
||||
const Vec3ff V0(xfmPoint(space,(Vec3fa)v0),maxRadiusScale*v0.w);
|
||||
const Vec3ff V1(xfmPoint(space,(Vec3fa)v1),maxRadiusScale*v1.w);
|
||||
const Vec3ff T0(xfmVector(space,(Vec3fa)t0),maxRadiusScale*t0.w);
|
||||
const Vec3ff T1(xfmVector(space,(Vec3fa)t1),maxRadiusScale*t1.w);
|
||||
return HermiteCurve3ff(V0,T0,V1,T1);
|
||||
}
|
||||
|
||||
__forceinline const HermiteCurve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
|
||||
{
|
||||
const float r_scale = r_scale0*scale;
|
||||
const unsigned int index = curve(i);
|
||||
const Vec3ff v0 = vertex(index+0,itime);
|
||||
const Vec3ff v1 = vertex(index+1,itime);
|
||||
const Vec3ff t0 = tangent(index+0,itime);
|
||||
const Vec3ff t1 = tangent(index+1,itime);
|
||||
const Vec3ff V0(xfmPoint(space,(v0-ofs)*Vec3fa(scale)), maxRadiusScale*v0.w*r_scale);
|
||||
const Vec3ff V1(xfmPoint(space,(v1-ofs)*Vec3fa(scale)), maxRadiusScale*v1.w*r_scale);
|
||||
const Vec3ff T0(xfmVector(space,t0*Vec3fa(scale)), maxRadiusScale*t0.w*r_scale);
|
||||
const Vec3ff T1(xfmVector(space,t1*Vec3fa(scale)), maxRadiusScale*t1.w*r_scale);
|
||||
return HermiteCurve3ff(V0,T0,V1,T1);
|
||||
}
|
||||
|
||||
__forceinline const HermiteCurve3fa getNormalCurve(size_t i, size_t itime = 0) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
const Vec3fa n0 = normal(index+0,itime);
|
||||
const Vec3fa n1 = normal(index+1,itime);
|
||||
const Vec3fa dn0 = dnormal(index+0,itime);
|
||||
const Vec3fa dn1 = dnormal(index+1,itime);
|
||||
return HermiteCurve3fa (n0,dn0,n1,dn1);
|
||||
}
|
||||
|
||||
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const
|
||||
{
|
||||
const HermiteCurve3ff center = getCurveScaledRadius(i,itime);
|
||||
const HermiteCurve3fa normal = getNormalCurve(i,itime);
|
||||
const TensorLinearCubicBezierSurface3fa ocurve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normal);
|
||||
return ocurve;
|
||||
}
|
||||
|
||||
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
|
||||
return getOrientedCurveScaledRadius(i,itime).xfm(space);
|
||||
}
|
||||
|
||||
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
|
||||
return getOrientedCurveScaledRadius(i,itime).xfm(space,ofs,scale);
|
||||
}
|
||||
|
||||
/*! check if the i'th primitive is valid at the itime'th time step */
|
||||
__forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const
|
||||
{
|
||||
const unsigned int index = curve(i);
|
||||
if (index+1 >= numVertices()) return false;
|
||||
|
||||
for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
|
||||
{
|
||||
const Vec3ff v0 = vertex(index+0,itime);
|
||||
const Vec3ff v1 = vertex(index+1,itime);
|
||||
if (!isvalid4(v0) || !isvalid4(v1))
|
||||
return false;
|
||||
|
||||
const Vec3ff t0 = tangent(index+0,itime);
|
||||
const Vec3ff t1 = tangent(index+1,itime);
|
||||
if (!isvalid4(t0) || !isvalid4(t1))
|
||||
return false;
|
||||
|
||||
if (ctype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE)
|
||||
{
|
||||
const Vec3fa n0 = normal(index+0,itime);
|
||||
const Vec3fa n1 = normal(index+1,itime);
|
||||
if (!isvalid(n0) || !isvalid(n1))
|
||||
return false;
|
||||
|
||||
const Vec3fa dn0 = dnormal(index+0,itime);
|
||||
const Vec3fa dn1 = dnormal(index+1,itime);
|
||||
if (!isvalid(dn0) || !isvalid(dn1))
|
||||
return false;
|
||||
|
||||
const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds();
|
||||
if (!isvalid(b))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void interpolate_impl(const RTCInterpolateArguments* const args)
|
||||
{
|
||||
unsigned int primID = args->primID;
|
||||
float u = args->u;
|
||||
RTCBufferType bufferType = args->bufferType;
|
||||
unsigned int bufferSlot = args->bufferSlot;
|
||||
float* P = args->P;
|
||||
float* dPdu = args->dPdu;
|
||||
float* ddPdudu = args->ddPdudu;
|
||||
unsigned int valueCount = args->valueCount;
|
||||
|
||||
/* we interpolate vertex attributes linearly for hermite basis */
|
||||
if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
|
||||
{
|
||||
assert(bufferSlot <= vertexAttribs.size());
|
||||
const char* vsrc = vertexAttribs[bufferSlot].getPtr();
|
||||
const size_t vstride = vertexAttribs[bufferSlot].getStride();
|
||||
|
||||
for (unsigned int i=0; i<valueCount; i+=N)
|
||||
{
|
||||
const size_t ofs = i*sizeof(float);
|
||||
const size_t index = curves[primID];
|
||||
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
|
||||
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]);
|
||||
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]);
|
||||
|
||||
if (P ) mem<vfloat<N>>::storeu(valid,P+i, madd(1.0f-u,p0,u*p1));
|
||||
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, p1-p0);
|
||||
if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
|
||||
}
|
||||
}
|
||||
|
||||
/* interpolation for vertex buffers */
|
||||
else
|
||||
{
|
||||
assert(bufferSlot < numTimeSteps);
|
||||
const char* vsrc = vertices[bufferSlot].getPtr();
|
||||
const char* tsrc = tangents[bufferSlot].getPtr();
|
||||
const size_t vstride = vertices[bufferSlot].getStride();
|
||||
const size_t tstride = vertices[bufferSlot].getStride();
|
||||
|
||||
for (unsigned int i=0; i<valueCount; i+=N)
|
||||
{
|
||||
const size_t ofs = i*sizeof(float);
|
||||
const size_t index = curves[primID];
|
||||
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
|
||||
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]);
|
||||
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]);
|
||||
const vfloat<N> t0 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+0)*tstride+ofs]);
|
||||
const vfloat<N> t1 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+1)*tstride+ofs]);
|
||||
|
||||
const HermiteCurveT<vfloat<N>> curve(p0,t0,p1,t1);
|
||||
if (P ) mem<vfloat<N>>::storeu(valid,P+i, curve.eval(u));
|
||||
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, curve.eval_du(u));
|
||||
if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void interpolate(const RTCInterpolateArguments* const args) {
|
||||
interpolate_impl<4>(args);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType);
|
||||
}
|
||||
468
engine/thirdparty/embree/kernels/common/scene_grid_mesh.h
vendored
Normal file
@@ -0,0 +1,468 @@
// Copyright 2009-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "geometry.h"
|
||||
#include "buffer.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Grid Mesh */
|
||||
struct GridMesh : public Geometry
|
||||
{
|
||||
/*! type of this geometry */
|
||||
static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH;
|
||||
|
||||
/*! grid */
|
||||
struct Grid
|
||||
{
|
||||
unsigned int startVtxID;
|
||||
unsigned int lineVtxOffset;
|
||||
unsigned short resX,resY;
|
||||
|
||||
/* border flags due to 3x3 vertex pattern */
|
||||
__forceinline unsigned int get3x3FlagsX(const unsigned int x) const
|
||||
{
|
||||
return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0;
|
||||
}
|
||||
|
||||
/* border flags due to 3x3 vertex pattern */
|
||||
__forceinline unsigned int get3x3FlagsY(const unsigned int y) const
|
||||
{
|
||||
return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0;
|
||||
}
|
||||
|
||||
/*! outputs grid structure */
|
||||
__forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) {
|
||||
return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }";
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/*! grid mesh construction */
|
||||
GridMesh (Device* device);
|
||||
|
||||
/* geometry interface */
|
||||
public:
|
||||
void setMask(unsigned mask);
|
||||
void setNumTimeSteps (unsigned int numTimeSteps);
|
||||
void setVertexAttributeCount (unsigned int N);
|
||||
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
|
||||
void* getBuffer(RTCBufferType type, unsigned int slot);
|
||||
void updateBuffer(RTCBufferType type, unsigned int slot);
|
||||
void commit();
|
||||
bool verify();
|
||||
void interpolate(const RTCInterpolateArguments* const args);
|
||||
|
||||
template<int N>
|
||||
void interpolate_impl(const RTCInterpolateArguments* const args)
|
||||
{
|
||||
unsigned int primID = args->primID;
|
||||
float U = args->u;
|
||||
float V = args->v;
|
||||
|
||||
/* clamp input u,v to [0;1] range */
|
||||
U = max(min(U,1.0f),0.0f);
|
||||
V = max(min(V,1.0f),0.0f);
|
||||
|
||||
RTCBufferType bufferType = args->bufferType;
|
||||
unsigned int bufferSlot = args->bufferSlot;
|
||||
float* P = args->P;
|
||||
float* dPdu = args->dPdu;
|
||||
float* dPdv = args->dPdv;
|
||||
float* ddPdudu = args->ddPdudu;
|
||||
float* ddPdvdv = args->ddPdvdv;
|
||||
float* ddPdudv = args->ddPdudv;
|
||||
unsigned int valueCount = args->valueCount;
|
||||
|
||||
/* calculate base pointer and stride */
|
||||
assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
|
||||
(bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
|
||||
const char* src = nullptr;
|
||||
size_t stride = 0;
|
||||
if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
|
||||
src = vertexAttribs[bufferSlot].getPtr();
|
||||
stride = vertexAttribs[bufferSlot].getStride();
|
||||
} else {
|
||||
src = vertices[bufferSlot].getPtr();
|
||||
stride = vertices[bufferSlot].getStride();
|
||||
}
|
||||
|
||||
const Grid& grid = grids[primID];
|
||||
const int grid_width = grid.resX-1;
|
||||
const int grid_height = grid.resY-1;
|
||||
const float rcp_grid_width = rcp(float(grid_width));
|
||||
const float rcp_grid_height = rcp(float(grid_height));
|
||||
const int iu = min((int)floor(U*grid_width ),grid_width);
|
||||
const int iv = min((int)floor(V*grid_height),grid_height);
|
||||
const float u = U*grid_width-float(iu);
|
||||
const float v = V*grid_height-float(iv);
|
||||
|
||||
for (unsigned int i=0; i<valueCount; i+=N)
|
||||
{
|
||||
const size_t ofs = i*sizeof(float);
|
||||
const unsigned int idx0 = grid.startVtxID + (iv+0)*grid.lineVtxOffset + iu;
|
||||
const unsigned int idx1 = grid.startVtxID + (iv+1)*grid.lineVtxOffset + iu;
|
||||
|
||||
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
|
||||
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+0)*stride+ofs]);
|
||||
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+1)*stride+ofs]);
|
||||
const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+1)*stride+ofs]);
|
||||
const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+0)*stride+ofs]);
|
||||
const vbool<N> left = u+v <= 1.0f;
|
||||
const vfloat<N> Q0 = select(left,p0,p2);
|
||||
const vfloat<N> Q1 = select(left,p1,p3);
|
||||
const vfloat<N> Q2 = select(left,p3,p1);
|
||||
const vfloat<N> U = select(left,u,vfloat<N>(1.0f)-u);
|
||||
const vfloat<N> V = select(left,v,vfloat<N>(1.0f)-v);
|
||||
const vfloat<N> W = 1.0f-U-V;
|
||||
|
||||
if (P) {
|
||||
mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
|
||||
}
|
||||
if (dPdu) {
|
||||
assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)*rcp_grid_width);
|
||||
assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)*rcp_grid_height);
|
||||
}
|
||||
if (ddPdudu) {
|
||||
assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
|
||||
assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
|
||||
assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
|
||||
}
|
||||
}
|
||||
}
|
||||
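The loop above first maps the global (U,V) coordinate to a grid cell (iu,iv) with local coordinates (u,v), then splits the cell's quad into two triangles and interpolates barycentrically. A scalar, hedged restatement of that logic (assumes <algorithm> and <cmath>; the real code runs it per SIMD lane of the value array):

// Sketch: scalar version of the cell lookup and triangle-split interpolation above.
float interpolateGridScalar(const float* value, unsigned int startVtxID, unsigned int lineVtxOffset,
                            int resX, int resY, float U, float V) {
  const int gridW = resX - 1, gridH = resY - 1;
  const int iu = std::min((int)std::floor(U * gridW), gridW);
  const int iv = std::min((int)std::floor(V * gridH), gridH);
  const float u = U * gridW - float(iu);                     // local coordinates inside the cell
  const float v = V * gridH - float(iv);
  const unsigned int idx0 = startVtxID + (iv + 0) * lineVtxOffset + iu;   // lower edge of the cell
  const unsigned int idx1 = startVtxID + (iv + 1) * lineVtxOffset + iu;   // upper edge of the cell
  const float p0 = value[idx0 + 0], p1 = value[idx0 + 1];
  const float p2 = value[idx1 + 1], p3 = value[idx1 + 0];
  const bool left = (u + v) <= 1.0f;                          // which triangle of the quad
  const float Q0 = left ? p0 : p2, Q1 = left ? p1 : p3, Q2 = left ? p3 : p1;
  const float uu = left ? u : 1.0f - u, vv = left ? v : 1.0f - v;
  return (1.0f - uu - vv) * Q0 + uu * Q1 + vv * Q2;           // W*Q0 + U*Q1 + V*Q2
}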
|
||||
void addElementsToCount (GeometryCounts & counts) const;
|
||||
|
||||
__forceinline unsigned int getNumTotalQuads() const
|
||||
{
|
||||
size_t quads = 0;
|
||||
for (size_t primID=0; primID<numPrimitives; primID++)
|
||||
quads += getNumQuads(primID);
|
||||
return quads;
|
||||
}
|
||||
|
||||
__forceinline unsigned int getNumQuads(const size_t gridID) const
|
||||
{
|
||||
const Grid& g = grid(gridID);
|
||||
return (unsigned int) max((int)1,((int)g.resX-1) * ((int)g.resY-1));
|
||||
}
|
||||
|
||||
__forceinline unsigned int getNumSubGrids(const size_t gridID) const
|
||||
{
|
||||
const Grid& g = grid(gridID);
|
||||
return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1));
|
||||
}
|
||||
|
||||
/*! get fast access to first vertex buffer */
|
||||
__forceinline float * getCompactVertexArray () const {
|
||||
return (float*) vertices0.getPtr();
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/*! returns number of vertices */
|
||||
__forceinline size_t numVertices() const {
|
||||
return vertices[0].size();
|
||||
}
|
||||
|
||||
/*! returns i'th grid*/
|
||||
__forceinline const Grid& grid(size_t i) const {
|
||||
return grids[i];
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of the first time step */
|
||||
__forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load
|
||||
return vertices0[i];
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of the first time step */
|
||||
__forceinline const char* vertexPtr(size_t i) const {
|
||||
return vertices0.getPtr(i);
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of itime'th timestep */
|
||||
__forceinline const Vec3fa vertex(size_t i, size_t itime) const {
|
||||
return vertices[itime][i];
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of for specified time */
|
||||
__forceinline const Vec3fa vertex(size_t i, float time) const
|
||||
{
|
||||
float ftime;
|
||||
const size_t itime = timeSegment(time, ftime);
|
||||
const float t0 = 1.0f - ftime;
|
||||
const float t1 = ftime;
|
||||
Vec3fa v0 = vertex(i, itime+0);
|
||||
Vec3fa v1 = vertex(i, itime+1);
|
||||
return madd(Vec3fa(t0),v0,t1*v1);
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of itime'th timestep */
|
||||
__forceinline const char* vertexPtr(size_t i, size_t itime) const {
|
||||
return vertices[itime].getPtr(i);
|
||||
}
|
||||
|
||||
/*! returns i'th vertex of the first timestep */
|
||||
__forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const {
|
||||
assert(x < (size_t)g.resX);
|
||||
assert(y < (size_t)g.resY);
|
||||
return g.startVtxID + x + y * g.lineVtxOffset;
|
||||
}
|
||||

    /*! returns i'th vertex of the first timestep */
    __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const {
      const size_t index = grid_vertex_index(g,x,y);
      return vertex(index);
    }

    /*! returns i'th vertex of the itime'th timestep */
    __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const {
      const size_t index = grid_vertex_index(g,x,y);
      return vertex(index,itime);
    }

    /*! returns i'th vertex of the itime'th timestep */
    __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, float time) const {
      const size_t index = grid_vertex_index(g,x,y);
      return vertex(index,time);
    }

    /*! gathers quad vertices */
    __forceinline void gather_quad_vertices(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y) const
    {
      v0 = grid_vertex(g,x+0,y+0);
      v1 = grid_vertex(g,x+1,y+0);
      v2 = grid_vertex(g,x+1,y+1);
      v3 = grid_vertex(g,x+0,y+1);
    }

    /*! gathers quad vertices for specified time */
    __forceinline void gather_quad_vertices(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y, float time) const
    {
      v0 = grid_vertex(g,x+0,y+0,time);
      v1 = grid_vertex(g,x+1,y+0,time);
      v2 = grid_vertex(g,x+1,y+1,time);
      v3 = grid_vertex(g,x+0,y+1,time);
    }

    /*! gathers quad vertices for mblur and non-mblur meshes */
    __forceinline void gather_quad_vertices_safe(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y, float time) const
    {
      if (hasMotionBlur()) gather_quad_vertices(v0,v1,v2,v3,g,x,y,time);
      else                 gather_quad_vertices(v0,v1,v2,v3,g,x,y);
    }

    /*! calculates the build bounds of the i'th quad, if it's valid */
    __forceinline bool buildBoundsQuad(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
    {
      BBox3fa b(empty);
      for (size_t t=0; t<numTimeSteps; t++)
      {
        for (size_t y=sy;y<sy+2;y++)
          for (size_t x=sx;x<sx+2;x++)
          {
            const Vec3fa v = grid_vertex(g,x,y,t);
            if (unlikely(!isvalid(v))) return false;
            b.extend(v);
          }
      }

      bbox = b;
      return true;
    }

    /*! calculates the build bounds of the i'th primitive, if it's valid */
    __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
    {
      BBox3fa b(empty);
      for (size_t t=0; t<numTimeSteps; t++)
      {
        for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
          for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
          {
            const Vec3fa v = grid_vertex(g,x,y,t);
            if (unlikely(!isvalid(v))) return false;
            b.extend(v);
          }
      }

      bbox = b;
      return true;
    }
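    // Annotation (not from upstream Embree): buildBounds() above walks the up-to-3x3 vertex patch
    // (a 2x2 block of quads) anchored at (sx,sy); the min() clamps keep subgrids at the far edges of
    // the grid inside resX/resY, and any non-finite vertex invalidates the whole subgrid.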

    /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
    __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const
    {
      assert(itime < numTimeSteps);
      BBox3fa b0(empty);
      for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
        for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
        {
          const Vec3fa v = grid_vertex(g,x,y,itime);
          if (unlikely(!isvalid(v))) return false;
          b0.extend(v);
        }

      /* use bounds of first time step in builder */
      bbox = b0;
      return true;
    }

    __forceinline bool valid(size_t gridID, size_t itime=0) const {
      return valid(gridID, make_range(itime, itime));
    }

    /*! check if the i'th primitive is valid between the specified time range */
    __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const
    {
      if (unlikely(gridID >= grids.size())) return false;
      const Grid &g = grid(gridID);
      if (unlikely(g.startVtxID + 0 >= vertices0.size())) return false;
      if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false;

      for (size_t y=0;y<g.resY;y++)
        for (size_t x=0;x<g.resX;x++)
          for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
            if (!isvalid(grid_vertex(g,x,y,itime))) return false;
      return true;
    }

    __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const
    {
      BBox3fa box(empty);
      buildBounds(g,sx,sy,itime,box);
      return box;
    }

    __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const {
      BBox3fa bounds0, bounds1;
      buildBounds(g,sx,sy,itime+0,bounds0);
      buildBounds(g,sx,sy,itime+1,bounds1);
      return LBBox3fa(bounds0,bounds1);
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const {
      return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments);
    }

    __forceinline float projectedPrimitiveArea(const size_t i) const {
      return pos_inf;
    }

  public:
    BufferView<Grid> grids;                                  //!< array of grids
    BufferView<Vec3fa> vertices0;                            //!< fast access to first vertex buffer
    Device::vector<BufferView<Vec3fa>> vertices = device;    //!< vertex array for each timestep
    Device::vector<RawBufferView> vertexAttribs = device;    //!< vertex attributes

#if defined(EMBREE_SYCL_SUPPORT)

  public:
    struct PrimID_XY { uint32_t primID; uint16_t x,y; };
    Device::vector<PrimID_XY> quadID_to_primID_xy = device;  //!< maps a quad to the primitive ID and grid coordinates
#endif
  };

  namespace isa
  {
    struct GridMeshISA : public GridMesh
    {
      GridMeshISA (Device* device)
        : GridMesh(device) {}

      LBBox3fa vlinearBounds(size_t buildID, const BBox1f& time_range, const SubGridBuildData * const sgrids) const override {
        const SubGridBuildData &subgrid = sgrids[buildID];
        const unsigned int primID = subgrid.primID;
        const size_t x = subgrid.x();
        const size_t y = subgrid.y();
        return linearBounds(grid(primID),x,y,time_range);
      }

#if defined(EMBREE_SYCL_SUPPORT)
      PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const override
      {
        PrimInfo pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          BBox3fa bounds = empty;
          const PrimID_XY& quad = quadID_to_primID_xy[j];
          if (!buildBoundsQuad(grids[quad.primID],quad.x,quad.y,bounds)) continue;
          const PrimRef prim(bounds,geomID,unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }
#endif

      PrimInfo createPrimRefArray(mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, const range<size_t>& r, size_t k, unsigned int geomID) const override
      {
        PrimInfo pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          if (!valid(j)) continue;
          const GridMesh::Grid &g = grid(j);

          for (unsigned int y=0; y<g.resY-1u; y+=2)
          {
            for (unsigned int x=0; x<g.resX-1u; x+=2)
            {
              BBox3fa bounds = empty;
              if (!buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
              const PrimRef prim(bounds,(unsigned)geomID,(unsigned)k);
              pinfo.add_center2(prim);
              sgrids[k] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
              prims[k++] = prim;
            }
          }
        }
        return pinfo;
      }
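      // Annotation (not from upstream Embree): the loops above decompose each grid into 2x2-quad
      // subgrids by stepping x and y in increments of two. SubGridBuildData stores the subgrid origin
      // OR-ed with the get3x3Flags* bits (which appear to mark truncated border subgrids) plus the
      // grid index j, while the PrimRef records k so the builder can find this entry again.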

#if defined(EMBREE_SYCL_SUPPORT)
      PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const override
      {
        const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
        PrimInfo pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          const PrimID_XY& quad = quadID_to_primID_xy[j];
          const LBBox3fa lbounds = linearBounds(grids[quad.primID],quad.x,quad.y,t0t1);
          const PrimRef prim(lbounds.bounds(), unsigned(geomID), unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }
#endif

      PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const override
      {
        PrimInfoMB pinfoMB(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          if (!valid(j, timeSegmentRange(t0t1))) continue;
          const GridMesh::Grid &g = grid(j);

          for (unsigned int y=0; y<g.resY-1u; y+=2)
          {
            for (unsigned int x=0; x<g.resX-1u; x+=2)
            {
              const PrimRefMB prim(linearBounds(g,x,y,t0t1),numTimeSegments(),time_range,numTimeSegments(),unsigned(geomID),unsigned(k));
              pinfoMB.add_primref(prim);
              sgrids[k] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
              prims[k++] = prim;
            }
          }
        }
        return pinfoMB;
      }
    };
  }

  DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*);
}
302
engine/thirdparty/embree/kernels/common/scene_instance.h
vendored
Normal file
@ -0,0 +1,302 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "geometry.h"
#include "accel.h"

namespace embree
{
  struct MotionDerivativeCoefficients;

  /*! Instanced acceleration structure */
  struct Instance : public Geometry
  {
    //ALIGNED_STRUCT_(16);
    static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE;

  public:
    Instance (Device* device, Accel* object = nullptr, unsigned int numTimeSteps = 1);
    ~Instance();

  private:
    Instance (const Instance& other) DELETED; // do not implement
    Instance& operator= (const Instance& other) DELETED; // do not implement

  private:
    LBBox3fa nonlinearBounds(const BBox1f& time_range_in,
                             const BBox1f& geom_time_range,
                             float geom_time_segments) const;

    BBox3fa boundSegment(size_t itime,
                         BBox3fa const& obbox0, BBox3fa const& obbox1,
                         BBox3fa const& bbox0, BBox3fa const& bbox1,
                         float t_min, float t_max) const;

    /* calculates the (correct) interpolated bounds */
    __forceinline BBox3fa bounds(size_t itime0, size_t itime1, float f) const
    {
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return xfmBounds(slerp(local2world[itime0], local2world[itime1], f),
                         lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
      return xfmBounds(lerp(local2world[itime0], local2world[itime1], f),
                       lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
    }
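    // Annotation (not from upstream Embree): for quaternion-decomposed transforms the two time steps
    // are combined with slerp(), otherwise with a component-wise lerp(); in both cases the instanced
    // object's bounds are interpolated first and then transformed into world space.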

  public:
    virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
    virtual void setInstancedScene(const Ref<Scene>& scene) override;
    virtual void setTransform(const AffineSpace3fa& local2world, unsigned int timeStep) override;
    virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) override;
    virtual AffineSpace3fa getTransform(float time) override;
    virtual AffineSpace3fa getTransform(size_t, float time) override;
    virtual void setMask (unsigned mask) override;
    virtual void build() {}
    virtual void addElementsToCount (GeometryCounts & counts) const override;
    virtual void commit() override;

  public:

    /*! calculates the bounds of instance */
    __forceinline BBox3fa bounds(size_t i) const {
      assert(i == 0);
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return xfmBounds(quaternionDecompositionToAffineSpace(local2world[0]),object->bounds.bounds());
      return xfmBounds(local2world[0],object->bounds.bounds());
    }

    /*! gets the bounds of the instanced scene */
    __forceinline BBox3fa getObjectBounds(size_t itime) const {
      return object->getBounds(timeStep(itime));
    }

    /*! calculates the bounds of instance */
    __forceinline BBox3fa bounds(size_t i, size_t itime) const {
      assert(i == 0);
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return xfmBounds(quaternionDecompositionToAffineSpace(local2world[itime]),getObjectBounds(itime));
      return xfmBounds(local2world[itime],getObjectBounds(itime));
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
      assert(i == 0);
      LBBox3fa lbbox = nonlinearBounds(dt, time_range, fnumTimeSegments);
      return lbbox;
    }

    /*! calculates the build bounds of the i'th item, if it's valid */
    __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
    {
      assert(i==0);
      const BBox3fa b = bounds(i);
      if (bbox) *bbox = b;
      return isvalid(b);
    }

    /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
    __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
    {
      assert(i==0);
      const LBBox3fa bounds = linearBounds(i,itime);
      bbox = bounds.bounds ();
      return isvalid(bounds);
    }

    /* gets version info of topology */
    unsigned int getTopologyVersion() const {
      return numPrimitives;
    }

    /* returns true if topology changed */
    bool topologyChanged(unsigned int otherVersion) const {
      return numPrimitives != otherVersion;
    }

    /*! check if the i'th primitive is valid between the specified time range */
    __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
    {
      assert(i == 0);
      for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
        if (!isvalid(bounds(i,itime))) return false;

      return true;
    }

    __forceinline AffineSpace3fa getLocal2World() const
    {
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return quaternionDecompositionToAffineSpace(local2world[0]);
      return local2world[0];
    }

    __forceinline AffineSpace3fa getLocal2World(float t) const
    {
      if (numTimeSegments() > 0) {
        float ftime; const unsigned int itime = timeSegment(t, ftime);
        if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
          return slerp(local2world[itime+0],local2world[itime+1],ftime);
        return lerp(local2world[itime+0],local2world[itime+1],ftime);
      }
      return getLocal2World();
    }
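    // Annotation (not from upstream Embree): for motion-blurred instances the world-to-local
    // transform at time t is obtained as rcp(getLocal2World(t)), i.e. the interpolated local-to-world
    // matrix is inverted on demand; only the time-step-0 inverse is cached in world2local0.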

    __forceinline AffineSpace3fa getWorld2Local() const {
      return world2local0;
    }

    __forceinline AffineSpace3fa getWorld2Local(float t) const {
      if (numTimeSegments() > 0)
        return rcp(getLocal2World(t));
      return getWorld2Local();
    }

    template<int K>
    __forceinline AffineSpace3vf<K> getWorld2Local(const vbool<K>& valid, const vfloat<K>& t) const
    {
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return getWorld2LocalSlerp<K>(valid, t);
      return getWorld2LocalLerp<K>(valid, t);
    }

    __forceinline float projectedPrimitiveArea(const size_t i) const {
      return area(bounds(i));
    }

  private:

    template<int K>
    __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(const vbool<K>& valid, const vfloat<K>& t) const
    {
      vfloat<K> ftime;
      const vint<K> itime_k = timeSegment<K>(t, ftime);
      assert(any(valid));
      const size_t index = bsf(movemask(valid));
      const int itime = itime_k[index];
      if (likely(all(valid, itime_k == vint<K>(itime)))) {
        return rcp(slerp(AffineSpace3vff<K>(local2world[itime+0]),
                         AffineSpace3vff<K>(local2world[itime+1]),
                         ftime));
      }
      else {
        AffineSpace3vff<K> space0,space1;
        vbool<K> valid1 = valid;
        while (any(valid1)) {
          vbool<K> valid2;
          const int itime = next_unique(valid1, itime_k, valid2);
          space0 = select(valid2, AffineSpace3vff<K>(local2world[itime+0]), space0);
          space1 = select(valid2, AffineSpace3vff<K>(local2world[itime+1]), space1);
        }
        return rcp(slerp(space0, space1, ftime));
      }
    }

    template<int K>
    __forceinline AffineSpace3vf<K> getWorld2LocalLerp(const vbool<K>& valid, const vfloat<K>& t) const
    {
      vfloat<K> ftime;
      const vint<K> itime_k = timeSegment<K>(t, ftime);
      assert(any(valid));
      const size_t index = bsf(movemask(valid));
      const int itime = itime_k[index];
      if (likely(all(valid, itime_k == vint<K>(itime)))) {
        return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]),
                        AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]),
                        ftime));
      } else {
        AffineSpace3vf<K> space0,space1;
        vbool<K> valid1 = valid;
        while (any(valid1)) {
          vbool<K> valid2;
          const int itime = next_unique(valid1, itime_k, valid2);
          space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]), space0);
          space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]), space1);
        }
        return rcp(lerp(space0, space1, ftime));
      }
    }

  public:
    Accel* object;               //!< pointer to instanced acceleration structure
    AffineSpace3ff* local2world; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
    AffineSpace3fa world2local0; //!< transformation from world space to local space for timestep 0
  };

  namespace isa
  {
    struct InstanceISA : public Instance
    {
      InstanceISA (Device* device)
        : Instance(device) {}

      LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
        return linearBounds(primID,time_range);
      }

      PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        assert(r.begin() == 0);
        assert(r.end() == 1);

        PrimInfo pinfo(empty);
        BBox3fa b = empty;
        if (!buildBounds(0,&b)) return pinfo;
        // const BBox3fa b = bounds(0);
        // if (!isvalid(b)) return pinfo;

        const PrimRef prim(b,geomID,unsigned(0));
        pinfo.add_center2(prim);
        prims[k++] = prim;
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        assert(r.begin() == 0);
        assert(r.end() == 1);

        PrimInfo pinfo(empty);
        BBox3fa b = empty;
        if (!buildBounds(0,&b)) return pinfo;
        // if (!valid(0,range<size_t>(itime))) return pinfo;
        // const PrimRef prim(linearBounds(0,itime).bounds(),geomID,unsigned(0));
        const PrimRef prim(b,geomID,unsigned(0));
        pinfo.add_center2(prim);
        prims[k++] = prim;
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        assert(r.begin() == 0);
        assert(r.end() == 1);

        PrimInfo pinfo(empty);
        const BBox1f t0t1 = intersect(getTimeRange(), time_range);
        if (t0t1.empty()) return pinfo;

        const BBox3fa bounds = linearBounds(0, t0t1).bounds();
        const PrimRef prim(bounds, geomID, unsigned(0));
        pinfo.add_center2(prim);
        prims[k++] = prim;
        return pinfo;
      }

      PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        assert(r.begin() == 0);
        assert(r.end() == 1);

        PrimInfoMB pinfo(empty);
        if (!valid(0, timeSegmentRange(t0t1))) return pinfo;
        const PrimRefMB prim(linearBounds(0,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(0));
        pinfo.add_primref(prim);
        prims[k++] = prim;
        return pinfo;
      }
    };
  }

  DECLARE_ISA_FUNCTION(Instance*, createInstance, Device*);
}
385
engine/thirdparty/embree/kernels/common/scene_instance_array.h
vendored
Normal file
@ -0,0 +1,385 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "geometry.h"
#include "accel.h"

namespace embree
{
  struct MotionDerivativeCoefficients;

  /*! Instanced acceleration structure */
  struct InstanceArray : public Geometry
  {
    //ALIGNED_STRUCT_(16);
    static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE_ARRAY;

  public:
    InstanceArray (Device* device, unsigned int numTimeSteps = 1);
    ~InstanceArray();

  private:
    InstanceArray (const InstanceArray& other) DELETED; // do not implement
    InstanceArray& operator= (const InstanceArray& other) DELETED; // do not implement

  private:
    LBBox3fa nonlinearBounds(size_t i,
                             const BBox1f& time_range_in,
                             const BBox1f& geom_time_range,
                             float geom_time_segments) const;

    BBox3fa boundSegment(size_t i, size_t itime,
                         BBox3fa const& obbox0, BBox3fa const& obbox1,
                         BBox3fa const& bbox0, BBox3fa const& bbox1,
                         float t_min, float t_max) const;

    /* calculates the (correct) interpolated bounds */
    __forceinline BBox3fa bounds(size_t i, size_t itime0, size_t itime1, float f) const
    {
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return xfmBounds(slerp(l2w(i, itime0), l2w(i, itime1), f),
                         lerp(getObjectBounds(i, itime0), getObjectBounds(i, itime1), f));
      return xfmBounds(lerp(l2w(i, itime0), l2w(i, itime1), f),
                       lerp(getObjectBounds(i, itime0), getObjectBounds(i, itime1), f));
    }

  public:

    virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) override;
    virtual void* getBuffer(RTCBufferType type, unsigned int slot) override;
    virtual void updateBuffer(RTCBufferType type, unsigned int slot) override;

    virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
    virtual void setInstancedScene(const Ref<Scene>& scene) override;
    virtual void setInstancedScenes(const RTCScene* scenes, size_t numScenes) override;
    virtual AffineSpace3fa getTransform(size_t, float time) override;
    virtual void setMask (unsigned mask) override;
    virtual void build() {}
    virtual void addElementsToCount (GeometryCounts & counts) const override;
    virtual void commit() override;

  public:

    /*! calculates the bounds of instance */
    __forceinline BBox3fa bounds(size_t i) const {
      if (!valid(i))
        return BBox3fa();

      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return xfmBounds(quaternionDecompositionToAffineSpace(l2w(i, 0)),getObject(i)->bounds.bounds());
      return xfmBounds(l2w(i, 0),getObject(i)->bounds.bounds());
    }

    /*! gets the bounds of the instanced scene */
    __forceinline BBox3fa getObjectBounds(size_t i, size_t itime) const {
      if (!valid(i))
        return BBox3fa();

      return getObject(i)->getBounds(timeStep(itime));
    }

    /*! calculates the bounds of instance */
    __forceinline BBox3fa bounds(size_t i, size_t itime) const {
      if (!valid(i))
        return BBox3fa();

      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return xfmBounds(quaternionDecompositionToAffineSpace(l2w(i, itime)),getObjectBounds(i, itime));
      return xfmBounds(l2w(i, itime),getObjectBounds(i, itime));
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
      if (!valid(i))
        return LBBox3fa();

      LBBox3fa lbbox = nonlinearBounds(i, dt, time_range, fnumTimeSegments);
      return lbbox;
    }

    /*! calculates the build bounds of the i'th item, if it's valid */
    __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
    {
      if (!valid(i))
        return false;

      const BBox3fa b = bounds(i);
      if (bbox) *bbox = b;
      return isvalid(b);
    }

    /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
    __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
    {
      if (!valid(i))
        return false;

      const LBBox3fa bounds = linearBounds(i,itime);
      bbox = bounds.bounds ();
      return isvalid(bounds);
    }

    /* gets version info of topology */
    unsigned int getTopologyVersion() const {
      return numPrimitives;
    }

    /* returns true if topology changed */
    bool topologyChanged(unsigned int otherVersion) const {
      return numPrimitives != otherVersion;
    }

    /*! check if the i'th primitive references a valid instanced object */
    __forceinline bool valid(size_t i) const
    {
      if (object) return true;
      return (object_ids[i] != (unsigned int)(-1));
    }

    /*! check if the i'th primitive is valid between the specified time range */
    __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
    {
      for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
        if (!isvalid(bounds(i,itime))) return false;

      return true;
    }

    __forceinline AffineSpace3fa getLocal2World(size_t i) const
    {
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return quaternionDecompositionToAffineSpace(l2w(i,0));
      return l2w(i, 0);
    }

    __forceinline AffineSpace3fa getLocal2World(size_t i, float t) const
    {
      if (numTimeSegments() > 0) {
        float ftime; const unsigned int itime = timeSegment(t, ftime);
        if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
          return slerp(l2w(i, itime+0),l2w(i, itime+1),ftime);
        return lerp(l2w(i, itime+0),l2w(i, itime+1),ftime);
      }
      return getLocal2World(i);
    }

    __forceinline AffineSpace3fa getWorld2Local(size_t i) const {
      return rcp(getLocal2World(i));
    }

    __forceinline AffineSpace3fa getWorld2Local(size_t i, float t) const {
      return rcp(getLocal2World(i, t));
    }

    template<int K>
    __forceinline AffineSpace3vf<K> getWorld2Local(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
    {
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return getWorld2LocalSlerp<K>(i, valid, t);
      return getWorld2LocalLerp<K>(i, valid, t);
    }

    __forceinline float projectedPrimitiveArea(const size_t i) const {
      return area(bounds(i));
    }

    inline Accel* getObject(size_t i) const {
      if (object) {
        return object;
      }

      assert(objects);
      assert(i < numPrimitives);
      if (object_ids[i] == (unsigned int)(-1))
        return nullptr;

      assert(object_ids[i] < numObjects);
      return objects[object_ids[i]];
    }

  private:

    template<int K>
    __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
    {
      vfloat<K> ftime;
      const vint<K> itime_k = timeSegment<K>(t, ftime);
      assert(any(valid));
      const size_t index = bsf(movemask(valid));
      const int itime = itime_k[index];
      if (likely(all(valid, itime_k == vint<K>(itime)))) {
        return rcp(slerp(AffineSpace3vff<K>(l2w(i, itime+0)),
                         AffineSpace3vff<K>(l2w(i, itime+1)),
                         ftime));
      }
      else {
        AffineSpace3vff<K> space0,space1;
        vbool<K> valid1 = valid;
        while (any(valid1)) {
          vbool<K> valid2;
          const int itime = next_unique(valid1, itime_k, valid2);
          space0 = select(valid2, AffineSpace3vff<K>(l2w(i, itime+0)), space0);
          space1 = select(valid2, AffineSpace3vff<K>(l2w(i, itime+1)), space1);
        }
        return rcp(slerp(space0, space1, ftime));
      }
    }

    template<int K>
    __forceinline AffineSpace3vf<K> getWorld2LocalLerp(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
    {
      vfloat<K> ftime;
      const vint<K> itime_k = timeSegment<K>(t, ftime);
      assert(any(valid));
      const size_t index = bsf(movemask(valid));
      const int itime = itime_k[index];
      if (likely(all(valid, itime_k == vint<K>(itime)))) {
        return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+0)),
                        AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+1)),
                        ftime));
      } else {
        AffineSpace3vf<K> space0,space1;
        vbool<K> valid1 = valid;
        while (any(valid1)) {
          vbool<K> valid2;
          const int itime = next_unique(valid1, itime_k, valid2);
          space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+0)), space0);
          space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+1)), space1);
        }
        return rcp(lerp(space0, space1, ftime));
      }
    }

  private:

    __forceinline AffineSpace3ff l2w(size_t i, size_t itime) const {
      if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR) {
        return *(AffineSpace3ff*)(l2w_buf[itime].getPtr(i));
      }
      else if (l2w_buf[itime].getFormat() == RTC_FORMAT_QUATERNION_DECOMPOSITION) {
        AffineSpace3ff transform;
        QuaternionDecomposition* qd = (QuaternionDecomposition*)l2w_buf[itime].getPtr(i);
        transform.l.vx.x = qd->scale_x;
        transform.l.vy.y = qd->scale_y;
        transform.l.vz.z = qd->scale_z;
        transform.l.vy.x = qd->skew_xy;
        transform.l.vz.x = qd->skew_xz;
        transform.l.vz.y = qd->skew_yz;
        transform.l.vx.y = qd->translation_x;
        transform.l.vx.z = qd->translation_y;
        transform.l.vy.z = qd->translation_z;
        transform.p.x = qd->shift_x;
        transform.p.y = qd->shift_y;
        transform.p.z = qd->shift_z;
        // normalize quaternion
        Quaternion3f q(qd->quaternion_r, qd->quaternion_i, qd->quaternion_j, qd->quaternion_k);
        q = normalize(q);
        transform.l.vx.w = q.i;
        transform.l.vy.w = q.j;
        transform.l.vz.w = q.k;
        transform.p.w = q.r;
        return transform;
      }
      else if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR) {
        AffineSpace3f* l2w = reinterpret_cast<AffineSpace3f*>(l2w_buf[itime].getPtr(i));
        return AffineSpace3ff(*l2w);
      }
      else if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT3X4_ROW_MAJOR) {
        float* data = reinterpret_cast<float*>(l2w_buf[itime].getPtr(i));
        AffineSpace3f l2w;
        l2w.l.vx.x = data[0]; l2w.l.vy.x = data[1]; l2w.l.vz.x = data[2];  l2w.p.x = data[3];
        l2w.l.vx.y = data[4]; l2w.l.vy.y = data[5]; l2w.l.vz.y = data[6];  l2w.p.y = data[7];
        l2w.l.vx.z = data[8]; l2w.l.vy.z = data[9]; l2w.l.vz.z = data[10]; l2w.p.z = data[11];
        return l2w;
      }
      assert(false);
      return AffineSpace3ff();
    }
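    // Annotation (not from upstream Embree): in the RTC_FORMAT_QUATERNION_DECOMPOSITION branch above,
    // scale and skew fill the upper 3x3, the decomposition's translation is packed into the .y/.z
    // components of the first columns, the shift goes into p.xyz, and the normalized quaternion is
    // spread over the four .w lanes; quaternionDecompositionToAffineSpace() presumably unpacks this
    // layout when the transform is evaluated.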

    inline AffineSpace3ff l2w(size_t i) const {
      return l2w(i, 0);
    }

  private:
    Accel* object;       //!< fast path if only one scene is instanced
    Accel** objects;
    uint32_t numObjects;
    Device::vector<RawBufferView> l2w_buf = device; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
    BufferView<uint32_t> object_ids;                //!< array of scene ids per instance array primitive
  };

  namespace isa
  {
    struct InstanceArrayISA : public InstanceArray
    {
      InstanceArrayISA (Device* device)
        : InstanceArray(device) {}

      LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
        return linearBounds(primID,time_range);
      }

      PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j = r.begin(); j < r.end(); j++) {
          BBox3fa bounds = empty;
          if (!buildBounds(j, &bounds) || !valid(j))
            continue;
          const PrimRef prim(bounds, geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j = r.begin(); j < r.end(); j++) {
          BBox3fa bounds = empty;
          if (!buildBounds(j, itime, bounds))
            continue;
          const PrimRef prim(bounds, geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
        if (t0t1.empty()) return pinfo;

        for (size_t j = r.begin(); j < r.end(); j++) {
          LBBox3fa lbounds = linearBounds(j, t0t1);
          if (!isvalid(lbounds.bounds()))
            continue;
          const PrimRef prim(lbounds.bounds(), geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfoMB pinfo(empty);
        for (size_t j = r.begin(); j < r.end(); j++) {
          if (!valid(j, timeSegmentRange(t0t1)))
            continue;
          const PrimRefMB prim(linearBounds(j, t0t1), this->numTimeSegments(), this->time_range, this->numTimeSegments(), geomID, unsigned(j));
          pinfo.add_primref(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }
    };
  }

  DECLARE_ISA_FUNCTION(InstanceArray*, createInstanceArray, Device*);
}
634
engine/thirdparty/embree/kernels/common/scene_line_segments.h
vendored
Normal file
@ -0,0 +1,634 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "default.h"
#include "geometry.h"
#include "buffer.h"

namespace embree
{
  /*! represents an array of line segments */
  struct LineSegments : public Geometry
  {
    /*! type of this geometry */
    static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE2;

  public:

    /*! line segments construction */
    LineSegments (Device* device, Geometry::GType gtype);

  public:
    void setMask (unsigned mask);
    void setNumTimeSteps (unsigned int numTimeSteps);
    void setVertexAttributeCount (unsigned int N);
    void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
    void* getBuffer(RTCBufferType type, unsigned int slot);
    void updateBuffer(RTCBufferType type, unsigned int slot);
    void commit();
    bool verify ();
    void interpolate(const RTCInterpolateArguments* const args);
    void setTessellationRate(float N);
    void setMaxRadiusScale(float s);
    void addElementsToCount (GeometryCounts & counts) const;

    template<int N>
    void interpolate_impl(const RTCInterpolateArguments* const args)
    {
      unsigned int primID = args->primID;
      float u = args->u;
      RTCBufferType bufferType = args->bufferType;
      unsigned int bufferSlot = args->bufferSlot;
      float* P = args->P;
      float* dPdu = args->dPdu;
      float* ddPdudu = args->ddPdudu;
      unsigned int valueCount = args->valueCount;

      /* calculate base pointer and stride */
      assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
             (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
      const char* src = nullptr;
      size_t stride = 0;
      if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
        src = vertexAttribs[bufferSlot].getPtr();
        stride = vertexAttribs[bufferSlot].getStride();
      } else {
        src = vertices[bufferSlot].getPtr();
        stride = vertices[bufferSlot].getStride();
      }

      for (unsigned int i=0; i<valueCount; i+=N)
      {
        const size_t ofs = i*sizeof(float);
        const size_t segment = segments[primID];
        const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
        const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+0)*stride+ofs]);
        const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+1)*stride+ofs]);
        if (P      ) mem<vfloat<N>>::storeu(valid,P+i,lerp(p0,p1,u));
        if (dPdu   ) mem<vfloat<N>>::storeu(valid,dPdu+i,p1-p0);
        if (ddPdudu) mem<vfloat<N>>::storeu(valid,dPdu+i,vfloat<N>(zero));
      }
    }
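    // Annotation (not from upstream Embree): for a segment starting at vertex index "segment",
    // interpolate_impl() produces P = lerp(v[segment], v[segment+1], u) and dPdu = v[segment+1] - v[segment],
    // processing N floats at a time under a mask so a trailing partial chunk of valueCount is safe.
    // Note that the ddPdudu branch above stores its zeros through dPdu+i rather than ddPdudu+i, which
    // looks like an oversight worth verifying against upstream.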

  public:

    /*! returns the number of vertices */
    __forceinline size_t numVertices() const {
      return vertices[0].size();
    }

    /*! returns the i'th segment */
    __forceinline const unsigned int& segment(size_t i) const {
      return segments[i];
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    /*! returns the i'th segment */
    template<int M>
    __forceinline const vuint<M> vsegment(const vuint<M>& i) const {
      return segments[i.v];
    }
#endif

    /*! checks if a segment exists to the left of the i'th segment */
    __forceinline bool segmentLeftExists(size_t i) const {
      assert (flags);
      return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_LEFT) != 0;
    }

    /*! checks if a segment exists to the right of the i'th segment */
    __forceinline bool segmentRightExists(size_t i) const {
      assert (flags);
      return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_RIGHT) != 0;
    }

    /*! returns i'th vertex of the first time step */
    __forceinline Vec3ff vertex(size_t i) const {
      return vertices0[i];
    }

    /*! returns i'th vertex of the first time step */
    __forceinline const char* vertexPtr(size_t i) const {
      return vertices0.getPtr(i);
    }

    /*! returns i'th normal of the first time step */
    __forceinline Vec3fa normal(size_t i) const {
      return normals0[i];
    }

    /*! returns i'th radius of the first time step */
    __forceinline float radius(size_t i) const {
      return vertices0[i].w;
    }

    /*! returns i'th vertex of itime'th timestep */
    __forceinline Vec3ff vertex(size_t i, size_t itime) const {
      return vertices[itime][i];
    }

    /*! returns i'th vertex of itime'th timestep */
    __forceinline const char* vertexPtr(size_t i, size_t itime) const {
      return vertices[itime].getPtr(i);
    }

    /*! returns i'th normal of itime'th timestep */
    __forceinline Vec3fa normal(size_t i, size_t itime) const {
      return normals[itime][i];
    }

    /*! returns i'th radius of itime'th timestep */
    __forceinline float radius(size_t i, size_t itime) const {
      return vertices[itime][i].w;
    }

    /*! gathers the curve starting with i'th vertex */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid) const
    {
      p0 = vertex(vid+0);
      p1 = vertex(vid+1);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid) const
    {
      p0 = vertex(vid.v+0);
      p1 = vertex(vid.v+1);
    }
#endif

    /*! gathers the curve starting with i'th vertex of itime'th timestep */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid, size_t itime) const
    {
      p0 = vertex(vid+0,itime);
      p1 = vertex(vid+1,itime);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid, const vint<M>& itime) const
    {
      p0 = vertex(vid.v+0,itime.v);
      p1 = vertex(vid.v+1,itime.v);
    }
#endif

    /*! loads curve vertices for specified time */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid, float time) const
    {
      float ftime;
      const size_t itime = timeSegment(time, ftime);

      const float t0 = 1.0f - ftime;
      const float t1 = ftime;
      Vec3ff a0,a1; gather(a0,a1,vid,itime);
      Vec3ff b0,b1; gather(b0,b1,vid,itime+1);
      p0 = madd(Vec3ff(t0),a0,t1*b0);
      p1 = madd(Vec3ff(t0),a1,t1*b1);
    }

    /*! loads curve vertices for specified time for mblur and non-mblur case */
    __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, unsigned int vid, float time) const
    {
      if (hasMotionBlur()) gather(p0,p1,vid,time);
      else                 gather(p0,p1,vid);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid, const vfloat<M>& time) const
    {
      vfloat<M> ftime;
      const vint<M> itime = timeSegment<M>(time, ftime);

      const vfloat<M> t0 = 1.0f - ftime;
      const vfloat<M> t1 = ftime;
      Vec4vf<M> a0,a1; vgather<M>(a0,a1,vid,itime);
      Vec4vf<M> b0,b1; vgather<M>(b0,b1,vid,itime+1);
      p0 = madd(Vec4vf<M>(t0),a0,t1*b0);
      p1 = madd(Vec4vf<M>(t0),a1,t1*b1);
    }
#endif

    /*! gathers the cone curve starting with i'th vertex */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, unsigned int vid) const
    {
      gather(p0,p1,vid);
      cL = !segmentLeftExists (primID);
      cR = !segmentRightExists(primID);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, vbool<M>& cL, vbool<M>& cR, const vuint<M>& primID, const vuint<M>& vid) const
    {
      vgather<M>(p0,p1,vid);
      cL = !segmentLeftExists (primID.v);
      cR = !segmentRightExists(primID.v);
    }
#endif

    /*! gathers the cone curve starting with i'th vertex of itime'th timestep */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, size_t itime) const
    {
      gather(p0,p1,vid,itime);
      cL = !segmentLeftExists (primID);
      cR = !segmentRightExists(primID);
    }

    /*! loads cone curve vertices for specified time */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, float time) const
    {
      gather(p0,p1,vid,time);
      cL = !segmentLeftExists (primID);
      cR = !segmentRightExists(primID);
    }

    /*! loads cone curve vertices for specified time for mblur and non-mblur geometry */
    __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, float time) const
    {
      if (hasMotionBlur()) gather(p0,p1,cL,cR,primID,vid,time);
      else                 gather(p0,p1,cL,cR,primID,vid);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, vbool<M>& cL, vbool<M>& cR, const vuint<M>& primID, const vuint<M>& vid, const vfloat<M>& time) const
    {
      vgather<M>(p0,p1,vid,time);
      cL = !segmentLeftExists (primID.v);
      cR = !segmentRightExists(primID.v);
    }
#endif

    /*! gathers the curve starting with i'th vertex */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid) const
    {
      p0 = vertex(vid+0);
      p1 = vertex(vid+1);
      p2 = segmentLeftExists (primID) ? vertex(vid-1) : Vec3ff(inf);
      p3 = segmentRightExists(primID) ? vertex(vid+2) : Vec3ff(inf);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid) const
    {
      p0 = vertex(vid.v+0);
      p1 = vertex(vid.v+1);
      vbool<M> left  = segmentLeftExists (primID.v);
      vbool<M> right = segmentRightExists(primID.v);
      vuint<M> i2 = select(left, vid-1,vid+0);
      vuint<M> i3 = select(right,vid+2,vid+1);
      p2 = vertex(i2.v);
      p3 = vertex(i3.v);
      p2 = select(left, p2,Vec4vf<M>(inf));
      p3 = select(right,p3,Vec4vf<M>(inf));
    }
#endif

    /*! gathers the curve starting with i'th vertex of itime'th timestep */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, size_t itime) const
    {
      p0 = vertex(vid+0,itime);
      p1 = vertex(vid+1,itime);
      p2 = segmentLeftExists (primID) ? vertex(vid-1,itime) : Vec3ff(inf);
      p3 = segmentRightExists(primID) ? vertex(vid+2,itime) : Vec3ff(inf);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid, const vint<M>& itime) const
    {
      p0 = vertex(vid.v+0, itime.v);
      p1 = vertex(vid.v+1, itime.v);
      vbool<M> left  = segmentLeftExists (primID.v);
      vbool<M> right = segmentRightExists(primID.v);
      vuint<M> i2 = select(left, vid-1,vid+0);
      vuint<M> i3 = select(right,vid+2,vid+1);
      p2 = vertex(i2.v, itime.v);
      p3 = vertex(i3.v, itime.v);
      p2 = select(left, p2,Vec4vf<M>(inf));
      p3 = select(right,p3,Vec4vf<M>(inf));
    }
#endif

    /*! loads curve vertices for specified time */
    __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, float time) const
    {
      float ftime;
      const size_t itime = timeSegment(time, ftime);

      const float t0 = 1.0f - ftime;
      const float t1 = ftime;
      Vec3ff a0,a1,a2,a3; gather(a0,a1,a2,a3,primID,vid,itime);
      Vec3ff b0,b1,b2,b3; gather(b0,b1,b2,b3,primID,vid,itime+1);
      p0 = madd(Vec3ff(t0),a0,t1*b0);
      p1 = madd(Vec3ff(t0),a1,t1*b1);
      p2 = madd(Vec3ff(t0),a2,t1*b2);
      p3 = madd(Vec3ff(t0),a3,t1*b3);
    }

    /*! loads curve vertices for specified time for mblur and non-mblur geometry */
    __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, float time) const
    {
      if (hasMotionBlur()) gather(p0,p1,p2,p3,primID,vid,time);
      else                 gather(p0,p1,p2,p3,primID,vid);
    }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
    template<int M>
    __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid, const vfloat<M>& time) const
    {
      vfloat<M> ftime;
      const vint<M> itime = timeSegment<M>(time, ftime);

      const vfloat<M> t0 = 1.0f - ftime;
      const vfloat<M> t1 = ftime;
      Vec4vf<M> a0,a1,a2,a3; vgather<M>(a0,a1,a2,a3,primID,vid,itime);
      Vec4vf<M> b0,b1,b2,b3; vgather<M>(b0,b1,b2,b3,primID,vid,itime+1);
      p0 = madd(Vec4vf<M>(t0),a0,t1*b0);
      p1 = madd(Vec4vf<M>(t0),a1,t1*b1);
      p2 = madd(Vec4vf<M>(t0),a2,t1*b2);
      p3 = madd(Vec4vf<M>(t0),a3,t1*b3);
    }
#endif

    /*! calculates bounding box of i'th line segment */
    __forceinline BBox3fa bounds(const Vec3ff& v0, const Vec3ff& v1) const
    {
      const BBox3ff b = merge(BBox3ff(v0),BBox3ff(v1));
      return enlarge((BBox3fa)b,maxRadiusScale*Vec3fa(max(v0.w,v1.w)));
    }
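    // Annotation (not from upstream Embree): the segment box above is the merge of both endpoint
    // boxes, enlarged uniformly by the larger of the two radii stored in the .w components and scaled
    // by maxRadiusScale so that min-width radius scaling stays conservative.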

    /*! calculates bounding box of i'th line segment */
    __forceinline BBox3fa bounds(size_t i) const
    {
      const unsigned int index = segment(i);
      const Vec3ff v0 = vertex(index+0);
      const Vec3ff v1 = vertex(index+1);
      return bounds(v0,v1);
    }

    /*! calculates bounding box of i'th line segment for the itime'th time step */
    __forceinline BBox3fa bounds(size_t i, size_t itime) const
    {
      const unsigned int index = segment(i);
      const Vec3ff v0 = vertex(index+0,itime);
      const Vec3ff v1 = vertex(index+1,itime);
      return bounds(v0,v1);
    }

    /*! calculates bounding box of i'th line segment */
    __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
    {
      const unsigned int index = segment(i);
      const Vec3ff v0 = vertex(index+0);
      const Vec3ff v1 = vertex(index+1);
      const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
      const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
      return bounds(w0,w1);
    }

    /*! calculates bounding box of i'th line segment for the itime'th time step */
    __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
    {
      const unsigned int index = segment(i);
      const Vec3ff v0 = vertex(index+0,itime);
      const Vec3ff v1 = vertex(index+1,itime);
      const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
      const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
      return bounds(w0,w1);
    }

    /*! calculates bounding box of i'th segment */
    __forceinline BBox3fa bounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
    {
      const float r_scale = r_scale0*scale;
      const unsigned int index = segment(i);
      const Vec3ff v0 = vertex(index+0,itime);
      const Vec3ff v1 = vertex(index+1,itime);
      const Vec3ff w0(xfmVector(space,(v0-ofs)*Vec3fa(scale)),maxRadiusScale*v0.w*r_scale);
      const Vec3ff w1(xfmVector(space,(v1-ofs)*Vec3fa(scale)),maxRadiusScale*v1.w*r_scale);
      return bounds(w0,w1);
    }

    /*! check if the i'th primitive is valid at the itime'th timestep */
    __forceinline bool valid(size_t i, size_t itime) const {
      return valid(i, make_range(itime, itime));
    }

    /*! check if the i'th primitive is valid between the specified time range */
    __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
    {
      const unsigned int index = segment(i);
      if (index+1 >= numVertices()) return false;

#if !defined(__SYCL_DEVICE_ONLY__)

      for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
      {
        const Vec3ff v0 = vertex(index+0,itime); if (unlikely(!isvalid4(v0))) return false;
        const Vec3ff v1 = vertex(index+1,itime); if (unlikely(!isvalid4(v1))) return false;
        if (min(v0.w,v1.w) < 0.0f) return false;
      }
#endif

      return true;
    }

    /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
    __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
      return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
    }

    /*! calculates the build bounds of the i'th primitive, if it's valid */
    __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
    {
      if (!valid(i,0)) return false;
      *bbox = bounds(i);
      return true;
    }

    /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
    __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
    {
      if (!valid(i,itime+0) || !valid(i,itime+1)) return false;
      bbox = bounds(i,itime); // use bounds of first time step in builder
      return true;
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
      return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
      return LBBox3fa([&] (size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
      return LBBox3fa([&] (size_t itime) { return bounds(ofs, scale, r_scale0, space, primID, itime); }, dt, this->time_range, fnumTimeSegments);
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
    {
      if (!valid(i, timeSegmentRange(time_range))) return false;
      bbox = linearBounds(i, time_range);
      return true;
    }

    /*! get fast access to first vertex buffer */
    __forceinline float * getCompactVertexArray () const {
      return (float*) vertices0.getPtr();
    }

  public:
    BufferView<unsigned int> segments;                        //!< array of line segment indices
    BufferView<Vec3ff> vertices0;                             //!< fast access to first vertex buffer
    BufferView<Vec3fa> normals0;                              //!< fast access to first normal buffer
    BufferView<char> flags;                                   //!< start, end flag per segment
    Device::vector<BufferView<Vec3ff>> vertices = device;     //!< vertex array for each timestep
    Device::vector<BufferView<Vec3fa>> normals = device;      //!< normal array for each timestep
    Device::vector<BufferView<char>> vertexAttribs = device;  //!< user buffers
    int tessellationRate;                                     //!< tessellation rate for bezier curve
    float maxRadiusScale = 1.0;                               //!< maximal min-width scaling of curve radii
  };

  namespace isa
  {
    struct LineSegmentsISA : public LineSegments
    {
      LineSegmentsISA (Device* device, Geometry::GType gtype)
        : LineSegments(device,gtype) {}

      LinearSpace3fa computeAlignedSpace(const size_t primID) const
      {
        const Vec3fa dir = normalize(computeDirection(primID));
        if (is_finite(dir)) return frame(dir);
        else return LinearSpace3fa(one);
      }

      LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const
      {
        Vec3fa axisz(0,0,1);
        Vec3fa axisy(0,1,0);

        const range<int> tbounds = this->timeSegmentRange(time_range);
        if (tbounds.size() == 0) return frame(axisz);

        const size_t itime = (tbounds.begin()+tbounds.end())/2;

        const Vec3fa dir = normalize(computeDirection(primID,itime));
        if (is_finite(dir)) return frame(dir);
        else return LinearSpace3fa(one);
      }
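      // Annotation (not from upstream Embree): computeAlignedSpaceMB() derives the oriented frame from
      // the segment direction at the middle time step of the requested range; if that direction is not
      // finite (a degenerate segment), an identity space is returned instead.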

      Vec3fa computeDirection(unsigned int primID) const
      {
        const unsigned vtxID = segment(primID);
        const Vec3fa v0 = vertex(vtxID+0);
        const Vec3fa v1 = vertex(vtxID+1);
        return v1-v0;
      }

      Vec3fa computeDirection(unsigned int primID, size_t time) const
      {
        const unsigned vtxID = segment(primID);
        const Vec3fa v0 = vertex(vtxID+0,time);
        const Vec3fa v1 = vertex(vtxID+1,time);
        return v1-v0;
      }

      PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          BBox3fa bounds = empty;
          if (!buildBounds(j,&bounds)) continue;
          const PrimRef prim(bounds,geomID,unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          BBox3fa bounds = empty;
          if (!buildBounds(j,itime,bounds)) continue;
          const PrimRef prim(bounds,geomID,unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
        if (t0t1.empty()) return pinfo;

        for (size_t j = r.begin(); j < r.end(); j++) {
          LBBox3fa lbounds = empty;
          if (!linearBounds(j, t0t1, lbounds))
            continue;
          const PrimRef prim(lbounds.bounds(), geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfoMB pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          if (!valid(j, timeSegmentRange(t0t1))) continue;
          const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
          pinfo.add_primref(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }
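      // Illustrative sketch (not part of the upstream Embree source): a hypothetical call site
      // filling the motion-blur primitive array consumed by the MB builders. 'geom' stands for
      // a LineSegmentsISA*, 'device' for the embree Device*, and the geomID value is arbitrary:
      //
      //   mvector<PrimRefMB> prims(device, geom->size());
      //   const PrimInfoMB pinfo =
      //     geom->createPrimRefMBArray(prims, BBox1f(0.0f, 1.0f),
      //                                range<size_t>(0, geom->size()), /*k=*/0, /*geomID=*/0);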

      BBox3fa vbounds(size_t i) const {
        return bounds(i);
      }

      BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const {
        return bounds(space,i);
      }

      BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
        return bounds(ofs,scale,r_scale0,space,i,itime);
      }

      LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
        return linearBounds(primID,time_range);
      }

      LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
        return linearBounds(space,primID,time_range);
      }

      LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
        return linearBounds(ofs,scale,r_scale0,space,primID,time_range);
      }
    };
  }

  DECLARE_ISA_FUNCTION(LineSegments*, createLineSegments, Device* COMMA Geometry::GType);
}
361
engine/thirdparty/embree/kernels/common/scene_points.h
vendored
Normal file
@@ -0,0 +1,361 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "buffer.h"
#include "default.h"
#include "geometry.h"

namespace embree
{
  /*! represents an array of points */
  struct Points : public Geometry
  {
    /*! type of this geometry */
    static const Geometry::GTypeMask geom_type = Geometry::MTY_POINTS;

  public:
    /*! points construction */
    Points(Device* device, Geometry::GType gtype);

  public:
    void setMask(unsigned mask);
    void setNumTimeSteps(unsigned int numTimeSteps);
    void setVertexAttributeCount(unsigned int N);
    void setBuffer(RTCBufferType type,
                   unsigned int slot,
                   RTCFormat format,
                   const Ref<Buffer>& buffer,
                   size_t offset,
                   size_t stride,
                   unsigned int num);
    void* getBuffer(RTCBufferType type, unsigned int slot);
    void updateBuffer(RTCBufferType type, unsigned int slot);
    void commit();
    bool verify();
    void setMaxRadiusScale(float s);
    void addElementsToCount (GeometryCounts & counts) const;

  public:
    /*! returns the number of vertices */
    __forceinline size_t numVertices() const {
      return vertices[0].size();
    }

    /*! returns i'th vertex of the first time step */
    __forceinline Vec3ff vertex(size_t i) const {
      return vertices0[i];
    }

    /*! returns pointer to the i'th vertex of the first time step */
    __forceinline const char* vertexPtr(size_t i) const {
      return vertices0.getPtr(i);
    }

    /*! returns i'th normal of the first time step */
    __forceinline Vec3fa normal(size_t i) const {
      return normals0[i];
    }

    /*! returns i'th radius of the first time step */
    __forceinline float radius(size_t i) const {
      return vertices0[i].w;
    }

    /*! returns i'th vertex of the itime'th timestep */
    __forceinline Vec3ff vertex(size_t i, size_t itime) const {
      return vertices[itime][i];
    }

    /*! returns i'th vertex for the specified time */
    __forceinline Vec3ff vertex(size_t i, float time) const
    {
      float ftime;
      const size_t itime = timeSegment(time, ftime);
      const float t0 = 1.0f - ftime;
      const float t1 = ftime;
      Vec3ff v0 = vertex(i, itime+0);
      Vec3ff v1 = vertex(i, itime+1);
      return madd(Vec3ff(t0),v0,t1*v1);
    }
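    // The lerp above decomposes the query time into a segment index 'itime' and a local
    // fraction 'ftime' in [0,1], i.e. v(time) = (1-ftime)*v[itime] + ftime*v[itime+1].
    // For example, with three time steps (two segments) and the default [0,1] time range,
    // time = 0.75 maps to itime = 1 and ftime = 0.5, blending the last two vertex buffers equally.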

    /*! returns i'th vertex for the specified time */
    __forceinline Vec3ff vertex_safe(size_t i, float time) const
    {
      if (hasMotionBlur()) return vertex(i,time);
      else return vertex(i);
    }

    /*! returns pointer to the i'th vertex of the itime'th timestep */
    __forceinline const char* vertexPtr(size_t i, size_t itime) const {
      return vertices[itime].getPtr(i);
    }

    /*! returns i'th normal of the itime'th timestep */
    __forceinline Vec3fa normal(size_t i, size_t itime) const {
      return normals[itime][i];
    }

    /*! returns i'th normal for the specified time */
    __forceinline Vec3fa normal(size_t i, float time) const
    {
      float ftime;
      const size_t itime = timeSegment(time, ftime);
      const float t0 = 1.0f - ftime;
      const float t1 = ftime;
      Vec3fa n0 = normal(i, itime+0);
      Vec3fa n1 = normal(i, itime+1);
      return madd(Vec3fa(t0),n0,t1*n1);
    }

    /*! returns i'th normal for the specified time */
    __forceinline Vec3fa normal_safe(size_t i, float time) const
    {
      if (hasMotionBlur()) return normal(i,time);
      else return normal(i);
    }

    /*! returns i'th radius of the itime'th timestep */
    __forceinline float radius(size_t i, size_t itime) const {
      return vertices[itime][i].w;
    }

    /*! returns i'th radius for the specified time */
    __forceinline float radius(size_t i, float time) const
    {
      float ftime;
      const size_t itime = timeSegment(time, ftime);
      const float t0 = 1.0f - ftime;
      const float t1 = ftime;
      float r0 = radius(i, itime+0);
      float r1 = radius(i, itime+1);
      return madd(t0,r0,t1*r1);
    }

    /*! returns i'th radius for the specified time */
    __forceinline float radius_safe(size_t i, float time) const
    {
      if (hasMotionBlur()) return radius(i,time);
      else return radius(i);
    }

    /*! calculates bounding box of a point with center/radius v0 */
    __forceinline BBox3fa bounds(const Vec3ff& v0) const {
      return enlarge(BBox3fa(v0), maxRadiusScale*Vec3fa(v0.w));
    }

    /*! calculates bounding box of the i'th point */
    __forceinline BBox3fa bounds(size_t i) const
    {
      const Vec3ff v0 = vertex(i);
      return bounds(v0);
    }

    /*! calculates bounding box of the i'th point for the itime'th time step */
    __forceinline BBox3fa bounds(size_t i, size_t itime) const
    {
      const Vec3ff v0 = vertex(i, itime);
      return bounds(v0);
    }

    /*! calculates bounding box of the i'th point in the given space */
    __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
    {
      const Vec3ff v0 = vertex(i);
      const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
      return bounds(w0);
    }

    /*! calculates bounding box of the i'th point in the given space for the itime'th time step */
    __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
    {
      const Vec3ff v0 = vertex(i, itime);
      const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
      return bounds(w0);
    }

    /*! check if the i'th primitive is valid at the itime'th timestep */
    __forceinline bool valid(size_t i, size_t itime) const {
      return valid(i, make_range(itime, itime));
    }

    /*! check if the i'th primitive is valid between the specified time range */
    __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
    {
      const unsigned int index = (unsigned int)i;
      if (index >= numVertices())
        return false;

      for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) {
        const Vec3ff v0 = vertex(index + 0, itime);
        if (unlikely(!isvalid4(v0)))
          return false;
        if (v0.w < 0.0f)
          return false;
      }
      return true;
    }

    /*! calculates the linear bounds of the i'th primitive at the itime'th time segment */
    __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
      return LBBox3fa(bounds(i, itime + 0), bounds(i, itime + 1));
    }

    /*! calculates the build bounds of the i'th primitive, if it's valid */
    __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
    {
      if (!valid(i, 0))
        return false;
      *bbox = bounds(i);
      return true;
    }

    /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
    __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
    {
      if (!valid(i, itime + 0) || !valid(i, itime + 1))
        return false;
      bbox = bounds(i, itime); // use bounds of first time step in builder
      return true;
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
      return LBBox3fa([&](size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
    }
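    // Editorial note (not upstream): the LBBox3fa constructor used above evaluates the
    // per-time-step bounds functor over the segments covered by 'dt' and derives start/end
    // bounds intended to conservatively enclose the primitive on that sub-range;
    // 'fnumTimeSegments' passes the geometry's time-segment count as a float.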

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
      return LBBox3fa([&](size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
    {
      if (!valid(i, timeSegmentRange(time_range))) return false;
      bbox = linearBounds(i, time_range);
      return true;
    }

    /*! get fast access to first vertex buffer */
    __forceinline float * getCompactVertexArray () const {
      return (float*) vertices0.getPtr();
    }

    __forceinline float projectedPrimitiveArea(const size_t i) const {
      const float R = radius(i);
      return 1 + 2*M_PI*R*R;
    }

  public:
    BufferView<Vec3ff> vertices0;                             //!< fast access to first vertex buffer
    BufferView<Vec3fa> normals0;                              //!< fast access to first normal buffer
    Device::vector<BufferView<Vec3ff>> vertices = device;     //!< vertex array for each timestep
    Device::vector<BufferView<Vec3fa>> normals = device;      //!< normal array for each timestep
    Device::vector<BufferView<char>> vertexAttribs = device;  //!< user buffers
    float maxRadiusScale = 1.0;                               //!< maximal min-width scaling of curve radii
  };

  namespace isa
  {
    struct PointsISA : public Points
    {
      PointsISA(Device* device, Geometry::GType gtype) : Points(device, gtype) {}

      Vec3fa computeDirection(unsigned int primID) const
      {
        return Vec3fa(1, 0, 0);
      }

      Vec3fa computeDirection(unsigned int primID, size_t time) const
      {
        return Vec3fa(1, 0, 0);
      }

      PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j = r.begin(); j < r.end(); j++) {
          BBox3fa bounds = empty;
          if (!buildBounds(j, &bounds))
            continue;
          const PrimRef prim(bounds, geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j = r.begin(); j < r.end(); j++) {
          BBox3fa bounds = empty;
          if (!buildBounds(j, itime, bounds))
            continue;
          const PrimRef prim(bounds, geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
        if (t0t1.empty()) return pinfo;

        for (size_t j = r.begin(); j < r.end(); j++) {
          LBBox3fa lbounds = empty;
          if (!linearBounds(j, t0t1, lbounds))
            continue;
          const PrimRef prim(lbounds.bounds(), geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims,
                                      const BBox1f& t0t1,
                                      const range<size_t>& r,
                                      size_t k,
                                      unsigned int geomID) const
      {
        PrimInfoMB pinfo(empty);
        for (size_t j = r.begin(); j < r.end(); j++) {
          if (!valid(j, timeSegmentRange(t0t1)))
            continue;
          const PrimRefMB prim(linearBounds(j, t0t1), this->numTimeSegments(), this->time_range, this->numTimeSegments(), geomID, unsigned(j));
          pinfo.add_primref(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }
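      // Editorial note (not upstream): the two numTimeSegments() arguments above appear to
      // populate what PrimRefMB tracks as active and total time segments; for a standalone
      // points geometry both are simply the geometry's full segment count.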

      BBox3fa vbounds(size_t i) const
      {
        return bounds(i);
      }

      BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const
      {
        return bounds(space, i);
      }

      LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const
      {
        return linearBounds(primID, time_range);
      }

      LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const
      {
        return linearBounds(space, primID, time_range);
      }
    };
  }  // namespace isa

  DECLARE_ISA_FUNCTION(Points*, createPoints, Device* COMMA Geometry::GType);
}  // namespace embree
376
engine/thirdparty/embree/kernels/common/scene_quad_mesh.h
vendored
Normal file
@@ -0,0 +1,376 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "geometry.h"
#include "buffer.h"

namespace embree
{
  /*! Quad Mesh */
  struct QuadMesh : public Geometry
  {
    /*! type of this geometry */
    static const Geometry::GTypeMask geom_type = Geometry::MTY_QUAD_MESH;

    /*! quad indices */
    struct Quad
    {
      Quad() {}

      Quad (uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) {
        v[0] = v0; v[1] = v1; v[2] = v2; v[3] = v3;
      }

      /*! outputs quad indices */
      __forceinline friend embree_ostream operator<<(embree_ostream cout, const Quad& q) {
        return cout << "Quad {" << q.v[0] << ", " << q.v[1] << ", " << q.v[2] << ", " << q.v[3] << " }";
      }

      uint32_t v[4];
    };

  public:

    /*! quad mesh construction */
    QuadMesh (Device* device);

    /* geometry interface */
  public:
    void setMask(unsigned mask);
    void setNumTimeSteps (unsigned int numTimeSteps);
    void setVertexAttributeCount (unsigned int N);
    void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
    void* getBuffer(RTCBufferType type, unsigned int slot);
    void updateBuffer(RTCBufferType type, unsigned int slot);
    void commit();
    bool verify();
    void interpolate(const RTCInterpolateArguments* const args);
    void addElementsToCount (GeometryCounts & counts) const;

    template<int N>
    void interpolate_impl(const RTCInterpolateArguments* const args)
    {
      unsigned int primID = args->primID;
      float u = args->u;
      float v = args->v;
      RTCBufferType bufferType = args->bufferType;
      unsigned int bufferSlot = args->bufferSlot;
      float* P = args->P;
      float* dPdu = args->dPdu;
      float* dPdv = args->dPdv;
      float* ddPdudu = args->ddPdudu;
      float* ddPdvdv = args->ddPdvdv;
      float* ddPdudv = args->ddPdudv;
      unsigned int valueCount = args->valueCount;

      /* calculate base pointer and stride */
      assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
             (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
      const char* src = nullptr;
      size_t stride = 0;
      if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
        src = vertexAttribs[bufferSlot].getPtr();
        stride = vertexAttribs[bufferSlot].getStride();
      } else {
        src = vertices[bufferSlot].getPtr();
        stride = vertices[bufferSlot].getStride();
      }

      for (unsigned int i=0; i<valueCount; i+=N)
      {
        const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
        const size_t ofs = i*sizeof(float);
        const Quad& tri = quad(primID);
        const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]);
        const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]);
        const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]);
        const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[3]*stride+ofs]);
        const vbool<N> left = u+v <= 1.0f;
        const vfloat<N> Q0 = select(left,p0,p2);
        const vfloat<N> Q1 = select(left,p1,p3);
        const vfloat<N> Q2 = select(left,p3,p1);
        const vfloat<N> U = select(left,u,vfloat<N>(1.0f)-u);
        const vfloat<N> V = select(left,v,vfloat<N>(1.0f)-v);
        const vfloat<N> W = 1.0f-U-V;
        if (P) {
          mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
        }
        if (dPdu) {
          assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1));
          assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2));
        }
        if (ddPdudu) {
          assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
          assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
          assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
        }
      }
    }
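    // Editorial note (not part of the upstream source): the loop above splits the quad along
    // the v1-v3 diagonal and interpolates on one of the two triangles. For u+v <= 1 it uses
    // (v0,v1,v3) with weights (1-u-v, u, v); otherwise it uses (v2,v3,v1) with u,v mirrored.
    // A scalar sketch of the same selection, for illustration only:
    //
    //   float interpolate_scalar(float p0, float p1, float p2, float p3, float u, float v) {
    //     const bool left = (u + v) <= 1.0f;
    //     const float Q0 = left ? p0 : p2, Q1 = left ? p1 : p3, Q2 = left ? p3 : p1;
    //     const float U = left ? u : 1.0f - u, V = left ? v : 1.0f - v;
    //     return (1.0f - U - V) * Q0 + U * Q1 + V * Q2;
    //   }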

  public:

    /*! returns number of vertices */
    __forceinline size_t numVertices() const {
      return vertices[0].size();
    }

    /*! returns i'th quad */
    __forceinline const Quad& quad(size_t i) const {
      return quads[i];
    }

    /*! returns i'th vertex of the first time step */
    __forceinline const Vec3fa vertex(size_t i) const {
      return vertices0[i];
    }

    /*! returns pointer to the i'th vertex of the first time step */
    __forceinline const char* vertexPtr(size_t i) const {
      return vertices0.getPtr(i);
    }

    /*! returns i'th vertex of the itime'th timestep */
    __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
      return vertices[itime][i];
    }

    /*! returns pointer to the i'th vertex of the itime'th timestep */
    __forceinline const char* vertexPtr(size_t i, size_t itime) const {
      return vertices[itime].getPtr(i);
    }

    /*! returns i'th vertex for the specified time */
    __forceinline Vec3fa vertex(size_t i, float time) const
    {
      float ftime;
      const size_t itime = timeSegment(time, ftime);
      const float t0 = 1.0f - ftime;
      const float t1 = ftime;
      Vec3fa v0 = vertex(i, itime+0);
      Vec3fa v1 = vertex(i, itime+1);
      return madd(Vec3fa(t0),v0,t1*v1);
    }

    /*! calculates the bounds of the i'th quad */
    __forceinline BBox3fa bounds(size_t i) const
    {
      const Quad& q = quad(i);
      const Vec3fa v0 = vertex(q.v[0]);
      const Vec3fa v1 = vertex(q.v[1]);
      const Vec3fa v2 = vertex(q.v[2]);
      const Vec3fa v3 = vertex(q.v[3]);
      return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
    }

    /*! calculates the bounds of the i'th quad at the itime'th timestep */
    __forceinline BBox3fa bounds(size_t i, size_t itime) const
    {
      const Quad& q = quad(i);
      const Vec3fa v0 = vertex(q.v[0],itime);
      const Vec3fa v1 = vertex(q.v[1],itime);
      const Vec3fa v2 = vertex(q.v[2],itime);
      const Vec3fa v3 = vertex(q.v[3],itime);
      return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
    }

    /*! check if the i'th primitive is valid at the itime'th timestep */
    __forceinline bool valid(size_t i, size_t itime) const {
      return valid(i, make_range(itime, itime));
    }

    /*! check if the i'th primitive is valid between the specified time range */
    __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
    {
      const Quad& q = quad(i);
      if (unlikely(q.v[0] >= numVertices())) return false;
      if (unlikely(q.v[1] >= numVertices())) return false;
      if (unlikely(q.v[2] >= numVertices())) return false;
      if (unlikely(q.v[3] >= numVertices())) return false;

      for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
      {
        if (!isvalid(vertex(q.v[0],itime))) return false;
        if (!isvalid(vertex(q.v[1],itime))) return false;
        if (!isvalid(vertex(q.v[2],itime))) return false;
        if (!isvalid(vertex(q.v[3],itime))) return false;
      }

      return true;
    }

    /*! calculates the linear bounds of the i'th quad at the itime'th time segment */
    __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
      return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
    }

    /*! calculates the build bounds of the i'th primitive, if it's valid */
    __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
    {
      const Quad& q = quad(i);
      if (q.v[0] >= numVertices()) return false;
      if (q.v[1] >= numVertices()) return false;
      if (q.v[2] >= numVertices()) return false;
      if (q.v[3] >= numVertices()) return false;

      for (size_t t=0; t<numTimeSteps; t++)
      {
        const Vec3fa v0 = vertex(q.v[0],t);
        const Vec3fa v1 = vertex(q.v[1],t);
        const Vec3fa v2 = vertex(q.v[2],t);
        const Vec3fa v3 = vertex(q.v[3],t);

        if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3)))
          return false;
      }

      if (bbox)
        *bbox = bounds(i);

      return true;
    }

    /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
    __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
    {
      const Quad& q = quad(i);
      if (unlikely(q.v[0] >= numVertices())) return false;
      if (unlikely(q.v[1] >= numVertices())) return false;
      if (unlikely(q.v[2] >= numVertices())) return false;
      if (unlikely(q.v[3] >= numVertices())) return false;

      assert(itime+1 < numTimeSteps);
      const Vec3fa a0 = vertex(q.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
      const Vec3fa a1 = vertex(q.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
      const Vec3fa a2 = vertex(q.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
      const Vec3fa a3 = vertex(q.v[3],itime+0); if (unlikely(!isvalid(a3))) return false;
      const Vec3fa b0 = vertex(q.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
      const Vec3fa b1 = vertex(q.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
      const Vec3fa b2 = vertex(q.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;
      const Vec3fa b3 = vertex(q.v[3],itime+1); if (unlikely(!isvalid(b3))) return false;

      /* use bounds of first time step in builder */
      bbox = BBox3fa(min(a0,a1,a2,a3),max(a0,a1,a2,a3));
      return true;
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
      return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
    }

    /*! calculates the linear bounds of the i'th primitive for the specified time range */
    __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const
    {
      if (!valid(i, timeSegmentRange(dt))) return false;
      bbox = linearBounds(i, dt);
      return true;
    }

    /*! get fast access to first vertex buffer */
    __forceinline float * getCompactVertexArray () const {
      return (float*) vertices0.getPtr();
    }

    /* gets version info of topology */
    unsigned int getTopologyVersion() const {
      return quads.modCounter;
    }

    /* returns true if topology changed */
    bool topologyChanged(unsigned int otherVersion) const {
      return quads.isModified(otherVersion); // || numPrimitivesChanged;
    }

    /* returns the projected area */
    __forceinline float projectedPrimitiveArea(const size_t i) const {
      const Quad& q = quad(i);
      const Vec3fa v0 = vertex(q.v[0]);
      const Vec3fa v1 = vertex(q.v[1]);
      const Vec3fa v2 = vertex(q.v[2]);
      const Vec3fa v3 = vertex(q.v[3]);
      return areaProjectedTriangle(v0,v1,v3) +
             areaProjectedTriangle(v1,v2,v3);
    }
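    // Editorial note: consistent with interpolate_impl above, the projected area is obtained
    // by splitting the quad along the v1-v3 diagonal and summing the projected areas of the
    // two triangles (v0,v1,v3) and (v1,v2,v3).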

  public:
    BufferView<Quad> quads;                                //!< array of quads
    BufferView<Vec3fa> vertices0;                          //!< fast access to first vertex buffer
    Device::vector<BufferView<Vec3fa>> vertices = device;  //!< vertex array for each timestep
    Device::vector<RawBufferView> vertexAttribs = device;  //!< vertex attribute buffers
  };

  namespace isa
  {
    struct QuadMeshISA : public QuadMesh
    {
      QuadMeshISA (Device* device)
        : QuadMesh(device) {}

      LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
        return linearBounds(primID,time_range);
      }

      PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          BBox3fa bounds = empty;
          if (!buildBounds(j,&bounds)) continue;
          const PrimRef prim(bounds,geomID,unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          BBox3fa bounds = empty;
          if (!buildBounds(j,itime,bounds)) continue;
          const PrimRef prim(bounds,geomID,unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfo pinfo(empty);
        const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
        if (t0t1.empty()) return pinfo;

        for (size_t j = r.begin(); j < r.end(); j++) {
          LBBox3fa lbounds = empty;
          if (!linearBounds(j, t0t1, lbounds))
            continue;
          const PrimRef prim(lbounds.bounds(), geomID, unsigned(j));
          pinfo.add_center2(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }

      PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
      {
        PrimInfoMB pinfo(empty);
        for (size_t j=r.begin(); j<r.end(); j++)
        {
          if (!valid(j, timeSegmentRange(t0t1))) continue;
          const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
          pinfo.add_primref(prim);
          prims[k++] = prim;
        }
        return pinfo;
      }
    };
  }

  DECLARE_ISA_FUNCTION(QuadMesh*, createQuadMesh, Device*);
}
329
engine/thirdparty/embree/kernels/common/scene_subdiv_mesh.h
vendored
Normal file
@@ -0,0 +1,329 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "geometry.h"
#include "buffer.h"
#include "../subdiv/half_edge.h"
#include "../subdiv/tessellation_cache.h"
#include "../subdiv/catmullclark_coefficients.h"
#include "../subdiv/patch.h"

namespace embree
{
  struct HoleSet;
  struct VertexCreaseMap;
  struct EdgeCreaseMap;

  class SubdivMesh : public Geometry
  {
    ALIGNED_CLASS_(16);
  public:

    typedef HalfEdge::Edge Edge;

    /*! type of this geometry */
    static const Geometry::GTypeMask geom_type = Geometry::MTY_SUBDIV_MESH;

    /*! structure used to sort half edges using radix sort by their key */
    struct KeyHalfEdge
    {
      KeyHalfEdge() {}

      KeyHalfEdge (uint64_t key, HalfEdge* edge)
        : key(key), edge(edge) {}

      __forceinline operator uint64_t() const {
        return key;
      }

      friend __forceinline bool operator<(const KeyHalfEdge& e0, const KeyHalfEdge& e1) {
        return e0.key < e1.key;
      }

    public:
      uint64_t key;
      HalfEdge* edge;
    };
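    // Editorial note (not upstream): the 64-bit key is what the radix sort over KeyHalfEdge
    // entries orders by; sorting presumably brings half edges that reference the same vertex
    // pair next to each other so that opposite edges can be linked. The exact key layout is an
    // implementation detail of the corresponding source file and is not shown in this header.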

  public:

    /*! subdiv mesh construction */
    SubdivMesh(Device* device);
    ~SubdivMesh();

  public:
    void setMask (unsigned mask);
    void setSubdivisionMode (unsigned int topologyID, RTCSubdivisionMode mode);
    void setVertexAttributeTopology(unsigned int vertexAttribID, unsigned int topologyID);
    void setNumTimeSteps (unsigned int numTimeSteps);
    void setVertexAttributeCount (unsigned int N);
    void setTopologyCount (unsigned int N);
    void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
    void* getBuffer(RTCBufferType type, unsigned int slot);
    void updateBuffer(RTCBufferType type, unsigned int slot);
    void setTessellationRate(float N);
    bool verify();
    void commit();
    void addElementsToCount (GeometryCounts & counts) const;
    void setDisplacementFunction (RTCDisplacementFunctionN func);
    unsigned int getFirstHalfEdge(unsigned int faceID);
    unsigned int getFace(unsigned int edgeID);
    unsigned int getNextHalfEdge(unsigned int edgeID);
    unsigned int getPreviousHalfEdge(unsigned int edgeID);
    unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID);

  public:

    /*! return the number of faces */
    size_t numFaces() const {
      return faceVertices.size();
    }

    /*! return the number of edges */
    size_t numEdges() const {
      return topology[0].vertexIndices.size();
    }

    /*! return the number of vertices */
    size_t numVertices() const {
      return vertices[0].size();
    }

    /*! calculates the bounds of the i'th subdivision patch at the j'th timestep */
    __forceinline BBox3fa bounds(size_t i, size_t j = 0) const {
      return topology[0].getHalfEdge(i)->bounds(vertices[j]);
    }

    /*! check if the i'th primitive is valid */
    __forceinline bool valid(size_t i) const {
      return topology[0].valid(i) && !invalidFace(i);
    }

    /*! check if the i'th primitive is valid for the j'th time range */
    __forceinline bool valid(size_t i, size_t j) const {
      return topology[0].valid(i) && !invalidFace(i,j);
    }

    /*! prints some statistics */
    void printStatistics();

    /*! initializes the half edge data structure */
    void initializeHalfEdgeStructures ();

  public:

    /*! returns the vertex buffer for some time step */
    __forceinline const BufferView<Vec3fa>& getVertexBuffer( const size_t t = 0 ) const {
      return vertices[t];
    }

    /* returns tessellation level of edge */
    __forceinline float getEdgeLevel(const size_t i) const
    {
      if (levels) return clamp(levels[i],1.0f,4096.0f); // FIXME: do we want to limit edge level?
      else return clamp(tessellationRate,1.0f,4096.0f); // FIXME: do we want to limit edge level?
    }

  public:
    RTCDisplacementFunctionN displFunc;  //!< displacement function

    /*! all buffers in this section are provided by the application */
  public:

    /*! the topology contains all data that may differ when
     *  interpolating different user data buffers */
    struct Topology
    {
    public:

      /*! Default topology construction */
      Topology () : halfEdges(nullptr,0) {}

      /*! Topology initialization */
      Topology (SubdivMesh* mesh);

      /*! make the class movable */
    public:
      Topology (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
        : mesh(std::move(other.mesh)),
          vertexIndices(std::move(other.vertexIndices)),
          subdiv_mode(std::move(other.subdiv_mode)),
          halfEdges(std::move(other.halfEdges)),
          halfEdges0(std::move(other.halfEdges0)),
          halfEdges1(std::move(other.halfEdges1)) {}

      Topology& operator= (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
      {
        mesh = std::move(other.mesh);
        vertexIndices = std::move(other.vertexIndices);
        subdiv_mode = std::move(other.subdiv_mode);
        halfEdges = std::move(other.halfEdges);
        halfEdges0 = std::move(other.halfEdges0);
        halfEdges1 = std::move(other.halfEdges1);
        return *this;
      }

    public:
      /*! check if the i'th primitive is valid in this topology */
      __forceinline bool valid(size_t i) const
      {
        if (unlikely(subdiv_mode == RTC_SUBDIVISION_MODE_NO_BOUNDARY)) {
          if (getHalfEdge(i)->faceHasBorder()) return false;
        }
        return true;
      }

      /*! updates the interpolation mode for the topology */
      void setSubdivisionMode (RTCSubdivisionMode mode);

      /*! marks all buffers as modified */
      void update ();

      /*! verifies index array */
      bool verify (size_t numVertices);

      /*! initializes the half edge data structure */
      void initializeHalfEdgeStructures ();

    private:

      /*! recalculates the half edges */
      void calculateHalfEdges();

      /*! updates half edges when recalculation is not necessary */
      void updateHalfEdges();

      /*! user input data */
    public:

      SubdivMesh* mesh;

      /*! indices of the vertices composing each face */
      BufferView<unsigned int> vertexIndices;

      /*! subdiv interpolation mode */
      RTCSubdivisionMode subdiv_mode;

      /*! generated data */
    public:

      /*! returns the start half edge for face f */
      __forceinline const HalfEdge* getHalfEdge ( const size_t f ) const {
        return &halfEdges[mesh->faceStartEdge[f]];
      }

      /*! Half edge structure, generated by initHalfEdgeStructures */
      mvector<HalfEdge> halfEdges;

      /*! the following data is only required during construction of the
       *  half edge structure and can be cleared for static scenes */
    private:

      /*! two arrays used to sort the half edges */
      std::vector<KeyHalfEdge> halfEdges0;
      std::vector<KeyHalfEdge> halfEdges1;
    };

    /*! returns the start half edge for topology t and face f */
    __forceinline const HalfEdge* getHalfEdge ( const size_t t , const size_t f ) const {
      return topology[t].getHalfEdge(f);
    }

    /*! buffer containing the number of vertices for each face */
    BufferView<unsigned int> faceVertices;

    /*! array of topologies */
    vector<Topology> topology;

    /*! vertex buffer (one buffer for each time step) */
    vector<BufferView<Vec3fa>> vertices;

    /*! user data buffers */
    vector<RawBufferView> vertexAttribs;

    /*! edge crease buffer containing edges (pairs of vertices) that carry edge crease weights */
    BufferView<Edge> edge_creases;

    /*! edge crease weights for each edge of the edge_creases buffer */
    BufferView<float> edge_crease_weights;

    /*! vertex crease buffer containing all vertices that carry vertex crease weights */
    BufferView<unsigned int> vertex_creases;

    /*! vertex crease weights for each vertex of the vertex_creases buffer */
    BufferView<float> vertex_crease_weights;

    /*! subdivision level for each half edge of the vertexIndices buffer */
    BufferView<float> levels;
    float tessellationRate;  // constant rate that is used when levels is not set

    /*! buffer that marks specific faces as holes */
    BufferView<unsigned> holes;

    /*! all data in this section is generated by initializeHalfEdgeStructures function */
  private:

    /*! number of half edges used by faces */
    size_t numHalfEdges;

    /*! fast lookup table to find the first half edge for some face */
    mvector<uint32_t> faceStartEdge;

    /*! fast lookup table to find the face for some half edge */
    mvector<uint32_t> halfEdgeFace;

    /*! set with all holes */
    std::unique_ptr<HoleSet> holeSet;

    /*! fast lookup table to detect invalid faces */
    mvector<char> invalid_face;

    /*! test if face i is invalid in timestep j */
    __forceinline char& invalidFace(size_t i, size_t j = 0) { return invalid_face[i*numTimeSteps+j]; }
    __forceinline const char& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; }

    /*! interpolation cache */
  public:
    static __forceinline size_t numInterpolationSlots4(size_t stride) { return (stride+15)/16; }
    static __forceinline size_t numInterpolationSlots8(size_t stride) { return (stride+31)/32; }
    static __forceinline size_t interpolationSlot(size_t prim, size_t slot, size_t stride) {
      const size_t slots = numInterpolationSlots4(stride);
      assert(slot < slots);
      return slots*prim+slot;
    }
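    // Worked example (editorial): with a 12-byte stride (one Vec3f per vertex),
    // numInterpolationSlots4(12) = (12+15)/16 = 1, so interpolationSlot(prim, 0, 12) == prim;
    // a 48-byte stride yields 3 slots per primitive, i.e. slot indices 3*prim .. 3*prim+2.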
    std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_buffer_tags;
    std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_attrib_buffer_tags;
    std::vector<Patch3fa::Ref> patch_eval_trees;

    /*! the following data is only required during construction of the
     *  half edge structure and can be cleared for static scenes */
  private:

    /*! map with all vertex creases */
    std::unique_ptr<VertexCreaseMap> vertexCreaseMap;

    /*! map with all edge creases */
    std::unique_ptr<EdgeCreaseMap> edgeCreaseMap;

  protected:

    /*! counts number of geometry commits */
    size_t commitCounter;
  };

  namespace isa
  {
    struct SubdivMeshISA : public SubdivMesh
    {
      SubdivMeshISA (Device* device)
        : SubdivMesh(device) {}

      void interpolate(const RTCInterpolateArguments* const args);
      void interpolateN(const RTCInterpolateNArguments* const args);
    };
  }

  DECLARE_ISA_FUNCTION(SubdivMesh*, createSubdivMesh, Device*);
};