feat: godot-engine-source-4.3-stable

This commit is contained in:
Jan van der Weide 2025-01-17 16:36:38 +01:00
parent c59a7dcade
commit 7125d019b5
11149 changed files with 5070401 additions and 0 deletions

View file

@ -0,0 +1,411 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
#include "../geometry/primitive.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/heuristic_binning_array_aligned.h"
#include "../builders/heuristic_binning_array_unaligned.h"
#include "../builders/heuristic_strand_array.h"
#define NUM_HAIR_OBJECT_BINS 32
namespace embree
{
namespace isa
{
struct BVHBuilderHair
{
/*! settings for builder */
struct Settings
{
/*! default settings */
Settings ()
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}
public:
size_t branchingFactor; //!< branching factor of BVH to build
size_t maxDepth; //!< maximum depth of BVH to build
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
size_t minLeafSize; //!< minimum size of a leaf
size_t maxLeafSize; //!< maximum size of a leaf
size_t finished_range_threshold; //!< subtrees with at most this many primitives get reported via ReportFinishedRangeFunc (default inf = never)
};
/*! Generic top-down hair BVH builder. Node and leaf construction is
 *  delegated to caller-supplied functors, so the same algorithm can
 *  emit both axis-aligned (AABB) and oriented (OBB) nodes. */
template<typename NodeRef,
typename CreateAllocFunc,
typename CreateAABBNodeFunc,
typename SetAABBNodeFunc,
typename CreateOBBNodeFunc,
typename SetOBBNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor,
typename ReportFinishedRangeFunc>
class BuilderT
{
ALIGNED_CLASS_(16);
friend struct BVHBuilderHair;
typedef FastAllocator::CachedAllocator Allocator;
typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
typedef HeuristicStrandSplit HeuristicStrandSplitSAH;
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
/* SAH cost constants: traversing an unaligned (OBB) node is assumed
 * 5x more expensive than an aligned node traversal */
static const size_t travCostAligned = 1;
static const size_t travCostUnaligned = 5;
static const size_t intCost = 6;
/*! stores configuration and functor references; the prims array is
 *  owned by the caller and reordered in place during the build */
BuilderT (Scene* scene,
PrimRef* prims,
const CreateAllocFunc& createAlloc,
const CreateAABBNodeFunc& createAABBNode,
const SetAABBNodeFunc& setAABBNode,
const CreateOBBNodeFunc& createOBBNode,
const SetOBBNodeFunc& setOBBNode,
const CreateLeafFunc& createLeaf,
const ProgressMonitor& progressMonitor,
const ReportFinishedRangeFunc& reportFinishedRange,
const Settings settings)
: cfg(settings),
prims(prims),
createAlloc(createAlloc),
createAABBNode(createAABBNode),
setAABBNode(setAABBNode),
createOBBNode(createOBBNode),
setOBBNode(setOBBNode),
createLeaf(createLeaf),
progressMonitor(progressMonitor),
reportFinishedRange(reportFinishedRange),
alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {}
/*! checks if all primitives are from the same geometry */
__forceinline bool sameGeometry(const PrimInfoRange& range)
{
// linear scan over the range; an empty range trivially qualifies
if (range.size() == 0) return true;
unsigned int firstGeomID = prims[range.begin()].geomID();
for (size_t i=range.begin()+1; i<range.end(); i++) {
if (prims[i].geomID() != firstGeomID){
return false;
}
}
return true;
}
/*! creates a large leaf that could be larger than supported by the BVH */
NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
{
/* this should never occur but is a fatal error */
if (depth > cfg.maxDepth)
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
/* create leaf for few primitives; a leaf is only emitted once all
 * of its primitives come from a single geometry */
if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
return createLeaf(prims,pinfo,alloc);
/* fill all children by always splitting the largest one */
PrimInfoRange children[MAX_BRANCHING_FACTOR];
unsigned numChildren = 1;
children[0] = pinfo;
do {
/* find best child with largest bounding box area */
int bestChild = -1;
size_t bestSize = 0;
for (unsigned i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))
continue;
/* remember child with largest size */
if (children[i].size() > bestSize) {
bestSize = children[i].size();
bestChild = i;
}
}
if (bestChild == -1) break;
/*! split best child into left and right child; mixed-geometry
 *  children are split by geometry so final leaves become single-geometry */
__aligned(64) PrimInfoRange left, right;
if (!sameGeometry(children[bestChild])) {
alignedHeuristic.splitByGeometry(children[bestChild],left,right);
} else {
alignedHeuristic.splitFallback(children[bestChild],left,right);
}
/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = left;
children[numChildren+0] = right;
numChildren++;
} while (numChildren < cfg.branchingFactor);
/* create node */
auto node = createAABBNode(alloc);
for (size_t i=0; i<numChildren; i++) {
const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);
setAABBNode(node,i,child,children[i].geomBounds);
}
return node;
}
/*! performs split: tries aligned binning, unaligned binning and strand
 *  splitting, and applies whichever yields the lowest SAH cost */
__noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
{
/* variable to track the SAH of the best splitting approach */
float bestSAH = inf;
const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);
/* try standard binning in aligned space */
float alignedObjectSAH = inf;
HeuristicBinningSAH::Split alignedObjectSplit;
if (aligned) {
alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
bestSAH = min(alignedObjectSAH,bestSAH);
}
/* try standard binning in unaligned space; the more expensive
 * heuristics are only evaluated while the best split found so far is
 * not already clearly cheaper than a leaf (0.7f appears to be an
 * empirical factor) */
UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
LinearSpace3fa uspace;
float unalignedObjectSAH = inf;
if (bestSAH > 0.7f*leafSAH) {
uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
bestSAH = min(unalignedObjectSAH,bestSAH);
}
/* try splitting into two strands; only attempted for small ranges */
HeuristicStrandSplitSAH::Split strandSplit;
float strandSAH = inf;
if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
bestSAH = min(strandSAH,bestSAH);
}
/* fallback if SAH heuristics failed */
if (unlikely(!std::isfinite(bestSAH)))
{
alignedHeuristic.deterministic_order(pinfo);
alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
}
/* perform aligned split if this is best */
else if (bestSAH == alignedObjectSAH) {
alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
}
/* perform unaligned split if this is best; from here on the subtree
 * is built with OBB nodes */
else if (bestSAH == unalignedObjectSAH) {
unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
aligned = false;
}
/* perform strand split if this is best */
else if (bestSAH == strandSAH) {
strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
aligned = false;
}
/* can never happen */
else
assert(false);
}
/*! recursive build; alloc_barrier marks subtrees whose completed
 *  primitive range should be reported via reportFinishedRange */
NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
{
/* get thread local allocator */
if (!alloc)
alloc = createAlloc();
/* call memory monitor function to signal progress */
if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
progressMonitor(pinfo.size());
PrimInfoRange children[MAX_BRANCHING_FACTOR];
/* create leaf node */
if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
alignedHeuristic.deterministic_order(pinfo);
return createLargeLeaf(depth,pinfo,alloc);
}
/* fill all children by always splitting the one with the largest surface area */
size_t numChildren = 1;
children[0] = pinfo;
bool aligned = true;
do {
/* find best child with largest bounding box area */
ssize_t bestChild = -1;
float bestArea = neg_inf;
for (size_t i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.minLeafSize)
continue;
/* remember child with largest area */
if (area(children[i].geomBounds) > bestArea) {
bestArea = area(children[i].geomBounds);
bestChild = i;
}
}
if (bestChild == -1) break;
/*! split best child into left and right child */
PrimInfoRange left, right;
split(children[bestChild],left,right,aligned);
/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = left;
children[numChildren+0] = right;
numChildren++;
} while (numChildren < cfg.branchingFactor);
NodeRef node;
/* create aligned node while no split so far required an unaligned space */
if (aligned)
{
node = createAABBNode(alloc);
/* spawn tasks or ... */
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
/* a child becomes a barrier when its range just dropped below the reporting threshold */
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequentially */
else {
for (size_t i=0; i<numChildren; i++) {
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
}
}
}
/* create unaligned node; each child gets its own best-fit oriented space */
else
{
node = createOBBNode(alloc);
/* spawn tasks or ... */
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
const OBBox3fa obounds(space,sinfo.geomBounds);
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequentially */
else
{
for (size_t i=0; i<numChildren; i++) {
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
const OBBox3fa obounds(space,sinfo.geomBounds);
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
}
}
}
/* reports a finished range of primrefs */
if (unlikely(alloc_barrier))
reportFinishedRange(pinfo);
return node;
}
private:
Settings cfg;
PrimRef* prims;
const CreateAllocFunc& createAlloc;
const CreateAABBNodeFunc& createAABBNode;
const SetAABBNodeFunc& setAABBNode;
const CreateOBBNodeFunc& createOBBNode;
const SetOBBNodeFunc& setOBBNode;
const CreateLeafFunc& createLeaf;
const ProgressMonitor& progressMonitor;
const ReportFinishedRangeFunc& reportFinishedRange;
private:
HeuristicBinningSAH alignedHeuristic;
UnalignedHeuristicBinningSAH unalignedHeuristic;
HeuristicStrandSplitSAH strandHeuristic;
};
/*! entry point: instantiates a BuilderT with the supplied functors and
 *  recursively builds the hair BVH over the prims array */
template<typename NodeRef,
typename CreateAllocFunc,
typename CreateAABBNodeFunc,
typename SetAABBNodeFunc,
typename CreateOBBNodeFunc,
typename SetOBBNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor,
typename ReportFinishedRangeFunc>
static NodeRef build (const CreateAllocFunc& createAlloc,
const CreateAABBNodeFunc& createAABBNode,
const SetAABBNodeFunc& setAABBNode,
const CreateOBBNodeFunc& createOBBNode,
const SetOBBNodeFunc& setOBBNode,
const CreateLeafFunc& createLeaf,
const ProgressMonitor& progressMonitor,
const ReportFinishedRangeFunc& reportFinishedRange,
Scene* scene,
PrimRef* prims,
const PrimInfo& pinfo,
const Settings settings)
{
typedef BuilderT<NodeRef,
CreateAllocFunc,
CreateAABBNodeFunc,SetAABBNodeFunc,
CreateOBBNodeFunc,SetOBBNodeFunc,
CreateLeafFunc,ProgressMonitor,
ReportFinishedRangeFunc> Builder;
Builder builder(scene,prims,createAlloc,
createAABBNode,setAABBNode,
createOBBNode,setOBBNode,
createLeaf,progressMonitor,reportFinishedRange,settings);
NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);
_mm_mfence(); // to allow non-temporal stores during build
return root;
}
};
}
}

View file

@ -0,0 +1,502 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/builder.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_sort.h"
namespace embree
{
namespace isa
{
/*! Top-down BVH builder over Morton-code sorted primitives. */
struct BVHBuilderMorton
{
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
/*! settings for morton builder */
struct Settings
{
/*! default settings */
Settings ()
: branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024) {}
/*! initialize settings from API settings */
Settings (const RTCBuildArguments& settings)
: branchingFactor(2), maxDepth(32), minLeafSize(1), maxLeafSize(7), singleThreadThreshold(1024)
{
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;
if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;
if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;
minLeafSize = min(minLeafSize,maxLeafSize);
}
Settings (size_t branchingFactor, size_t maxDepth, size_t minLeafSize, size_t maxLeafSize, size_t singleThreadThreshold)
: branchingFactor(branchingFactor), maxDepth(maxDepth), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), singleThreadThreshold(singleThreadThreshold)
{
minLeafSize = min(minLeafSize,maxLeafSize);
}
public:
size_t branchingFactor; //!< branching factor of BVH to build
size_t maxDepth; //!< maximum depth of BVH to build
size_t minLeafSize; //!< minimum size of a leaf
size_t maxLeafSize; //!< maximum size of a leaf
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
};
/*! Build primitive consisting of morton code and primitive ID.
 *  The union view as a single 64-bit value allows the pair to be moved
 *  as one word; sorting compares only the morton code. */
struct __aligned(8) BuildPrim
{
union {
struct {
unsigned int code; //!< morton code
unsigned int index; //!< i'th primitive
};
uint64_t t;
};
/*! interface for radix sort */
__forceinline operator unsigned() const { return code; }
/*! interface for standard sort */
__forceinline bool operator<(const BuildPrim &m) const { return code < m.code; }
};
/*! maps bounding box to morton code */
struct MortonCodeMapping
{
static const size_t LATTICE_BITS_PER_DIM = 10;
static const size_t LATTICE_SIZE_PER_DIM = size_t(1) << LATTICE_BITS_PER_DIM;
vfloat4 base;
vfloat4 scale;
/*! builds the quantization from centroid bounds; the 0.99f factor keeps
 *  bin indices strictly below LATTICE_SIZE_PER_DIM, and degenerate
 *  dimensions (diag ~ 0) map to bin 0 */
__forceinline MortonCodeMapping(const BBox3fa& bounds)
{
base = (vfloat4)bounds.lower;
const vfloat4 diag = (vfloat4)bounds.upper - (vfloat4)bounds.lower;
scale = select(diag > vfloat4(1E-19f), rcp(diag) * vfloat4(LATTICE_SIZE_PER_DIM * 0.99f),vfloat4(0.0f));
}
/*! quantizes a box into per-dimension lattice bins; note the centroid is
 *  computed as lower+upper (2x the true center), which must match how the
 *  mapping bounds were accumulated (see center2 usage in recreateMortonCodes) */
__forceinline const vint4 bin (const BBox3fa& box) const
{
const vfloat4 lower = (vfloat4)box.lower;
const vfloat4 upper = (vfloat4)box.upper;
const vfloat4 centroid = lower+upper;
return vint4((centroid-base)*scale);
}
/*! interleaves the 3x10 bin bits into a single 30-bit morton code */
__forceinline unsigned int code (const BBox3fa& box) const
{
const vint4 binID = bin(box);
const unsigned int x = extract<0>(binID);
const unsigned int y = extract<1>(binID);
const unsigned int z = extract<2>(binID);
const unsigned int xyz = bitInterleave(x,y,z);
return xyz;
}
};
#if defined (__AVX2__) || defined(__SYCL_DEVICE_ONLY__)
/*! for AVX2 there is a fast scalar bitInterleave */
struct MortonCodeGenerator
{
__forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
: mapping(mapping), dest(dest) {}
/*! emits one BuildPrim per box directly into the destination array */
__forceinline void operator() (const BBox3fa& b, const unsigned index)
{
dest->index = index;
dest->code = mapping.code(b);
dest++;
}
public:
const MortonCodeMapping mapping;
BuildPrim* dest;
size_t currentID;
};
#else
/*! before AVX2 it is better to use the SSE version of bitInterleave */
struct MortonCodeGenerator
{
__forceinline MortonCodeGenerator(const MortonCodeMapping& mapping, BuildPrim* dest)
: mapping(mapping), dest(dest), currentID(0), slots(0), ax(0), ay(0), az(0), ai(0) {}
/*! flushes any remaining (<4) buffered primitives on destruction */
__forceinline ~MortonCodeGenerator()
{
if (slots != 0)
{
const vint4 code = bitInterleave(ax,ay,az);
for (size_t i=0; i<slots; i++) {
dest[currentID-slots+i].index = ai[i];
dest[currentID-slots+i].code = code[i];
}
}
}
/*! buffers bin IDs for 4 boxes, then interleaves and stores them with
 *  two vector writes */
__forceinline void operator() (const BBox3fa& b, const unsigned index)
{
const vint4 binID = mapping.bin(b);
ax[slots] = extract<0>(binID);
ay[slots] = extract<1>(binID);
az[slots] = extract<2>(binID);
ai[slots] = index;
slots++;
currentID++;
if (slots == 4)
{
const vint4 code = bitInterleave(ax,ay,az);
vint4::storeu(&dest[currentID-4],unpacklo(code,ai));
vint4::storeu(&dest[currentID-2],unpackhi(code,ai));
slots = 0;
}
}
public:
const MortonCodeMapping mapping;
BuildPrim* dest;
size_t currentID;
size_t slots;
vint4 ax, ay, az, ai;
};
#endif
template<
typename ReductionTy,
typename Allocator,
typename CreateAllocator,
typename CreateNodeFunc,
typename SetNodeBoundsFunc,
typename CreateLeafFunc,
typename CalculateBounds,
typename ProgressMonitor>
class BuilderT : private Settings
{
ALIGNED_CLASS_(16);
public:
BuilderT (CreateAllocator& createAllocator,
CreateNodeFunc& createNode,
SetNodeBoundsFunc& setBounds,
CreateLeafFunc& createLeaf,
CalculateBounds& calculateBounds,
ProgressMonitor& progressMonitor,
const Settings& settings)
: Settings(settings),
createAllocator(createAllocator),
createNode(createNode),
setBounds(setBounds),
createLeaf(createLeaf),
calculateBounds(calculateBounds),
progressMonitor(progressMonitor),
morton(nullptr) {}
/*! creates a (possibly oversized) leaf subtree by median-style splitting */
ReductionTy createLargeLeaf(size_t depth, const range<unsigned>& current, Allocator alloc)
{
/* this should never occur but is a fatal error */
if (depth > maxDepth)
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
/* create leaf for few primitives */
if (current.size() <= maxLeafSize)
return createLeaf(current,alloc);
/* fill all children by always splitting the largest one */
range<unsigned> children[MAX_BRANCHING_FACTOR];
size_t numChildren = 1;
children[0] = current;
do {
/* find best child with largest number of primitives */
size_t bestChild = -1;
size_t bestSize = 0;
for (size_t i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= maxLeafSize)
continue;
/* remember child with largest size */
if (children[i].size() > bestSize) {
bestSize = children[i].size();
bestChild = i;
}
}
if (bestChild == size_t(-1)) break;
/*! split best child into left and right child */
auto split = children[bestChild].split();
/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = split.first;
children[numChildren+0] = split.second;
numChildren++;
} while (numChildren < branchingFactor);
/* create node */
auto node = createNode(alloc,numChildren);
/* recurse into each child */
ReductionTy bounds[MAX_BRANCHING_FACTOR];
for (size_t i=0; i<numChildren; i++)
bounds[i] = createLargeLeaf(depth+1,children[i],alloc);
return setBounds(node,bounds,numChildren);
}
/*! recreates morton codes when reaching a region where all codes are identical;
 *  tighter centroid bounds of just this range usually disambiguate the codes */
__noinline void recreateMortonCodes(const range<unsigned>& current) const
{
/* fast path for small ranges */
if (likely(current.size() < 1024))
{
/*! recalculate centroid bounds */
BBox3fa centBounds(empty);
for (size_t i=current.begin(); i<current.end(); i++)
centBounds.extend(center2(calculateBounds(morton[i])));
/* recalculate morton codes */
MortonCodeMapping mapping(centBounds);
for (size_t i=current.begin(); i<current.end(); i++)
morton[i].code = mapping.code(calculateBounds(morton[i]));
/* sort morton codes */
std::sort(morton+current.begin(),morton+current.end());
}
else
{
/*! recalculate centroid bounds */
auto calculateCentBounds = [&] ( const range<unsigned>& r ) {
BBox3fa centBounds = empty;
for (size_t i=r.begin(); i<r.end(); i++)
centBounds.extend(center2(calculateBounds(morton[i])));
return centBounds;
};
const BBox3fa centBounds = parallel_reduce(current.begin(), current.end(), unsigned(1024),
BBox3fa(empty), calculateCentBounds, BBox3fa::merge);
/* recalculate morton codes */
MortonCodeMapping mapping(centBounds);
parallel_for(current.begin(), current.end(), unsigned(1024), [&] ( const range<unsigned>& r ) {
for (size_t i=r.begin(); i<r.end(); i++) {
morton[i].code = mapping.code(calculateBounds(morton[i]));
}
});
/*! sort morton codes */
#if defined(TASKING_TBB)
tbb::parallel_sort(morton+current.begin(),morton+current.end());
#else
radixsort32(morton+current.begin(),current.size());
#endif
}
}
/*! splits a (code-sorted) range at the topmost morton bit that differs
 *  between its first and last element */
__forceinline void split(const range<unsigned>& current, range<unsigned>& left, range<unsigned>& right) const
{
const unsigned int code_start = morton[current.begin()].code;
const unsigned int code_end = morton[current.end()-1].code;
unsigned int bitpos = lzcnt(code_start^code_end);
/* if all items mapped to same morton code, then re-create new morton codes for the items */
if (unlikely(bitpos == 32))
{
recreateMortonCodes(current);
const unsigned int code_start = morton[current.begin()].code;
const unsigned int code_end = morton[current.end()-1].code;
bitpos = lzcnt(code_start^code_end);
/* if the morton code is still the same, goto fall back split */
if (unlikely(bitpos == 32)) {
current.split(left,right);
return;
}
}
/* split the items at the topmost different morton code bit */
const unsigned int bitpos_diff = 31-bitpos;
const unsigned int bitmask = 1 << bitpos_diff;
/* find location where bit differs using binary search;
 * valid because the range is sorted by morton code */
unsigned begin = current.begin();
unsigned end = current.end();
while (begin + 1 != end) {
const unsigned mid = (begin+end)/2;
const unsigned bit = morton[mid].code & bitmask;
if (bit == 0) begin = mid; else end = mid;
}
unsigned center = end;
#if defined(DEBUG)
for (unsigned int i=begin; i<center; i++) assert((morton[i].code & bitmask) == 0);
for (unsigned int i=center; i<end; i++) assert((morton[i].code & bitmask) == bitmask);
#endif
left = make_range(current.begin(),center);
right = make_range(center,current.end());
}
/*! recursive build over a code-sorted range; large ranges process their
 *  children in parallel */
ReductionTy recurse(size_t depth, const range<unsigned>& current, Allocator alloc, bool toplevel)
{
/* get thread local allocator */
if (!alloc)
alloc = createAllocator();
/* call memory monitor function to signal progress */
if (toplevel && current.size() <= singleThreadThreshold)
progressMonitor(current.size());
/* create leaf node */
if (unlikely(depth+MIN_LARGE_LEAF_LEVELS >= maxDepth || current.size() <= minLeafSize))
return createLargeLeaf(depth,current,alloc);
/* fill all children by always splitting the one with the largest surface area */
range<unsigned> children[MAX_BRANCHING_FACTOR];
split(current,children[0],children[1]);
size_t numChildren = 2;
while (numChildren < branchingFactor)
{
/* find best child with largest number of primitives */
int bestChild = -1;
unsigned bestItems = 0;
for (unsigned int i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= minLeafSize)
continue;
/* remember child with largest area */
if (children[i].size() > bestItems) {
bestItems = children[i].size();
bestChild = i;
}
}
if (bestChild == -1) break;
/*! split best child into left and right child */
range<unsigned> left, right;
split(children[bestChild],left,right);
/* add new children left and right */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = left;
children[numChildren+0] = right;
numChildren++;
}
/* create leaf node if no split is possible */
if (unlikely(numChildren == 1))
return createLeaf(current,alloc);
/* allocate node */
auto node = createNode(alloc,numChildren);
/* process top parts of tree parallel */
ReductionTy bounds[MAX_BRANCHING_FACTOR];
if (current.size() > singleThreadThreshold)
{
/*! parallel_for is faster than spawning sub-tasks */
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
bounds[i] = recurse(depth+1,children[i],nullptr,true);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* finish tree sequentially */
else
{
for (size_t i=0; i<numChildren; i++)
bounds[i] = recurse(depth+1,children[i],alloc,false);
}
return setBounds(node,bounds,numChildren);
}
/* build function: radix-sorts the primitives by morton code (using tmp as
 * scratch space), then recursively builds the tree over src */
ReductionTy build(BuildPrim* src, BuildPrim* tmp, size_t numPrimitives)
{
/* sort morton codes */
morton = src;
radix_sort_u32(src,tmp,numPrimitives,singleThreadThreshold);
/* build BVH */
const ReductionTy root = recurse(1, range<unsigned>(0,(unsigned)numPrimitives), nullptr, true);
_mm_mfence(); // to allow non-temporal stores during build
return root;
}
public:
CreateAllocator& createAllocator;
CreateNodeFunc& createNode;
SetNodeBoundsFunc& setBounds;
CreateLeafFunc& createLeaf;
CalculateBounds& calculateBounds;
ProgressMonitor& progressMonitor;
public:
BuildPrim* morton;
};
/*! entry point: instantiates a BuilderT with the supplied functors and
 *  builds the BVH over the src array of morton build primitives */
template<
typename ReductionTy,
typename CreateAllocFunc,
typename CreateNodeFunc,
typename SetBoundsFunc,
typename CreateLeafFunc,
typename CalculateBoundsFunc,
typename ProgressMonitor>
static ReductionTy build(CreateAllocFunc createAllocator,
CreateNodeFunc createNode,
SetBoundsFunc setBounds,
CreateLeafFunc createLeaf,
CalculateBoundsFunc calculateBounds,
ProgressMonitor progressMonitor,
BuildPrim* src,
BuildPrim* tmp,
size_t numPrimitives,
const Settings& settings)
{
typedef BuilderT<
ReductionTy,
decltype(createAllocator()),
CreateAllocFunc,
CreateNodeFunc,
SetBoundsFunc,
CreateLeafFunc,
CalculateBoundsFunc,
ProgressMonitor> Builder;
Builder builder(createAllocator,
createNode,
setBounds,
createLeaf,
calculateBounds,
progressMonitor,
settings);
return builder.build(src,tmp,numPrimitives);
}
};
}
}

View file

@ -0,0 +1,693 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#define MBLUR_NUM_TEMPORAL_BINS 2
#define MBLUR_NUM_OBJECT_BINS 32
#include "../bvh/bvh.h"
#include "../builders/primref_mb.h"
#include "heuristic_binning_array_aligned.h"
#include "heuristic_timesplit_array.h"
namespace embree
{
namespace isa
{
/*! Manually reference-counted holder for a primitive vector that is
 *  shared between several build records. The last decRef() deletes the
 *  owned vector. Counting is plain (non-atomic), so a SharedVector must
 *  only be manipulated from one thread at a time. */
template<typename T>
struct SharedVector
{
/*! leaves members uninitialized */
__forceinline SharedVector() {}
/*! wraps \a ptr with an initial owner count (default one owner) */
__forceinline SharedVector(T* ptr, size_t refCount = 1)
: prims(ptr), refCount(refCount) {}
/*! registers one additional owner */
__forceinline void incRef() {
++refCount;
}
/*! drops one owner; the final owner frees the vector */
__forceinline void decRef()
{
--refCount;
if (refCount == 0)
delete prims;
}
T* prims;        //!< the shared vector (deleted once refCount reaches zero)
size_t refCount; //!< number of outstanding owners
};
/*! Tracks the build records of one node's children together with
 *  reference-counted ownership of the primitive vectors they point into.
 *  Shared vectors are placement-new'ed into a fixed local arena
 *  (sharedPrimVecs), so no heap allocation happens per child. */
template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
struct LocalChildListT
{
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
__forceinline LocalChildListT (const BuildRecord& record)
: numChildren(1), numSharedPrimVecs(1)
{
/* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
children[0] = record;
primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
}
/* drops this list's reference on every child's primitive vector */
__forceinline ~LocalChildListT()
{
for (size_t i = 0; i < numChildren; i++)
primvecs[i]->decRef();
}
__forceinline BuildRecord& operator[] ( const size_t i ) {
return children[i];
}
__forceinline size_t size() const {
return numChildren;
}
/*! replaces child bestChild by the two records of its split; each new
 *  record either keeps sharing the parent's primitive vector (incRef)
 *  or starts tracking a freshly allocated one */
__forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
{
SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
if (lrecord.prims.prims == bsharedPrimVec->prims) {
primvecs[bestChild] = bsharedPrimVec;
bsharedPrimVec->incRef();
}
else {
primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
}
if (rrecord.prims.prims == bsharedPrimVec->prims) {
primvecs[numChildren] = bsharedPrimVec;
bsharedPrimVec->incRef();
}
else {
primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
}
/* the split child no longer owns its old vector */
bsharedPrimVec->decRef();
/* ownership of any newly allocated vector is handed over to the
 * SharedPrimRefVector refcounting above, so release the unique_ptr */
new_vector.release();
children[bestChild] = lrecord;
children[numChildren] = rrecord;
numChildren++;
}
public:
array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
size_t numChildren;
array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs; //!< arena for placement-new of shared vectors
size_t numSharedPrimVecs;
};
/*! Recomputes a motion-blur primitive reference (PrimRefMB) for a new
 *  time range, using the statically-typed Mesh interface. */
template<typename Mesh>
struct RecalculatePrimRef
{
Scene* scene;
__forceinline RecalculatePrimRef (Scene* scene)
: scene(scene) {}
/*! rebuilds the PrimRefMB with linear bounds and time-segment range
 *  clipped to time_range */
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Mesh* mesh = scene->get<Mesh>(geomID);
const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}
/*! same, but bounds are computed in the given (unaligned) space */
// __noinline is workaround for ICC16 bug under MacOSX
__noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Mesh* mesh = scene->get<Mesh>(geomID);
const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}
/*! linear bounds of the primitive over time_range only (no PrimRefMB rebuild) */
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
}
// __noinline is workaround for ICC16 bug under MacOSX
__noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
}
};
/*! Variant of RecalculatePrimRef that goes through the virtual Geometry
 *  interface (vlinearBounds) instead of a statically-typed Mesh; also
 *  carries optional sub-grid build data for grid geometries. */
struct VirtualRecalculatePrimRef
{
Scene* scene;
const SubGridBuildData * const sgrids; //!< optional sub-grid data passed through to vlinearBounds
__forceinline VirtualRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids = nullptr)
: scene(scene), sgrids(sgrids) {}
/*! rebuilds the PrimRefMB with linear bounds and time-segment range
 *  clipped to time_range */
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Geometry* mesh = scene->get(geomID);
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range, sgrids);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}
/*! same, but bounds are computed in the given (unaligned) space */
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Geometry* mesh = scene->get(geomID);
const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}
/*! linear bounds of the primitive over time_range only (no PrimRefMB rebuild) */
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range, sgrids);
}
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
}
};
struct BVHBuilderMSMBlur
{
/*! settings for msmblur builder */
struct Settings
{
/*! default settings */
Settings ()
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
singleThreadThreshold(1024) {}
/*! settings with explicit SAH parameters; logBlockSize is derived as the
 *  position of the highest set bit of sahBlockSize (via bsr) */
Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
{
/* a leaf may never be required to hold more than it is allowed to */
minLeafSize = min(minLeafSize,maxLeafSize);
}
public:
size_t branchingFactor; //!< branching factor of BVH to build
size_t maxDepth; //!< maximum depth of BVH to build
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
size_t minLeafSize; //!< minimum size of a leaf
size_t maxLeafSize; //!< maximum size of a leaf
float travCost; //!< estimated cost of one traversal step
float intCost; //!< estimated cost of one primitive intersection
bool singleLeafTimeSegment; //!< split time to single time range
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
};
struct BuildRecord
{
public:
__forceinline BuildRecord () {}
__forceinline BuildRecord (size_t depth)
: depth(depth) {}
__forceinline BuildRecord (const SetMB& prims, size_t depth)
: depth(depth), prims(prims) {}
__forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
return a.prims.size() < b.prims.size();
}
__forceinline size_t size() const {
return prims.size();
}
public:
size_t depth; //!< Depth of the root of this subtree.
SetMB prims; //!< The list of primitives.
};
struct BuildRecordSplit : public BuildRecord
{
__forceinline BuildRecordSplit () {}
__forceinline BuildRecordSplit (size_t depth)
: BuildRecord(depth) {}
__forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
: BuildRecord(record), split(split) {}
BinSplit<MBLUR_NUM_OBJECT_BINS> split;
};
template<
typename NodeRef,
typename RecalculatePrimRef,
typename Allocator,
typename CreateAllocFunc,
typename CreateNodeFunc,
typename SetNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor>
class BuilderT
{
ALIGNED_CLASS_(16);
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
typedef mvector<PrimRefMB>* PrimRefVector;
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;
public:
BuilderT (MemoryMonitorInterface* device,
const RecalculatePrimRef recalculatePrimRef,
const CreateAllocFunc createAlloc,
const CreateNodeFunc createNode,
const SetNodeFunc setNode,
const CreateLeafFunc createLeaf,
const ProgressMonitor progressMonitor,
const Settings& settings)
: cfg(settings),
heuristicObjectSplit(),
heuristicTemporalSplit(device, recalculatePrimRef),
recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
progressMonitor(progressMonitor)
{
if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
}
/*! finds the best split */
const Split find(const SetMB& set)
{
/* first try standard object split */
const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
const float object_split_sah = object_split.splitSAH();
/* test temporal splits only when object split was bad */
const float leaf_sah = set.leafSAH(cfg.logBlockSize);
if (object_split_sah < 0.50f*leaf_sah)
return object_split;
/* do temporal splits only if the time range is big enough */
if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
{
const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
const float temporal_split_sah = temporal_split.splitSAH();
/* take temporal split if it improved SAH */
if (temporal_split_sah < object_split_sah)
return temporal_split;
}
return object_split;
}
/*! array partitioning */
__forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
{
/* perform object split */
if (likely(split.data == Split::SPLIT_OBJECT)) {
heuristicObjectSplit.split(split,set,lset,rset);
}
/* perform temporal split */
else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
return heuristicTemporalSplit.split(split,set,lset,rset);
}
/* perform fallback split */
else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
set.deterministic_order();
splitFallback(set,lset,rset);
}
/* split by geometry */
else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
set.deterministic_order();
splitByGeometry(set,lset,rset);
}
else
assert(false);
return std::unique_ptr<mvector<PrimRefMB>>();
}
/*! finds the best fallback split */
__noinline Split findFallback(const SetMB& set)
{
/* split if primitives are not from same geometry */
if (!sameGeometry(set))
return Split(0.0f,Split::SPLIT_GEOMID);
/* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
if (cfg.singleLeafTimeSegment)
{
/* test if one primitive has more than one time segment in time range, if so split time */
for (size_t i=set.begin(); i<set.end(); i++)
{
const PrimRefMB& prim = (*set.prims)[i];
const range<int> itime_range = prim.timeSegmentRange(set.time_range);
const int localTimeSegments = itime_range.size();
assert(localTimeSegments > 0);
if (localTimeSegments > 1) {
const int icenter = (itime_range.begin() + itime_range.end())/2;
const float splitTime = prim.timeStep(icenter);
return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
}
}
}
/* otherwise return fallback split */
return Split(0.0f,Split::SPLIT_FALLBACK);
}
/*! performs fallback split */
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
{
mvector<PrimRefMB>& prims = *set.prims;
const size_t begin = set.begin();
const size_t end = set.end();
const size_t center = (begin + end + 1) / 2;
PrimInfoMB linfo = empty;
for (size_t i=begin; i<center; i++)
linfo.add_primref(prims[i]);
PrimInfoMB rinfo = empty;
for (size_t i=center; i<end; i++)
rinfo.add_primref(prims[i]);
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
}
/*! checks if all primitives are from the same geometry */
__forceinline bool sameGeometry(const SetMB& set)
{
if (set.size() == 0) return true;
mvector<PrimRefMB>& prims = *set.prims;
const size_t begin = set.begin();
const size_t end = set.end();
unsigned int firstGeomID = prims[begin].geomID();
for (size_t i=begin+1; i<end; i++) {
if (prims[i].geomID() != firstGeomID){
return false;
}
}
return true;
}
/* split by geometry ID */
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
{
assert(set.size() > 1);
mvector<PrimRefMB>& prims = *set.prims;
const size_t begin = set.begin();
const size_t end = set.end();
PrimInfoMB left(empty);
PrimInfoMB right(empty);
unsigned int geomID = prims[begin].geomID();
size_t center = serial_partitioning(prims.data(),begin,end,left,right,
[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
[ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });
new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
}
const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
{
/* this should never occur but is a fatal error */
if (in.depth > cfg.maxDepth)
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
/* replace already found split by fallback split */
const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));
/* special case when directly creating leaf without any splits that could shrink time_range */
bool force_split = false;
if (current.depth == 1 && current.size() > 0)
{
BBox1f c = empty;
BBox1f p = current.prims.time_range;
for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
mvector<PrimRefMB>& prims = *current.prims.prims;
c.extend(prims[i].time_range);
}
force_split = c.lower > p.lower || c.upper < p.upper;
}
/* create leaf for few primitives */
if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
return createLeaf(current,alloc);
/* fill all children by always splitting the largest one */
bool hasTimeSplits = false;
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
LocalChildListSplit children(current);
do {
/* find best child with largest bounding box area */
size_t bestChild = -1;
size_t bestSize = 0;
for (size_t i=0; i<children.size(); i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
continue;
force_split = false;
/* remember child with largest size */
if (children[i].size() > bestSize) {
bestSize = children[i].size();
bestChild = i;
}
}
if (bestChild == -1) break;
/* perform best found split */
BuildRecordSplit& brecord = children[bestChild];
BuildRecordSplit lrecord(current.depth+1);
BuildRecordSplit rrecord(current.depth+1);
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
hasTimeSplits |= new_vector != nullptr;
/* find new splits */
lrecord.split = findFallback(lrecord.prims);
rrecord.split = findFallback(rrecord.prims);
children.split(bestChild,lrecord,rrecord,std::move(new_vector));
} while (children.size() < cfg.branchingFactor);
/* detect time_ranges that have shrunken */
for (size_t i=0; i<children.size(); i++) {
const BBox1f c = children[i].prims.time_range;
const BBox1f p = in.prims.time_range;
hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
}
/* create node */
auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);
/* recurse into each child and perform reduction */
LBBox3fa gbounds = empty;
for (size_t i=0; i<children.size(); i++) {
values[i] = createLargeLeaf(children[i],alloc);
gbounds.extend(values[i].lbounds);
}
setNode(current,children.children.data(),node,values,children.numChildren);
/* calculate geometry bounds of this node */
if (hasTimeSplits)
return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
else
return NodeRecordMB4D(node,gbounds,current.prims.time_range);
}
const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
{
/* get thread local allocator */
if (!alloc)
alloc = createAlloc();
/* call memory monitor function to signal progress */
if (toplevel && current.size() <= cfg.singleThreadThreshold)
progressMonitor(current.size());
/*! find best split */
const Split csplit = find(current.prims);
/*! compute leaf and split cost */
const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));
/*! create a leaf node when threshold reached or SAH tells us to stop */
if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
current.prims.deterministic_order();
return createLargeLeaf(current,alloc);
}
/*! perform initial split */
SetMB lprims,rprims;
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
bool hasTimeSplits = new_vector != nullptr;
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
LocalChildList children(current);
{
BuildRecord lrecord(lprims,current.depth+1);
BuildRecord rrecord(rprims,current.depth+1);
children.split(0,lrecord,rrecord,std::move(new_vector));
}
/*! split until node is full or SAH tells us to stop */
while (children.size() < cfg.branchingFactor)
{
/*! find best child to split */
float bestArea = neg_inf;
ssize_t bestChild = -1;
for (size_t i=0; i<children.size(); i++)
{
if (children[i].size() <= cfg.minLeafSize) continue;
if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
}
}
if (bestChild == -1) break;
/* perform split */
BuildRecord& brecord = children[bestChild];
BuildRecord lrecord(current.depth+1);
BuildRecord rrecord(current.depth+1);
Split csplit = find(brecord.prims);
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
hasTimeSplits |= new_vector != nullptr;
children.split(bestChild,lrecord,rrecord,std::move(new_vector));
}
/* detect time_ranges that have shrunken */
for (size_t i=0; i<children.size(); i++) {
const BBox1f c = children[i].prims.time_range;
const BBox1f p = current.prims.time_range;
hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
}
/* sort buildrecords for simpler shadow ray traversal */
//std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !!
/*! create an inner node */
auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
LBBox3fa gbounds = empty;
/* spawn tasks */
if (unlikely(current.size() > cfg.singleThreadThreshold))
{
/*! parallel_for is faster than spawning sub-tasks */
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
values[i] = recurse(children[i],nullptr,true);
_mm_mfence(); // to allow non-temporal stores during build
}
});
/*! merge bounding boxes */
for (size_t i=0; i<children.size(); i++)
gbounds.extend(values[i].lbounds);
}
/* recurse into each child */
else
{
//for (size_t i=0; i<children.size(); i++)
for (ssize_t i=children.size()-1; i>=0; i--) {
values[i] = recurse(children[i],alloc,false);
gbounds.extend(values[i].lbounds);
}
}
setNode(current,children.children.data(),node,values,children.numChildren);
/* calculate geometry bounds of this node */
if (unlikely(hasTimeSplits))
return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
else
return NodeRecordMB4D(node,gbounds,current.prims.time_range);
}
/*! builder entry function */
__forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
{
const SetMB set(pinfo,&prims);
auto ret = recurse(BuildRecord(set,1),nullptr,true);
_mm_mfence(); // to allow non-temporal stores during build
return ret;
}
private:
Settings cfg;
HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
const RecalculatePrimRef recalculatePrimRef;
const CreateAllocFunc createAlloc;
const CreateNodeFunc createNode;
const SetNodeFunc setNode;
const CreateLeafFunc createLeaf;
const ProgressMonitor progressMonitor;
};
template<typename NodeRef,
typename RecalculatePrimRef,
typename CreateAllocFunc,
typename CreateNodeFunc,
typename SetNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitorFunc>
static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
const PrimInfoMB& pinfo,
MemoryMonitorInterface* device,
const RecalculatePrimRef recalculatePrimRef,
const CreateAllocFunc createAlloc,
const CreateNodeFunc createNode,
const SetNodeFunc setNode,
const CreateLeafFunc createLeaf,
const ProgressMonitorFunc progressMonitor,
const Settings& settings)
{
typedef BuilderT<
NodeRef,
RecalculatePrimRef,
decltype(createAlloc()),
CreateAllocFunc,
CreateNodeFunc,
SetNodeFunc,
CreateLeafFunc,
ProgressMonitorFunc> Builder;
Builder builder(device,
recalculatePrimRef,
createAlloc,
createNode,
setNode,
createLeaf,
progressMonitor,
settings);
return builder(prims,pinfo);
}
};
}
}

View file

@ -0,0 +1,526 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
#include "../geometry/primitive.h"
#include "../builders/bvh_builder_msmblur.h"
#include "../builders/heuristic_binning_array_aligned.h"
#include "../builders/heuristic_binning_array_unaligned.h"
#include "../builders/heuristic_timesplit_array.h"
namespace embree
{
namespace isa
{
/* Motion-blur BVH builder for hair/curve geometry. Chooses between aligned
 * object splits, unaligned (oriented-space) object splits, and temporal
 * splits, creating AABB or OBB motion-blur nodes through the supplied
 * callbacks. */
struct BVHBuilderHairMSMBlur
{
  /*! settings for msmblur builder */
  struct Settings
  {
    /*! default settings */
    Settings ()
    : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}

  public:
    size_t branchingFactor; //!< branching factor of BVH to build
    size_t maxDepth;        //!< maximum depth of BVH to build
    size_t logBlockSize;    //!< log2 of blocksize for SAH heuristic
    size_t minLeafSize;     //!< minimum size of a leaf
    size_t maxLeafSize;     //!< maximum size of a leaf
  };

  /*! work item of the builder: a primitive set plus subtree depth */
  struct BuildRecord
  {
  public:
    __forceinline BuildRecord () {}

    __forceinline BuildRecord (size_t depth)
    : depth(depth) {}

    __forceinline BuildRecord (const SetMB& prims, size_t depth)
    : depth(depth), prims(prims) {}

    __forceinline size_t size() const {
      return prims.size();
    }

  public:
    size_t depth; //!< depth of the root of this subtree
    SetMB prims;  //!< the list of primitives
  };

  /*! the recursive builder; node and leaf creation is delegated to callbacks */
  template<typename NodeRef,
    typename RecalculatePrimRef,
    typename CreateAllocFunc,
    typename CreateAABBNodeMBFunc,
    typename SetAABBNodeMBFunc,
    typename CreateOBBNodeMBFunc,
    typename SetOBBNodeMBFunc,
    typename CreateLeafFunc,
    typename ProgressMonitor>

    class BuilderT
    {
      ALIGNED_CLASS_(16);

      static const size_t MAX_BRANCHING_FACTOR = 8;         //!< maximum supported BVH branching factor
      static const size_t MIN_LARGE_LEAF_LEVELS = 8;        //!< create balanced tree if we are that many levels before the maximum tree depth
      static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build

      typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
      typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;

      typedef FastAllocator::CachedAllocator Allocator;
      typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;

      typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
      typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
      typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;

    public:

      BuilderT (Scene* scene,
                const RecalculatePrimRef& recalculatePrimRef,
                const CreateAllocFunc& createAlloc,
                const CreateAABBNodeMBFunc& createAABBNodeMB,
                const SetAABBNodeMBFunc& setAABBNodeMB,
                const CreateOBBNodeMBFunc& createOBBNodeMB,
                const SetOBBNodeMBFunc& setOBBNodeMB,
                const CreateLeafFunc& createLeaf,
                const ProgressMonitor& progressMonitor,
                const Settings settings)

        : cfg(settings),
        scene(scene),
        recalculatePrimRef(recalculatePrimRef),
        createAlloc(createAlloc),
        createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
        createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
        createLeaf(createLeaf),
        progressMonitor(progressMonitor),
        unalignedHeuristic(scene),
        temporalSplitHeuristic(scene->device,recalculatePrimRef) {}

    private:

      /*! checks if all primitives are from the same geometry.
       *  FIX: added an explicit empty-set guard; the previous version read
       *  prims[set.begin()] unconditionally, which is out of range for an
       *  empty set. This matches the guarded sameGeometry() used by the
       *  general MSMBlur builder. */
      __forceinline bool sameGeometry(const SetMB& set)
      {
        if (set.size() == 0) return true;
        mvector<PrimRefMB>& prims = *set.prims;
        unsigned int firstGeomID = prims[set.begin()].geomID();
        for (size_t i=set.begin()+1; i<set.end(); i++) {
          if (prims[i].geomID() != firstGeomID){
            return false;
          }
        }
        return true;
      }

      /*! performs some split if SAH approaches fail: cut the range in half */
      void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
      {
        mvector<PrimRefMB>& prims = *set.prims;

        const size_t begin = set.begin();
        const size_t end   = set.end();
        const size_t center = (begin + end)/2;

        PrimInfoMB linfo = empty;
        for (size_t i=begin; i<center; i++)
          linfo.add_primref(prims[i]);

        PrimInfoMB rinfo = empty;
        for (size_t i=center; i<end; i++)
          rinfo.add_primref(prims[i]);

        new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
        new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
      }

      /*! partitions primitives of the first geometry to the left side */
      void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
      {
        assert(set.size() > 1);

        const size_t begin = set.begin();
        const size_t end   = set.end();

        PrimInfoMB linfo(empty);
        PrimInfoMB rinfo(empty);
        unsigned int geomID = (*set.prims)[begin].geomID();
        size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,
                                            [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
                                            [ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });

        new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
        new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
      }

      /*! creates a large leaf that could be larger than supported by the BVH */
      NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
      {
        /* this should never occur but is a fatal error */
        if (current.depth > cfg.maxDepth)
          throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

        /* special case when directly creating leaf without any splits that could shrink time_range */
        bool force_split = false;
        if (current.depth == 1 && current.size() > 0)
        {
          BBox1f c = empty;
          BBox1f p = current.prims.time_range;
          for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
            mvector<PrimRefMB>& prims = *current.prims.prims;
            c.extend(prims[i].time_range);
          }

          force_split = c.lower > p.lower || c.upper < p.upper;
        }

        /* create leaf for few primitives */
        if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
          return createLeaf(current.prims,alloc);

        /* fill all children by always splitting the largest one */
        LocalChildList children(current);
        NodeRecordMB4D values[MAX_BRANCHING_FACTOR];

        do {
          /* find best child with largest bounding box area */
          int bestChild = -1;
          size_t bestSize = 0;
          for (unsigned i=0; i<children.size(); i++)
          {
            /* ignore leaves as they cannot get split */
            if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
              continue;

            force_split = false;

            /* remember child with largest size */
            if (children[i].size() > bestSize) {
              bestSize = children[i].size();
              bestChild = i;
            }
          }
          if (bestChild == -1) break;

          /*! split best child into left and right child */
          BuildRecord left(current.depth+1);
          BuildRecord right(current.depth+1);
          if (!sameGeometry(children[bestChild].prims)) {
            splitByGeometry(children[bestChild].prims,left.prims,right.prims);
          } else {
            splitFallback(children[bestChild].prims,left.prims,right.prims);
          }
          children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());

        } while (children.size() < cfg.branchingFactor);

        /* detect time_ranges that have shrunken */
        bool timesplit = false;
        for (size_t i=0; i<children.size(); i++) {
          const BBox1f c = children[i].prims.time_range;
          const BBox1f p = current.prims.time_range;
          timesplit |= c.lower > p.lower || c.upper < p.upper;
        }

        /* create node */
        NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);

        LBBox3fa bounds = empty;
        for (size_t i=0; i<children.size(); i++) {
          values[i] = createLargeLeaf(children[i],alloc);
          bounds.extend(values[i].lbounds);
        }

        setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);

        /* after a time split the merged child bounds are not valid for the
         * full range, so recompute linear bounds from the primitives */
        if (timesplit)
          bounds = current.prims.linearBounds(recalculatePrimRef);

        return NodeRecordMB4D(node,bounds,current.prims.time_range);
      }

      /*! performs split: tries aligned, unaligned, and temporal splits and
       *  applies the one with the best SAH; may fall back to a median split */
      std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
      {
        /* variable to track the SAH of the best splitting approach */
        float bestSAH = inf;
        const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);

        /* perform standard binning in aligned space */
        HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
        float alignedObjectSAH = alignedObjectSplit.splitSAH();
        bestSAH = min(alignedObjectSAH,bestSAH);

        /* perform standard binning in unaligned space */
        UnalignedHeuristicBinning::Split unalignedObjectSplit;
        LinearSpace3fa uspace;
        float unalignedObjectSAH = inf;
        if (alignedObjectSAH > 0.7f*leafSAH) {
          uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
          const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
          unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
          unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
          bestSAH = min(unalignedObjectSAH,bestSAH);
        }

        /* do temporal splits only if previous approaches failed to produce good SAH and the time range is large enough */
        float temporal_split_sah = inf;
        typename HeuristicTemporal::Split temporal_split;
        if (bestSAH > 0.5f*leafSAH) {
          if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {
            temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
            temporal_split_sah = temporal_split.splitSAH();
            bestSAH = min(temporal_split_sah,bestSAH);
          }
        }

        /* perform fallback split if SAH heuristics failed */
        if (unlikely(!std::isfinite(bestSAH))) {
          current.prims.deterministic_order();
          splitFallback(current.prims,lrecord.prims,rrecord.prims);
        }
        /* perform aligned split if this is best */
        else if (likely(bestSAH == alignedObjectSAH)) {
          alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
        }
        /* perform unaligned split if this is best */
        else if (likely(bestSAH == unalignedObjectSAH)) {
          unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
          aligned = false;
        }
        /* perform temporal split if this is best */
        else if (likely(bestSAH == temporal_split_sah)) {
          timesplit = true;
          return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
        }
        else
          assert(false);

        return std::unique_ptr<mvector<PrimRefMB>>();
      }

      /*! recursive build */
      NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
      {
        /* get thread local allocator */
        if (!alloc)
          alloc = createAlloc();

        /* call memory monitor function to signal progress */
        if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
          progressMonitor(current.size());

        /* create leaf node */
        if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
          current.prims.deterministic_order();
          return createLargeLeaf(current,alloc);
        }

        /* fill all children by always splitting the one with the largest surface area */
        NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
        LocalChildList children(current);
        bool aligned = true;
        bool timesplit = false;

        do {
          /* find best child with largest bounding box area */
          ssize_t bestChild = -1;
          float bestArea = neg_inf;
          for (size_t i=0; i<children.size(); i++)
          {
            /* ignore leaves as they cannot get split */
            if (children[i].size() <= cfg.minLeafSize)
              continue;

            /* remember child with largest area */
            const float A = children[i].prims.halfArea();
            if (A > bestArea) {
              bestArea = children[i].prims.halfArea();
              bestChild = i;
            }
          }
          if (bestChild == -1) break;

          /*! split best child into left and right child */
          BuildRecord left(current.depth+1);
          BuildRecord right(current.depth+1);
          std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
          children.split(bestChild,left,right,std::move(new_vector));

        } while (children.size() < cfg.branchingFactor);

        /* detect time_ranges that have shrunken */
        for (size_t i=0; i<children.size(); i++) {
          const BBox1f c = children[i].prims.time_range;
          const BBox1f p = current.prims.time_range;
          timesplit |= c.lower > p.lower || c.upper < p.upper;
        }

        /* create time split node */
        if (timesplit)
        {
          const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);

          /* spawn tasks or ... */
          if (current.size() > SINGLE_THREADED_THRESHOLD)
          {
            parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++) {
                  values[i] = recurse(children[i],nullptr,true);
                  _mm_mfence(); // to allow non-temporal stores during build
                }
              });
          }
          /* ... continue sequential */
          else {
            for (size_t i=0; i<children.size(); i++) {
              values[i] = recurse(children[i],alloc,false);
            }
          }

          setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);

          /* child bounds span different time ranges; recompute linear bounds */
          const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
          return NodeRecordMB4D(node,bounds,current.prims.time_range);
        }

        /* create aligned node */
        else if (aligned)
        {
          const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);

          /* spawn tasks or ... */
          if (current.size() > SINGLE_THREADED_THRESHOLD)
          {
            LBBox3fa cbounds[MAX_BRANCHING_FACTOR];
            parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++) {
                  values[i] = recurse(children[i],nullptr,true);
                  cbounds[i] = values[i].lbounds;
                  _mm_mfence(); // to allow non-temporal stores during build
                }
              });

            LBBox3fa bounds = empty;
            for (size_t i=0; i<children.size(); i++)
              bounds.extend(cbounds[i]);
            setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
            return NodeRecordMB4D(node,bounds,current.prims.time_range);
          }
          /* ... continue sequentially */
          else
          {
            LBBox3fa bounds = empty;
            for (size_t i=0; i<children.size(); i++) {
              values[i] = recurse(children[i],alloc,false);
              bounds.extend(values[i].lbounds);
            }
            setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
            return NodeRecordMB4D(node,bounds,current.prims.time_range);
          }
        }

        /* create unaligned node */
        else
        {
          const NodeRef node = createOBBNodeMB(alloc);

          /* spawn tasks or ... */
          if (current.size() > SINGLE_THREADED_THRESHOLD)
          {
            parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                for (size_t i=r.begin(); i<r.end(); i++) {
                  const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
                  const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
                  const auto child = recurse(children[i],nullptr,true);
                  setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
                  _mm_mfence(); // to allow non-temporal stores during build
                }
              });
          }
          /* ... continue sequentially */
          else
          {
            for (size_t i=0; i<children.size(); i++) {
              const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
              const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
              const auto child = recurse(children[i],alloc,false);
              setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
            }
          }

          const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
          return NodeRecordMB4D(node,bounds,current.prims.time_range);
        }
      }

    public:

      /*! entry point into builder */
      NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
      {
        BuildRecord record(SetMB(pinfo,&prims),1);
        auto root = recurse(record,nullptr,true);
        _mm_mfence(); // to allow non-temporal stores during build
        return root;
      }

    private:
      Settings cfg;
      Scene* scene;
      const RecalculatePrimRef& recalculatePrimRef;
      const CreateAllocFunc& createAlloc;
      const CreateAABBNodeMBFunc& createAABBNodeMB;
      const SetAABBNodeMBFunc& setAABBNodeMB;
      const CreateOBBNodeMBFunc& createOBBNodeMB;
      const SetOBBNodeMBFunc& setOBBNodeMB;
      const CreateLeafFunc& createLeaf;
      const ProgressMonitor& progressMonitor;

    private:
      HeuristicBinning alignedHeuristic;
      UnalignedHeuristicBinning unalignedHeuristic;
      HeuristicTemporal temporalSplitHeuristic;
    };

  /*! convenience entry point: instantiates a BuilderT and builds the BVH */
  template<typename NodeRef,
    typename RecalculatePrimRef,
    typename CreateAllocFunc,
    typename CreateAABBNodeMBFunc,
    typename SetAABBNodeMBFunc,
    typename CreateOBBNodeMBFunc,
    typename SetOBBNodeMBFunc,
    typename CreateLeafFunc,
    typename ProgressMonitor>

    static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
                                             const RecalculatePrimRef& recalculatePrimRef,
                                             const CreateAllocFunc& createAlloc,
                                             const CreateAABBNodeMBFunc& createAABBNodeMB,
                                             const SetAABBNodeMBFunc& setAABBNodeMB,
                                             const CreateOBBNodeMBFunc& createOBBNodeMB,
                                             const SetOBBNodeMBFunc& setOBBNodeMB,
                                             const CreateLeafFunc& createLeaf,
                                             const ProgressMonitor& progressMonitor,
                                             const Settings settings)
    {
      typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
        CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
        CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
        CreateLeafFunc,ProgressMonitor> Builder;

      Builder builder(scene,recalculatePrimRef,createAlloc,
                      createAABBNodeMB,setAABBNodeMB,
                      createOBBNodeMB,setOBBNodeMB,
                      createLeaf,progressMonitor,settings);

      return builder(prims,pinfo);
    }
};
}
}

View file

@ -0,0 +1,664 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "heuristic_binning_array_aligned.h"
#include "heuristic_spatial_array.h"
#include "heuristic_openmerge_array.h"
#define NUM_OBJECT_BINS 32
#define NUM_SPATIAL_BINS 16
namespace embree
{
namespace isa
{
MAYBE_UNUSED static const float travCost = 1.0f;
MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
struct GeneralBVHBuilder
{
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
/*! settings for SAH builder */
struct Settings
{
  /*! default settings: binary BVH, depth limit 32, leaves of 1..7 primitives,
   *  unit traversal/intersection costs, and a 1024-primitive threshold below
   *  which the build runs single threaded */
  Settings ()
  : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
    travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {}

  /*! initialize settings from API settings; fields absent from the RTCBuildArguments
   *  struct (version-checked via RTC_BUILD_ARGUMENTS_HAS) keep their defaults */
  Settings (const RTCBuildArguments& settings)
  : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),
    travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf)
  {
    if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;
    if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth          )) maxDepth          = settings.maxDepth;
    if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize      )) logBlockSize      = bsr(settings.sahBlockSize); // store as log2 for shift-based division
    if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize       )) minLeafSize       = settings.minLeafSize;
    if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize       )) maxLeafSize       = settings.maxLeafSize;
    if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost     )) travCost          = settings.traversalCost;
    if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost  )) intCost           = settings.intersectionCost;

    minLeafSize = min(minLeafSize,maxLeafSize); // keep the pair consistent
  }

  /*! explicit settings; sahBlockSize is rounded down to a power of two via bsr */
  Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
  : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
    travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
  {
    minLeafSize = min(minLeafSize,maxLeafSize); // keep the pair consistent
  }

public:
  size_t branchingFactor;        //!< branching factor of BVH to build
  size_t maxDepth;               //!< maximum depth of BVH to build
  size_t logBlockSize;           //!< log2 of blocksize for SAH heuristic
  size_t minLeafSize;            //!< minimum size of a leaf
  size_t maxLeafSize;            //!< maximum size of a leaf
  float travCost;                //!< estimated cost of one traversal step
  float intCost;                 //!< estimated cost of one primitive intersection
  size_t singleThreadThreshold;  //!< threshold when we switch to single threaded build
  size_t primrefarrayalloc;      //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished
};
/*! recursive state of builder: one subtree to be built, i.e. a primitive
 *  range plus the depth at which its root will sit */
template<typename Set, typename Split>
struct BuildRecordT
{
public:
  __forceinline BuildRecordT () {}

  __forceinline BuildRecordT (size_t depth)
    : depth(depth), alloc_barrier(false), prims(empty) {}

  __forceinline BuildRecordT (size_t depth, const Set& prims)
    : depth(depth), alloc_barrier(false), prims(prims) {}

  /*! geometric bounds of the primitives of this subtree */
  __forceinline BBox3fa bounds() const { return prims.geomBounds; }

  /* records compare by primitive count; used to sort children largest-first */
  __forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); }
  __forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); }

  /*! number of primitives in this subtree */
  __forceinline size_t size() const { return prims.size(); }

public:
  size_t depth;       //!< Depth of the root of this subtree.
  bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes
  Set prims;          //!< The list of primitives.
};
/*! default "may this range become a leaf?" policy: always answers yes, so
 *  leaf creation is limited only by the size thresholds in Settings */
template<typename PrimRef, typename Set>
struct DefaultCanCreateLeafFunc
{
  __forceinline bool operator() (const PrimRef*, const Set&) const
  {
    return true; // never vetoes leaf creation
  }
};
/*! default leaf-split policy: intentionally a no-op, because with the default
 *  CanCreateLeaf policy (always true) this functor is never invoked */
template<typename PrimRef, typename Set>
struct DefaultCanCreateLeafSplitFunc
{
  __forceinline void operator() (PrimRef*, const Set&, Set&, Set&) const
  {
    /* nothing to do */
  }
};
/*! generic recursive SAH builder. All node/leaf creation, reduction and
 *  split-policy behavior is injected through the functor template parameters;
 *  this class only implements the recursion, child selection, and the
 *  serial/parallel switch. */
template<typename BuildRecord,
         typename Heuristic,
         typename Set,
         typename PrimRef,
         typename ReductionTy,
         typename Allocator,
         typename CreateAllocFunc,
         typename CreateNodeFunc,
         typename UpdateNodeFunc,
         typename CreateLeafFunc,
         typename CanCreateLeafFunc,
         typename CanCreateLeafSplitFunc,
         typename ProgressMonitor>
class BuilderT
{
  friend struct GeneralBVHBuilder;

  /*! stores references to all callbacks; only validates the branching factor */
  BuilderT (PrimRef* prims,
            Heuristic& heuristic,
            const CreateAllocFunc& createAlloc,
            const CreateNodeFunc& createNode,
            const UpdateNodeFunc& updateNode,
            const CreateLeafFunc& createLeaf,
            const CanCreateLeafFunc& canCreateLeaf,
            const CanCreateLeafSplitFunc& canCreateLeafSplit,
            const ProgressMonitor& progressMonitor,
            const Settings& settings) :
    cfg(settings),
    prims(prims),
    heuristic(heuristic),
    createAlloc(createAlloc),
    createNode(createNode),
    updateNode(updateNode),
    createLeaf(createLeaf),
    canCreateLeaf(canCreateLeaf),
    canCreateLeafSplit(canCreateLeafSplit),
    progressMonitor(progressMonitor)
  {
    if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
      throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
  }

  /*! builds a subtree that no longer gets SAH splits: repeatedly fallback-splits
   *  the largest child until all children are small enough to become leaves
   *  (or the branching factor is reached), then recurses */
  const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc)
  {
    /* this should never occur but is a fatal error */
    if (current.depth > cfg.maxDepth)
      throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

    /* create leaf for few primitives */
    if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims))
      return createLeaf(prims,current.prims,alloc);

    /* fill all children by always splitting the largest one */
    ReductionTy values[MAX_BRANCHING_FACTOR];
    BuildRecord children[MAX_BRANCHING_FACTOR];
    size_t numChildren = 1;
    children[0] = current;
    do {

      /* find best child with largest bounding box area */
      size_t bestChild = -1;
      size_t bestSize = 0;
      for (size_t i=0; i<numChildren; i++)
      {
        /* ignore leaves as they cannot get split */
        if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims))
          continue;

        /* remember child with largest size */
        if (children[i].prims.size() > bestSize) {
          bestSize = children[i].prims.size();
          bestChild = i;
        }
      }
      if (bestChild == (size_t)-1) break; // all children are leaf-sized

      /*! split best child into left and right child */
      BuildRecord left(current.depth+1);
      BuildRecord right(current.depth+1);
      if (!canCreateLeaf(prims,children[bestChild].prims)) {
        /* the policy vetoed a leaf: use the policy's own split */
        canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims);
      } else {
        heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims);
      }

      /* add new children left and right */
      children[bestChild] = children[numChildren-1];
      children[numChildren-1] = left;
      children[numChildren+0] = right;
      numChildren++;

    } while (numChildren < cfg.branchingFactor);

    /* set barrier for primrefarrayalloc */
    if (unlikely(current.size() > cfg.primrefarrayalloc))
      for (size_t i=0; i<numChildren; i++)
        children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;

    /* create node */
    auto node = createNode(children,numChildren,alloc);

    /* recurse into each child and perform reduction */
    for (size_t i=0; i<numChildren; i++)
      values[i] = createLargeLeaf(children[i],alloc);

    /* perform reduction */
    return updateNode(current,children,node,values,numChildren);
  }

  /*! main recursion: SAH-splits the current record into up to branchingFactor
   *  children, builds them (in parallel above singleThreadThreshold), and
   *  reduces the child values via updateNode */
  const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel)
  {
    /* get thread local allocator */
    if (!alloc)
      alloc = createAlloc();

    /* call memory monitor function to signal progress */
    if (toplevel && current.size() <= cfg.singleThreadThreshold)
      progressMonitor(current.size());

    /*! find best split */
    auto split = heuristic.find(current.prims,cfg.logBlockSize);

    /*! compute leaf and split cost */
    const float leafSAH  = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
    const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH();
    assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));

    /*! create a leaf node when threshold reached or SAH tells us to stop */
    if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
      heuristic.deterministic_order(current.prims); // make leaf contents independent of thread scheduling
      return createLargeLeaf(current,alloc);
    }

    /*! perform initial split */
    Set lprims,rprims;
    heuristic.split(split,current.prims,lprims,rprims);

    /*! initialize child list with initial split */
    ReductionTy values[MAX_BRANCHING_FACTOR];
    BuildRecord children[MAX_BRANCHING_FACTOR];
    children[0] = BuildRecord(current.depth+1,lprims);
    children[1] = BuildRecord(current.depth+1,rprims);
    size_t numChildren = 2;

    /*! split until node is full or SAH tells us to stop */
    while (numChildren < cfg.branchingFactor)
    {
      /*! find best child to split (largest surface area first) */
      float bestArea = neg_inf;
      ssize_t bestChild = -1;
      for (size_t i=0; i<numChildren; i++)
      {
        /* ignore leaves as they cannot get split */
        if (children[i].prims.size() <= cfg.minLeafSize) continue;

        /* find child with largest surface area */
        if (halfArea(children[i].prims.geomBounds) > bestArea) {
          bestChild = i;
          bestArea = halfArea(children[i].prims.geomBounds);
        }
      }
      if (bestChild == -1) break;

      /* perform best found split */
      BuildRecord& brecord = children[bestChild];
      BuildRecord lrecord(current.depth+1);
      BuildRecord rrecord(current.depth+1);
      auto split = heuristic.find(brecord.prims,cfg.logBlockSize);
      heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims);
      children[bestChild  ] = lrecord;
      children[numChildren] = rrecord;
      numChildren++;
    }

    /* set barrier for primrefarrayalloc */
    if (unlikely(current.size() > cfg.primrefarrayalloc))
      for (size_t i=0; i<numChildren; i++)
        children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;

    /* sort buildrecords for faster shadow ray traversal */
    std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>());

    /*! create an inner node */
    auto node = createNode(children,numChildren,alloc);

    /* spawn tasks */
    if (current.size() > cfg.singleThreadThreshold)
    {
      /*! parallel_for is faster than spawning sub-tasks */
      parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here
          for (size_t i=r.begin(); i<r.end(); i++) {
            values[i] = recurse(children[i],nullptr,true);
            _mm_mfence(); // to allow non-temporal stores during build
          }
        });

      return updateNode(current,children,node,values,numChildren);
    }
    /* recurse into each child */
    else
    {
      for (size_t i=0; i<numChildren; i++)
        values[i] = recurse(children[i],alloc,false);

      return updateNode(current,children,node,values,numChildren);
    }
  }

private:
  Settings cfg;                                  //!< build settings
  PrimRef* prims;                                //!< primitive reference array being partitioned in place
  Heuristic& heuristic;                          //!< split heuristic
  const CreateAllocFunc& createAlloc;            //!< creates a per-thread allocator
  const CreateNodeFunc& createNode;              //!< creates an inner node
  const UpdateNodeFunc& updateNode;              //!< reduces child values into the parent
  const CreateLeafFunc& createLeaf;              //!< creates a leaf
  const CanCreateLeafFunc& canCreateLeaf;        //!< policy: may a range become a leaf?
  const CanCreateLeafSplitFunc& canCreateLeafSplit; //!< policy split used when a leaf is vetoed
  const ProgressMonitor& progressMonitor;        //!< build progress callback
};
/*! builds a BVH with the default leaf policy (any small-enough range may
 *  become a leaf); instantiates BuilderT and runs the recursion from depth 1 */
template<
  typename ReductionTy,
  typename Heuristic,
  typename Set,
  typename PrimRef,
  typename CreateAllocFunc,
  typename CreateNodeFunc,
  typename UpdateNodeFunc,
  typename CreateLeafFunc,
  typename ProgressMonitor>

  __noinline static ReductionTy build(Heuristic& heuristic,
                                      PrimRef* prims,
                                      const Set& set,
                                      CreateAllocFunc createAlloc,
                                      CreateNodeFunc createNode, UpdateNodeFunc updateNode,
                                      const CreateLeafFunc& createLeaf,
                                      const ProgressMonitor& progressMonitor,
                                      const Settings& settings)
{
  typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;

  typedef BuilderT<
    BuildRecord,
    Heuristic,
    Set,
    PrimRef,
    ReductionTy,
    decltype(createAlloc()),
    CreateAllocFunc,
    CreateNodeFunc,
    UpdateNodeFunc,
    CreateLeafFunc,
    DefaultCanCreateLeafFunc<PrimRef, Set>,
    DefaultCanCreateLeafSplitFunc<PrimRef, Set>,
    ProgressMonitor> Builder;

  /* instantiate builder */
  Builder builder(prims,
                  heuristic,
                  createAlloc,
                  createNode,
                  updateNode,
                  createLeaf,
                  DefaultCanCreateLeafFunc<PrimRef, Set>(),
                  DefaultCanCreateLeafSplitFunc<PrimRef, Set>(),
                  progressMonitor,
                  settings);

  /* build hierarchy */
  BuildRecord record(1,set);
  const ReductionTy root = builder.recurse(record,nullptr,true);
  _mm_mfence(); // to allow non-temporal stores during build
  return root;
}
/*! builds a BVH with a user-supplied leaf policy: canCreateLeaf may veto leaf
 *  creation for a range, in which case canCreateLeafSplit partitions it */
template<
  typename ReductionTy,
  typename Heuristic,
  typename Set,
  typename PrimRef,
  typename CreateAllocFunc,
  typename CreateNodeFunc,
  typename UpdateNodeFunc,
  typename CreateLeafFunc,
  typename CanCreateLeafFunc,
  typename CanCreateLeafSplitFunc,
  typename ProgressMonitor>

  __noinline static ReductionTy build(Heuristic& heuristic,
                                      PrimRef* prims,
                                      const Set& set,
                                      CreateAllocFunc createAlloc,
                                      CreateNodeFunc createNode, UpdateNodeFunc updateNode,
                                      const CreateLeafFunc& createLeaf,
                                      const CanCreateLeafFunc& canCreateLeaf,
                                      const CanCreateLeafSplitFunc& canCreateLeafSplit,
                                      const ProgressMonitor& progressMonitor,
                                      const Settings& settings)
{
  typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;

  typedef BuilderT<
    BuildRecord,
    Heuristic,
    Set,
    PrimRef,
    ReductionTy,
    decltype(createAlloc()),
    CreateAllocFunc,
    CreateNodeFunc,
    UpdateNodeFunc,
    CreateLeafFunc,
    CanCreateLeafFunc,
    CanCreateLeafSplitFunc,
    ProgressMonitor> Builder;

  /* instantiate builder */
  Builder builder(prims,
                  heuristic,
                  createAlloc,
                  createNode,
                  updateNode,
                  createLeaf,
                  canCreateLeaf,
                  canCreateLeafSplit,
                  progressMonitor,
                  settings);

  /* build hierarchy */
  BuildRecord record(1,set);
  const ReductionTy root = builder.recurse(record,nullptr,true);
  _mm_mfence(); // to allow non-temporal stores during build
  return root;
}
};
/* SAH builder that operates on an array of BuildRecords, using object binning
 * with NUM_OBJECT_BINS bins as split heuristic */
struct BVHBuilderBinnedSAH
{
  typedef PrimInfoRange Set;
  typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic;
  typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord;
  typedef GeneralBVHBuilder::Settings Settings;

  /*! special builder that propagates reduction over the tree; uses the
   *  default leaf policy (any small-enough range may become a leaf) */
  template<
    typename ReductionTy,
    typename CreateAllocFunc,
    typename CreateNodeFunc,
    typename UpdateNodeFunc,
    typename CreateLeafFunc,
    typename ProgressMonitor>

    static ReductionTy build(CreateAllocFunc createAlloc,
                             CreateNodeFunc createNode, UpdateNodeFunc updateNode,
                             const CreateLeafFunc& createLeaf,
                             const ProgressMonitor& progressMonitor,
                             PrimRef* prims, const PrimInfo& pinfo,
                             const Settings& settings)
  {
    Heuristic heuristic(prims);
    return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
      heuristic,
      prims,
      PrimInfoRange(0,pinfo.size(),pinfo), // build over the whole array
      createAlloc,
      createNode,
      updateNode,
      createLeaf,
      progressMonitor,
      settings);
  }

  /*! special builder that propagates reduction over the tree; forwards a
   *  user-supplied leaf policy (canCreateLeaf/canCreateLeafSplit) */
  template<
    typename ReductionTy,
    typename CreateAllocFunc,
    typename CreateNodeFunc,
    typename UpdateNodeFunc,
    typename CreateLeafFunc,
    typename CanCreateLeafFunc,
    typename CanCreateLeafSplitFunc,
    typename ProgressMonitor>

    static ReductionTy build(CreateAllocFunc createAlloc,
                             CreateNodeFunc createNode, UpdateNodeFunc updateNode,
                             const CreateLeafFunc& createLeaf,
                             const CanCreateLeafFunc& canCreateLeaf,
                             const CanCreateLeafSplitFunc& canCreateLeafSplit,
                             const ProgressMonitor& progressMonitor,
                             PrimRef* prims, const PrimInfo& pinfo,
                             const Settings& settings)
  {
    Heuristic heuristic(prims);
    return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
      heuristic,
      prims,
      PrimInfoRange(0,pinfo.size(),pinfo), // build over the whole array
      createAlloc,
      createNode,
      updateNode,
      createLeaf,
      canCreateLeaf,
      canCreateLeafSplit,
      progressMonitor,
      settings);
  }
};
/* Spatial SAH builder that operates on an double-buffered array of BuildRecords.
 * Spatial splits duplicate primitive references, so the upper geomID bits of
 * each PrimRef are borrowed to store a per-primitive split budget. */
struct BVHBuilderBinnedFastSpatialSAH
{
  typedef PrimInfoExtRange Set;
  typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split;
  typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
  typedef GeneralBVHBuilder::Settings Settings;

  /* masks separating the geomID payload from the reserved split-budget bits */
  static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
  static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);

  /*! leaf-creation wrapper that strips the split-budget bits from the geomIDs
   *  before handing the range to the user's leaf creator */
  template<typename ReductionTy, typename UserCreateLeaf>
  struct CreateLeafExt
  {
    __forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf)
      : userCreateLeaf(userCreateLeaf) {}

    // __noinline is workaround for ICC2016 compiler bug
    template<typename Allocator>
    __noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const
    {
      for (size_t i=range.begin(); i<range.end(); i++)
        prims[i].lower.u &= GEOMID_MASK; // restore clean geomID

      return userCreateLeaf(prims,range,alloc);
    }

    const UserCreateLeaf userCreateLeaf;
  };

  /*! special builder that propagates reduction over the tree.
   *  \param splitPrimitive functor that spatially clips a primitive at a plane
   *  \param extSize        extra capacity in prims for duplicated references */
  template<
    typename ReductionTy,
    typename CreateAllocFunc,
    typename CreateNodeFunc,
    typename UpdateNodeFunc,
    typename CreateLeafFunc,
    typename SplitPrimitiveFunc,
    typename ProgressMonitor>

    static ReductionTy build(CreateAllocFunc createAlloc,
                             CreateNodeFunc createNode,
                             UpdateNodeFunc updateNode,
                             const CreateLeafFunc& createLeaf,
                             SplitPrimitiveFunc splitPrimitive,
                             ProgressMonitor progressMonitor,
                             PrimRef* prims,
                             const size_t extSize,
                             const PrimInfo& pinfo,
                             const Settings& settings)
  {
    typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic;
    Heuristic heuristic(splitPrimitive,prims,pinfo);

    /* calculate total surface area */ // FIXME: this sum is not deterministic
    const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double {

        double A = 0.0f;
        for (size_t i=r.begin(); i<r.end(); i++)
        {
          PrimRef& prim = prims[i];
          A += area(prim.bounds());
        }
        return A;
      },std::plus<double>());

    /* calculate maximum number of spatial splits per primitive: a primitive's
       budget grows with its share of the total surface area */
    const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1;
    const float f = 10.0f;
    const float invA = 1.0f / A;
    parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) {

        for (size_t i=r.begin(); i<r.end(); i++)
        {
          PrimRef& prim = prims[i];
          assert((prim.geomID() & SPLITS_MASK) == 0);
          // FIXME: is there a better general heuristic ?
          const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA);
          unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf)));
          prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS); // stash budget in high geomID bits
        }
      });

    return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(
      heuristic,
      prims,
      PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
      createAlloc,
      createNode,
      updateNode,
      CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf), // strips budget bits in leaves
      progressMonitor,
      settings);
  }
};
/* Open/Merge SAH builder that operates on an array of BuildRecords; used for
 * two-level builds where BuildRefs (subtree references) can be "opened" into
 * their children to improve SAH quality */
struct BVHBuilderBinnedOpenMergeSAH
{
  static const size_t NUM_OBJECT_BINS_HQ = 32;
  typedef PrimInfoExtRange Set;
  typedef BinSplit<NUM_OBJECT_BINS_HQ> Split;
  typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;
  typedef GeneralBVHBuilder::Settings Settings;

  /*! special builder that propagates reduction over the tree.
   *  \param nodeOpenerFunc functor that opens a BuildRef into its children
   *  \param extSize        extra capacity in prims for opened references */
  template<
    typename ReductionTy,
    typename BuildRef,
    typename CreateAllocFunc,
    typename CreateNodeFunc,
    typename UpdateNodeFunc,
    typename CreateLeafFunc,
    typename NodeOpenerFunc,
    typename ProgressMonitor>

    static ReductionTy build(CreateAllocFunc createAlloc,
                             CreateNodeFunc createNode,
                             UpdateNodeFunc updateNode,
                             const CreateLeafFunc& createLeaf,
                             NodeOpenerFunc nodeOpenerFunc,
                             ProgressMonitor progressMonitor,
                             BuildRef* prims,
                             const size_t extSize,
                             const PrimInfo& pinfo,
                             const Settings& settings)
  {
    typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic;
    Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor);

    return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>(
      heuristic,
      prims,
      PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),
      createAlloc,
      createNode,
      updateNode,
      createLeaf,
      progressMonitor,
      settings);
  }
};
}
}

View file

@ -0,0 +1,552 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "priminfo.h"
#include "priminfo_mb.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_partition.h"
namespace embree
{
namespace isa
{
/*! mapping into bins: an affine function (ofs, scale) that maps a point inside
 *  the centroid bounds onto a bin index in [0,num) per dimension */
template<size_t BINS>
struct BinMapping
{
public:
  __forceinline BinMapping() {}

  /*! calculates the mapping; the number of bins grows with the primitive
   *  count N but never exceeds BINS */
  __forceinline BinMapping(size_t N, const BBox3fa& centBounds)
  {
    num = min(BINS,size_t(4.0f + 0.05f*N));
    assert(num >= 1);
    const vfloat4 eps = 1E-34f;
    const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
    /* 0.99f keeps the upper boundary inside bin num-1; degenerate (flat)
       dimensions get scale 0 so everything maps to bin 0 */
    scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
    ofs  = (vfloat4) centBounds.lower;
  }

  /*! calculates the mapping using the full BINS bins */
  __forceinline BinMapping(const BBox3fa& centBounds)
  {
    num = BINS;
    const vfloat4 eps = 1E-34f;
    const vfloat4 diag = max(eps, (vfloat4) centBounds.size());
    scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
    ofs  = (vfloat4) centBounds.lower;
  }

  /*! calculates the mapping from any PrimInfo-like type providing size() and centBounds */
  template<typename PrimInfo>
  __forceinline BinMapping(const PrimInfo& pinfo)
  {
    const vfloat4 eps = 1E-34f;
    num = min(BINS,size_t(4.0f + 0.05f*pinfo.size()));
    const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());
    scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));
    ofs  = (vfloat4) pinfo.centBounds.lower;
  }

  /*! returns number of bins */
  __forceinline size_t size() const { return num; }

  /*! slower but safe binning */
  __forceinline Vec3ia bin(const Vec3fa& p) const
  {
    const vint4 i = floori((vfloat4(p)-ofs)*scale);
    assert(i[0] >= 0 && (size_t)i[0] < num);
    assert(i[1] >= 0 && (size_t)i[1] < num);
    assert(i[2] >= 0 && (size_t)i[2] < num);

    // we clamp to handle corner cases that could calculate out of bounds bin
    return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
  }

  /*! faster but unsafe binning (no clamp; caller guarantees p lies inside the mapped bounds) */
  __forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {
    return Vec3ia(floori((vfloat4(p)-ofs)*scale));
  }

  /*! faster but unsafe binning */
  template<typename PrimRef>
  __forceinline Vec3ia bin_unsafe(const PrimRef& p) const {
    return bin_unsafe(p.binCenter());
  }

  /*! faster but unsafe binning */
  template<typename PrimRef, typename BinBoundsAndCenter>
  __forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {
    return bin_unsafe(binBoundsAndCenter.binCenter(p));
  }

  /*! tests on which side of a split plane a primitive falls, restricted to the
   *  dimensions enabled in splitDimMask */
  template<typename PrimRef>
  __forceinline bool bin_unsafe(const PrimRef& ref,
                                const vint4&   vSplitPos,
                                const vbool4&  splitDimMask) const // FIXME: rename to isLeft
  {
    return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);
  }

  /*! calculates left spatial position of bin */
  __forceinline float pos(const size_t bin, const size_t dim) const {
    return madd(float(bin),1.0f / scale[dim],ofs[dim]);
  }

  /*! returns true if the mapping is invalid in some dimension (flat centroid bounds) */
  __forceinline bool invalid(const size_t dim) const {
    return scale[dim] == 0.0f;
  }

  /*! stream output */
  friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {
    return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";
  }

public:
  size_t num;        //!< number of bins actually used (<= BINS)
  vfloat4 ofs,scale; //!< linear function that maps to bin ID
};
/*! stores all information to perform some split: the split kind (data), the
 *  dimension and bin position, and the bin mapping needed to replay it */
template<size_t BINS>
struct BinSplit
{
  enum
  {
    SPLIT_OBJECT = 0,
    SPLIT_FALLBACK = 1,
    SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already
    SPLIT_TEMPORAL = 2,
    SPLIT_GEOMID = 3,
  };

  /*! construct an invalid split by default */
  __forceinline BinSplit()
    : sah(inf), dim(-1), pos(0), data(0) {}

  /*! constructs a split with a floating-point position (e.g. spatial split) */
  __forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)
    : sah(sah), dim(dim), fpos(fpos), data(data) {}

  /*! constructs specified split */
  __forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)
    : sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}

  /*! tests if this split is valid */
  __forceinline bool valid() const { return dim != -1; }

  /*! calculates surface area heuristic for performing the split */
  __forceinline float splitSAH() const { return sah; }

  /*! stream output */
  friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {
    return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";
  }

public:
  float sah;                      //!< SAH cost of the split
  int dim;                        //!< split dimension
  union { int pos; float fpos; }; //!< bin index for splitting (or spatial position)
  unsigned int data;              //!< extra optional split data
  BinMapping<BINS> mapping;       //!< mapping into bins
};
/*! stores extended information about the split: primitive counts and bounds of
 *  both sides, for callers that need more than the SAH value */
template<typename BBox>
struct SplitInfoT
{
  __forceinline SplitInfoT () {}

  __forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)
    : leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}

public:
  size_t leftCount,rightCount; //!< number of primitives on each side of the split
  BBox leftBounds,rightBounds; //!< geometric bounds of each side of the split
};

typedef SplitInfoT<BBox3fa> SplitInfo;   //!< static-geometry variant
typedef SplitInfoT<LBBox3fa> SplitInfo2; //!< motion-blur (linear bounds) variant
/*! stores all binning information */
template<size_t BINS, typename PrimRef, typename BBox>
struct __aligned(64) BinInfoT
{
typedef BinSplit<BINS> Split;
typedef vbool4 vbool;
typedef vint4 vint;
typedef vfloat4 vfloat;
/*! default constructor leaves the bins uninitialized; call clear() before binning */
__forceinline BinInfoT() {
}

/*! constructs cleared bin state */
__forceinline BinInfoT(EmptyTy) {
  clear();
}

/*! bin access function: bounds and counts are stored per (bin, dimension) */
__forceinline BBox &bounds(const size_t binID, const size_t dimID)             { return _bounds[binID][dimID]; }
__forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }

__forceinline unsigned int &counts(const size_t binID, const size_t dimID)             { return _counts[binID][dimID]; }
__forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }

__forceinline vuint4 &counts(const size_t binID)             { return _counts[binID]; }
__forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }

/*! clears the bin info: empty bounds and zero counts in all bins */
__forceinline void clear()
{
  for (size_t i=0; i<BINS; i++) {
    bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;
    counts(i) = vuint4(zero);
  }
}
/*! bins an array of primitives: for each primitive, its bin index per dimension
 *  is computed from its bin center, and the corresponding bin's bounds and
 *  count are extended. The loop is unrolled 2x to overlap the work of two
 *  primitives. */
__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)
{
  if (unlikely(N == 0)) return;

  size_t i;
  for (i=0; i<N-1; i+=2)
  {
    /*! map even and odd primitive to bin */
    BBox prim0; Vec3fa center0;
    prims[i+0].binBoundsAndCenter(prim0,center0);
    const vint4 bin0 = (vint4)mapping.bin(center0);

    BBox prim1; Vec3fa center1;
    prims[i+1].binBoundsAndCenter(prim1,center1);
    const vint4 bin1 = (vint4)mapping.bin(center1);

    /*! increase bounds for bins for even primitive */
    const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0);
    const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0);
    const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0);
    const unsigned int s0 = (unsigned int)prims[i+0].size(); // counts weighted by primitive size
    counts(b00,0)+=s0;
    counts(b01,1)+=s0;
    counts(b02,2)+=s0;

    /*! increase bounds of bins for odd primitive */
    const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1);
    const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1);
    const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1);
    const unsigned int s1 = (unsigned int)prims[i+1].size();
    counts(b10,0)+=s1;
    counts(b11,1)+=s1;
    counts(b12,2)+=s1;
  }

  /*! for uneven number of primitives */
  if (i < N)
  {
    /*! map primitive to bin */
    BBox prim0; Vec3fa center0;
    prims[i].binBoundsAndCenter(prim0,center0);
    const vint4 bin0 = (vint4)mapping.bin(center0);

    /*! increase bounds of bins */
    const unsigned int s0 = (unsigned int)prims[i].size();
    const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
    const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
    const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
  }
}
/*! bins an array of primitives; same as above but the binning bounds and
 *  center are supplied by an external BinBoundsAndCenter functor (e.g. to bin
 *  motion-blur primitives over a time range) */
template<typename BinBoundsAndCenter>
__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
{
  if (N == 0) return;

  size_t i;
  for (i=0; i<N-1; i+=2)
  {
    /*! map even and odd primitive to bin */
    BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
    const vint4 bin0 = (vint4)mapping.bin(center0);
    BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);
    const vint4 bin1 = (vint4)mapping.bin(center1);

    /*! increase bounds for bins for even primitive */
    const unsigned int s0 = prims[i+0].size(); // counts weighted by primitive size
    const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
    const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
    const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);

    /*! increase bounds of bins for odd primitive */
    const unsigned int s1 = prims[i+1].size();
    const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);
    const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);
    const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);
  }

  /*! for uneven number of primitives */
  if (i < N)
  {
    /*! map primitive to bin */
    BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);
    const vint4 bin0 = (vint4)mapping.bin(center0);

    /*! increase bounds of bins */
    const unsigned int s0 = prims[i+0].size();
    const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);
    const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);
    const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);
  }
}
/*! convenience overload: bins the half-open index range [begin,end) */
__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {
  bin(prims+begin,end-begin,mapping);
}

/*! convenience overload with external binning functor */
template<typename BinBoundsAndCenter>
__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {
  bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);
}
/*! merges in other binning information (in place); used to combine the
 *  partial bin states of parallel binning jobs */
__forceinline void merge (const BinInfoT& other, size_t numBins)
{
  for (size_t i=0; i<numBins; i++)
  {
    counts(i) += other.counts(i);
    bounds(i,0).extend(other.bounds(i,0));
    bounds(i,1).extend(other.bounds(i,1));
    bounds(i,2).extend(other.bounds(i,2));
  }
}
/*! reduces binning information: returns the bin-wise combination of a and b
 *  without modifying either (functional counterpart of merge) */
static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)
{
  BinInfoT c;
  for (size_t i=0; i<numBins; i++)
  {
    c.counts(i) = a.counts(i)+b.counts(i);
    c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));
    c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));
    c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));
  }
  return c;
}
/*! finds the best split by scanning binning information: evaluates the SAH of
 *  every bin boundary in all three dimensions simultaneously (SIMD over dims)
 *  and returns the cheapest one. blocks_shift rounds primitive counts up to
 *  multiples of the SAH block size. */
__forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const
{
  /* sweep from right to left and compute parallel prefix of merged bounds */
  vfloat4 rAreas[BINS];
  vuint4 rCounts[BINS];
  vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
  for (size_t i=mapping.size()-1; i>0; i--)
  {
    count += counts(i);
    rCounts[i] = count;
    bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
    by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
    bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
    rAreas[i][3] = 0.0f;
  }

  /* sweep from left to right and compute SAH */
  vuint4 blocks_add = (1 << blocks_shift)-1; // rounds counts up to full blocks
  vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
  count = 0; bx = empty; by = empty; bz = empty;
  for (size_t i=1; i<mapping.size(); i++, ii+=1)
  {
    count += counts(i-1);
    bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
    by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
    bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
    const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
    const vfloat4 rArea = rAreas[i];
    const vuint4 lCount = (count     +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions.
    const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
    const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
    //const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));

    vbestPos = select(sah < vbestSAH,ii ,vbestPos);
    vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
  }

  /* find best dimension */
  float bestSAH = inf;
  int   bestDim = -1;
  int   bestPos = 0;
  for (int dim=0; dim<3; dim++)
  {
    /* ignore zero sized dimensions */
    if (unlikely(mapping.invalid(dim)))
      continue;

    /* test if this is a better dimension */
    if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
      bestDim = dim;
      bestPos = vbestPos[dim];
      bestSAH = vbestSAH[dim];
    }
  }
  return Split(bestSAH,bestDim,bestPos,mapping); // bestDim == -1 if no valid split was found
}
/*! finds the best split by scanning binning information; like best(), but supports
 *  an arbitrary (non power-of-two) SAH block size */
__forceinline Split best_block_size(const BinMapping<BINS>& mapping, const size_t blockSize) const
{
  /* sweep from right to left and compute parallel prefix of merged bounds */
  /* rAreas[i]/rCounts[i] hold, per dimension in lanes 0..2, the half-area and
     primitive count of all bins at positions >= i */
  vfloat4 rAreas[BINS];
  vuint4 rCounts[BINS];
  vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
  for (size_t i=mapping.size()-1; i>0; i--)
  {
    count += counts(i);
    rCounts[i] = count;
    bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
    by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
    bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
    rAreas[i][3] = 0.0f; // lane 3 is unused padding
  }
  /* sweep from left to right and compute SAH */
  /* counts are rounded up to full blocks via add-then-scale with floor() */
  vuint4 blocks_add = blockSize-1;
  vfloat4 blocks_factor = 1.0f/float(blockSize);
  vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
  count = 0; bx = empty; by = empty; bz = empty;
  for (size_t i=1; i<mapping.size(); i++, ii+=1)
  {
    /* prefix over bins [0,i) gives the left side of a split at position i */
    count += counts(i-1);
    bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
    by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
    bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
    const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az); // lane 3 is a don't-care duplicate
    const vfloat4 rArea = rAreas[i];
    const vfloat4 lCount = floor(vfloat4(count +blocks_add)*blocks_factor);
    const vfloat4 rCount = floor(vfloat4(rCounts[i]+blocks_add)*blocks_factor);
    const vfloat4 sah = madd(lArea,lCount,rArea*rCount);
    vbestPos = select(sah < vbestSAH,ii ,vbestPos);
    vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
  }
  /* find best dimension */
  float bestSAH = inf;
  int bestDim = -1;
  int bestPos = 0;
  for (int dim=0; dim<3; dim++)
  {
    /* ignore zero sized dimensions */
    if (unlikely(mapping.invalid(dim)))
      continue;
    /* test if this is a better dimension; position 0 means no candidate was recorded */
    if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
      bestDim = dim;
      bestPos = vbestPos[dim];
      bestSAH = vbestSAH[dim];
    }
  }
  return Split(bestSAH,bestDim,bestPos,mapping);
}
/*! computes primitive counts and geometry bounds for both sides of the given split */
__forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
{
  /* an invalid split yields empty statistics */
  if (split.dim == -1) {
    new (&info) SplitInfoT<BBox>(0,empty,0,empty);
    return;
  }

  size_t countL = 0, countR = 0;
  BBox boundsL = empty, boundsR = empty;

  /* bins below the split position belong to the left side */
  for (size_t b=0; b<(size_t)split.pos; b++) {
    countL += counts(b,split.dim);
    boundsL.extend(bounds(b,split.dim));
  }
  /* all remaining bins belong to the right side */
  for (size_t b=split.pos; b<mapping.size(); b++) {
    countR += counts(b,split.dim);
    boundsR.extend(bounds(b,split.dim));
  }
  new (&info) SplitInfoT<BBox>(countL,boundsL,countR,boundsR);
}
/*! sums the bin counts left of the split position; returns (size_t)-1 for an invalid split */
__forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const
{
  if (unlikely(split.dim == -1)) return -1;

  size_t total = 0;
  for (size_t b = 0; b < (size_t)split.pos; b++)
    total += counts(b, split.dim);
  return total;
}
/*! sums the bin counts right of (and including) the split position; returns (size_t)-1 for an invalid split */
__forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const
{
  if (unlikely(split.dim == -1)) return -1;

  size_t total = 0;
  for (size_t b = (size_t)split.pos; b<mapping.size(); b++)
    total += counts(b, split.dim);
  return total;
}
private:
BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins
};
}
/*! bins the primitive range serially when it is small, otherwise via a parallel
 *  reduction that merges per-block partial binners */
template<typename BinInfoT, typename BinMapping, typename PrimRef>
__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)
{
  const size_t numPrims = end-begin;
  if (unlikely(numPrims >= parallelThreshold))
  {
    /* each worker bins a sub-range into a private binner */
    auto binRange = [&](const range<size_t>& r) -> BinInfoT {
      BinInfoT partial(empty); partial.bin(prims + r.begin(), r.size(), mapping); return partial;
    };
    /* partial results are merged pairwise */
    auto mergeBinners = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
      BinInfoT merged = b0; merged.merge(b1, mapping.size()); return merged;
    };
    binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBinners);
  }
  else {
    binner.bin(prims,begin,end,mapping);
  }
}
/*! bins the primitive range serially when it is small, otherwise via a parallel
 *  reduction; bounds and centers are derived through the given functor */
template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
{
  const size_t numPrims = end-begin;
  if (unlikely(numPrims >= parallelThreshold))
  {
    /* each worker bins a sub-range into a private binner */
    auto binRange = [&](const range<size_t>& r) -> BinInfoT {
      BinInfoT partial(empty); partial.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return partial;
    };
    /* partial results are merged pairwise */
    auto mergeBinners = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
      BinInfoT merged = b0; merged.merge(b1, mapping.size()); return merged;
    };
    binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBinners);
  }
  else {
    binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
  }
}
/*! bins the primitive range either serially or via a parallel reduction,
 *  selected at compile time through the 'parallel' template parameter */
template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>
__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)
{
  if (parallel)
  {
    /* each worker bins a sub-range into a private binner */
    auto binRange = [&](const range<size_t>& r) -> BinInfoT {
      BinInfoT partial(empty); partial.bin(prims + r.begin(), r.size(), mapping); return partial;
    };
    /* partial results are merged pairwise */
    auto mergeBinners = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
      BinInfoT merged = b0; merged.merge(b1, mapping.size()); return merged;
    };
    binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBinners);
  }
  else {
    binner.bin(prims,begin,end,mapping);
  }
}
/*! bins the primitive range either serially or via a parallel reduction (selected at
 *  compile time); bounds and centers are derived through the given functor */
template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>
__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)
{
  if (parallel)
  {
    /* each worker bins a sub-range into a private binner */
    auto binRange = [&](const range<size_t>& r) -> BinInfoT {
      BinInfoT partial(empty); partial.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return partial;
    };
    /* partial results are merged pairwise */
    auto mergeBinners = [&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT {
      BinInfoT merged = b0; merged.merge(b1, mapping.size()); return merged;
    };
    binner = parallel_reduce(begin,end,blockSize,binner,binRange,mergeBinners);
  }
  else {
    binner.bin(prims,begin,end,mapping,binBoundsAndCenter);
  }
}
}

View file

@ -0,0 +1,249 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "heuristic_binning.h"
namespace embree
{
namespace isa
{
/*! stores centroid/geometry bounding information together with the [begin,end)
 *  index range of a set of primitives */
struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
{
  __forceinline PrimInfoRange () {
  }

  /*! construct from a PrimInfo, taking over its bounds and index range */
  __forceinline PrimInfoRange(const PrimInfo& pinfo)
    : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}

  /*! construct an empty range with empty bounds */
  __forceinline PrimInfoRange(EmptyTy)
    : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}

  __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
    : CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}

  __forceinline PrimInfoRange (range<size_t> r, const CentGeomBBox3fa& centGeomBounds)
    : CentGeomBBox3fa(centGeomBounds), range<size_t>(r) {}

  /*! SAH cost of creating one leaf containing all primitives of the range */
  __forceinline float leafSAH() const {
    return expectedApproxHalfArea(geomBounds)*float(size());
  }

  /*! SAH cost of a leaf when primitives are processed in blocks of 2^block_shift
   *  (size is rounded up to full blocks) */
  __forceinline float leafSAH(size_t block_shift) const {
    return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
  }

  __forceinline range<size_t> get_range() const {
    return range<size_t>(begin(),end());
  }

  /*! extends the bounds by one primitive and grows the range by one element */
  template<typename PrimRef>
  __forceinline void add_primref(const PrimRef& prim)
  {
    CentGeomBBox3fa::extend_primref(prim);
    _end++;
  }
};
/*! splits the range in half at its index midpoint (ignoring SAH) and computes
 *  the bounding information of both halves */
inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
{
  const size_t first = pinfo.begin();
  const size_t last  = pinfo.end();
  const size_t mid   = (first + last)/2;

  /* accumulate bounds of the left half */
  CentGeomBBox3fa lbounds(empty);
  for (size_t i=first; i<mid; i++)
    lbounds.extend_center2(prims[i]);
  new (&linfo) PrimInfoRange(first,mid,lbounds);

  /* accumulate bounds of the right half */
  CentGeomBBox3fa rbounds(empty);
  for (size_t i=mid; i<last; i++)
    rbounds.extend_center2(prims[i]);
  new (&rinfo) PrimInfoRange(mid,last,rbounds);
}
/*! partitions the range such that all primitives whose geometry matches the given
 *  type end up on the left side; computes bounding info for both resulting ranges */
template<typename Type, typename getTypeFunc>
inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range<size_t> range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
{
  CentGeomBBox3fa local_left(empty), local_right(empty);
  auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); };
  const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref);
  linfo = PrimInfoRange(make_range(range.begin(),center ),local_left);
  rinfo = PrimInfoRange(make_range(center ,range.end()),local_right);
}
/*! Performs standard object binning */
template<typename PrimRef, size_t BINS>
struct HeuristicArrayBinningSAH
{
  typedef BinSplit<BINS> Split;
  typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
  typedef range<size_t> Set;

  /* ranges with fewer primitives than this are processed serially */
  static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;

  __forceinline HeuristicArrayBinningSAH ()
    : prims(nullptr) {}

  /*! remember prim array */
  __forceinline HeuristicArrayBinningSAH (PrimRef* prims)
    : prims(prims) {}

  /*! finds the best split */
  __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
  {
    if (likely(pinfo.size() < PARALLEL_THRESHOLD))
      return find_template<false>(pinfo,logBlockSize);
    else
      return find_template<true>(pinfo,logBlockSize);
  }

  /*! bins all primitives of the range and selects the best SAH split position */
  template<bool parallel>
  __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
  {
    Binner binner(empty);
    const BinMapping<BINS> mapping(pinfo);
    bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
    return binner.best(mapping,logBlockSize);
  }

  /*! finds the best split */
  __noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize)
  {
    if (likely(pinfo.size() < PARALLEL_THRESHOLD))
      return find_block_size_template<false>(pinfo,blockSize);
    else
      return find_block_size_template<true>(pinfo,blockSize);
  }

  /*! same as find_template, but uses an arbitrary (non power-of-two) SAH block size */
  template<bool parallel>
  __forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize)
  {
    Binner binner(empty);
    const BinMapping<BINS> mapping(pinfo);
    bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
    return binner.best_block_size(mapping,blockSize);
  }

  /*! array partitioning */
  __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  {
    if (likely(pinfo.size() < PARALLEL_THRESHOLD))
      split_template<false>(split,pinfo,linfo,rinfo);
    else
      split_template<true>(split,pinfo,linfo,rinfo);
  }

  /*! partitions the primitive array in-place according to the split and computes
   *  bounding information of the resulting left and right ranges */
  template<bool parallel>
  __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  {
    /* fall back to a median split when no valid SAH split was found */
    if (!split.valid()) {
      deterministic_order(set);
      return splitFallback(set,lset,rset);
    }

    const size_t begin = set.begin();
    const size_t end   = set.end();
    CentGeomBBox3fa local_left(empty);
    CentGeomBBox3fa local_right(empty);
    const unsigned int splitPos = split.pos;
    const unsigned int splitDim = split.dim;
    const unsigned int splitDimMask = (unsigned int)1 << splitDim;

    /* primitives binned below the split position in the split dimension go left */
    const typename Binner::vint vSplitPos(splitPos);
    const typename Binner::vbool vSplitMask(splitDimMask);
    auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };

    size_t center = 0;
    if (!parallel)
      center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,
                                   [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
    else
      center = parallel_partitioning(
        prims,begin,end,EmptyTy(),local_left,local_right,isLeft,
        [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
        [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
        PARALLEL_PARTITION_BLOCK_SIZE);

    new (&lset) PrimInfoRange(begin,center,local_left);
    new (&rset) PrimInfoRange(center,end,local_right);
    assert(area(lset.geomBounds) >= 0.0f);
    assert(area(rset.geomBounds) >= 0.0f);
  }

  void deterministic_order(const PrimInfoRange& pinfo)
  {
    /* required as parallel partition destroys original primitive order */
    std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
  }

  /*! splits the range at its index midpoint when no SAH split is possible */
  void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) {
    performFallbackSplit(prims,pinfo,linfo,rinfo);
  }

  /*! partitions the range by the geometry ID of its first primitive: matching
   *  primitives go left, all others go right */
  void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  {
    assert(range.size() > 1);
    CentGeomBBox3fa left(empty);
    CentGeomBBox3fa right(empty);
    unsigned int geomID = prims[range.begin()].geomID();
    size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right,
                                        [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; },
                                        [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); });
    new (&linfo) PrimInfoRange(range.begin(),center,left);
    new (&rinfo) PrimInfoRange(center,range.end(),right);
  }

private:
  PrimRef* const prims;
};
#if !defined(RTHWIF_STANDALONE)
/*! Performs standard object binning for motion-blurred primitives */
template<typename PrimRefMB, size_t BINS>
struct HeuristicArrayBinningMB
{
  typedef BinSplit<BINS> Split;
  typedef typename PrimRefMB::BBox BBox;
  typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;

  /* ranges with fewer primitives than this are processed serially */
  static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;

  /*! finds the best split */
  const Split find(const SetMB& set, const size_t logBlockSize)
  {
    ObjectBinner binner(empty);
    const BinMapping<BINS> mapping(set.size(),set.centBounds);
    bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);
    Split osplit = binner.best(mapping,logBlockSize);
    osplit.sah *= set.time_range.size(); // scale SAH by the temporal extent of the set
    if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
    return osplit;
  }

  /*! array partitioning */
  __forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
  {
    const size_t begin = set.begin();
    const size_t end   = set.end();
    PrimInfoMB left = empty;
    PrimInfoMB right = empty;

    /* primitives binned below the split position in the split dimension go left */
    const vint4 vSplitPos(split.pos);
    const vbool4 vSplitMask(1 << split.dim);
    auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };
    auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
    auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
    size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);

    new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
    new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
  }
};
#endif
}
}

View file

@ -0,0 +1,302 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "heuristic_binning.h"
namespace embree
{
namespace isa
{
/*! Performs object binning in an unaligned coordinate space derived from curve directions */
template<typename PrimRef, size_t BINS>
struct UnalignedHeuristicArrayBinningSAH
{
  typedef BinSplit<BINS> Split;
  typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
  typedef range<size_t> Set;

  __forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required?
    : scene(nullptr), prims(nullptr) {}

  /*! remember prim array */
  __forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims)
    : scene(scene), prims(prims) {}

  /*! computes a coordinate frame aligned to the direction of the primitive with the
   *  smallest geomID/primID pair that has a non-degenerate direction */
  const LinearSpace3fa computeAlignedSpace(const range<size_t>& set)
  {
    Vec3fa axis(0,0,1); // fallback axis if no valid direction is found
    uint64_t bestGeomPrimID = -1;

    /*! find curve with minimum ID that defines valid direction */
    for (size_t i=set.begin(); i<set.end(); i++)
    {
      const unsigned int geomID = prims[i].geomID();
      const unsigned int primID = prims[i].primID();
      const uint64_t geomprimID = prims[i].ID64();
      if (geomprimID >= bestGeomPrimID) continue;
      const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID);
      if (sqr_length(axis1) > 1E-18f) { // skip near-zero directions
        axis = normalize(axis1);
        bestGeomPrimID = geomprimID;
      }
    }
    return frame(axis).transposed();
  }

  /*! computes bounding information of the range, evaluated in the given space */
  const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space)
  {
    auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
      {
        CentGeomBBox3fa bounds(empty);
        for (size_t i=r.begin(); i<r.end(); i++) {
          Geometry* mesh = scene->get(prims[i].geomID());
          bounds.extend(mesh->vbounds(space,prims[i].primID()));
        }
        return bounds;
      };

    const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096),
                                                   CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);

    return PrimInfo(set.begin(),set.end(),bounds);
  }

  /*! helper that evaluates primitive bounds and binning centers in the unaligned space */
  struct BinBoundsAndCenter
  {
    __forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space)
      : scene(scene), space(space) {}

    /*! returns center for binning */
    __forceinline Vec3fa binCenter(const PrimRef& ref) const
    {
      Geometry* mesh = (Geometry*) scene->get(ref.geomID());
      BBox3fa bounds = mesh->vbounds(space,ref.primID());
      return embree::center2(bounds);
    }

    /*! returns bounds and centroid used for binning */
    __forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const
    {
      Geometry* mesh = (Geometry*) scene->get(ref.geomID());
      BBox3fa bounds = mesh->vbounds(space,ref.primID());
      bounds_o = bounds;
      center_o = embree::center2(bounds);
    }

  private:
    Scene* scene;
    const LinearSpace3fa space;
  };

  /*! finds the best split */
  __forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space)
  {
    if (likely(pinfo.size() < 10000))
      return find_template<false>(pinfo,logBlockSize,space);
    else
      return find_template<true>(pinfo,logBlockSize,space);
  }

  /*! bins all primitives in the unaligned space and selects the best SAH split */
  template<bool parallel>
  const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space)
  {
    Binner binner(empty);
    const BinMapping<BINS> mapping(set);
    BinBoundsAndCenter binBoundsAndCenter(scene,space);
    bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter);
    return binner.best(mapping,logBlockSize);
  }

  /*! array partitioning */
  __forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
  {
    if (likely(set.size() < 10000))
      split_template<false>(split,space,set,lset,rset);
    else
      split_template<true>(split,space,set,lset,rset);
  }

  /*! partitions the primitive array in-place according to the split, computing
   *  bounding information of the resulting left and right ranges */
  template<bool parallel>
  __forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
  {
    /* fall back to a median split when no valid SAH split was found */
    if (!split.valid()) {
      deterministic_order(set);
      return splitFallback(set,lset,rset);
    }

    const size_t begin = set.begin();
    const size_t end = set.end();
    CentGeomBBox3fa local_left(empty);
    CentGeomBBox3fa local_right(empty);
    const int splitPos = split.pos;
    const int splitDim = split.dim;
    BinBoundsAndCenter binBoundsAndCenter(scene,space);

    size_t center = 0;
    if (likely(set.size() < 10000))
      center = serial_partitioning(prims,begin,end,local_left,local_right,
                                   [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
                                   [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
    else
      center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right,
                                     [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },
                                     [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
                                     [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
                                     128);

    new (&lset) PrimInfoRange(begin,center,local_left);
    new (&rset) PrimInfoRange(center,end,local_right);
    assert(area(lset.geomBounds) >= 0.0f);
    assert(area(rset.geomBounds) >= 0.0f);
  }

  void deterministic_order(const range<size_t>& set)
  {
    /* required as parallel partition destroys original primitive order */
    std::sort(&prims[set.begin()],&prims[set.end()]);
  }

  /*! splits the range at its index midpoint when no SAH split is possible */
  void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset)
  {
    const size_t begin = set.begin();
    const size_t end = set.end();
    const size_t center = (begin + end)/2;

    CentGeomBBox3fa left(empty);
    for (size_t i=begin; i<center; i++)
      left.extend_center2(prims[i]);
    new (&lset) PrimInfoRange(begin,center,left);

    CentGeomBBox3fa right(empty);
    for (size_t i=center; i<end; i++)
      right.extend_center2(prims[i]);
    new (&rset) PrimInfoRange(center,end,right);
  }

private:
  Scene* const scene;
  PrimRef* const prims;
};
/*! Performs object binning of motion-blurred primitives in an unaligned coordinate space */
template<typename PrimRefMB, size_t BINS>
struct UnalignedHeuristicArrayBinningMB
{
  typedef BinSplit<BINS> Split;
  typedef typename PrimRefMB::BBox BBox;
  typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;

  /* ranges with fewer primitives than this are processed serially */
  static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;

  UnalignedHeuristicArrayBinningMB(Scene* scene)
    : scene(scene) {}

  /*! computes a coordinate frame aligned to the direction of the primitive with the
   *  smallest geomID/primID pair that has a non-degenerate direction */
  const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set)
  {
    Vec3fa axis0(0,0,1); // fallback axis if no valid direction is found
    uint64_t bestGeomPrimID = -1;

    /*! find curve with minimum ID that defines valid direction */
    for (size_t i=set.begin(); i<set.end(); i++)
    {
      const PrimRefMB& prim = (*set.prims)[i];
      const unsigned int geomID = prim.geomID();
      const unsigned int primID = prim.primID();
      const uint64_t geomprimID = prim.ID64();
      if (geomprimID >= bestGeomPrimID) continue;

      const Geometry* mesh = scene->get(geomID);
      const range<int> tbounds = mesh->timeSegmentRange(set.time_range);
      if (tbounds.size() == 0) continue; // geometry not alive inside the time range

      /* sample the direction at the middle time segment */
      const size_t t = (tbounds.begin()+tbounds.end())/2;
      const Vec3fa axis1 = mesh->computeDirection(primID,t);
      if (sqr_length(axis1) > 1E-18f) { // skip near-zero directions
        axis0 = normalize(axis1);
        bestGeomPrimID = geomprimID;
      }
    }
    return frame(axis0).transposed();
  }

  /*! helper that evaluates linear bounds and binning centers in the unaligned space */
  struct BinBoundsAndCenter
  {
    __forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space)
      : scene(scene), time_range(time_range), space(space) {}

    /*! returns center for binning */
    template<typename PrimRef>
    __forceinline Vec3fa binCenter(const PrimRef& ref) const
    {
      Geometry* mesh = scene->get(ref.geomID());
      LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
      return center2(lbounds.interpolate(0.5f));
    }

    /*! returns bounds and centroid used for binning */
    __noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
    {
      Geometry* mesh = scene->get(ref.geomID());
      LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
      bounds_o = lbounds.interpolate(0.5f);
      center_o = center2(bounds_o);
    }

    /*! returns bounds and centroid used for binning */
    __noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX
    {
      Geometry* mesh = scene->get(ref.geomID());
      LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);
      bounds_o = lbounds;
      center_o = center2(lbounds.interpolate(0.5f));
    }

  private:
    Scene* scene;
    BBox1f time_range;
    const LinearSpace3fa space;
  };

  /*! finds the best split */
  const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space)
  {
    BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
    ObjectBinner binner(empty);
    const BinMapping<BINS> mapping(set.size(),set.centBounds);
    bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter);
    Split osplit = binner.best(mapping,logBlockSize);
    osplit.sah *= set.time_range.size(); // scale SAH by the temporal extent of the set
    if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
    return osplit;
  }

  /*! array partitioning */
  __forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset)
  {
    BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);
    const size_t begin = set.begin();
    const size_t end = set.end();
    PrimInfoMB left = empty;
    PrimInfoMB right = empty;

    /* primitives binned below the split position in the split dimension go left */
    const vint4 vSplitPos(split.pos);
    const vbool4 vSplitMask(1 << split.dim);
    auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); };
    auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
    auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
    size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);

    new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range);
    new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
  }

private:
  Scene* scene;
};
}
}

View file

@ -0,0 +1,443 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
// TODO:
// - adjust parallel build thresholds
// - openNodesBasedOnExtend should consider max extended size
#pragma once
#include "heuristic_binning.h"
#include "heuristic_spatial.h"
/* stop opening of all bref.geomIDs are the same */
#define EQUAL_GEOMID_STOP_CRITERIA 1
/* 10% spatial extend threshold */
#define MAX_EXTEND_THRESHOLD 0.1f
/* maximum is 8 children */
#define MAX_OPENED_CHILD_NODES 8
/* open until all build refs are below threshold size in one step */
#define USE_LOOP_OPENING 0
namespace embree
{
namespace isa
{
/*! Performs standard object binning */
template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
struct HeuristicArrayOpenMergeSAH
{
typedef BinSplit<OBJECT_BINS> Split;
typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
static const size_t PARALLEL_THRESHOLD = 1024;
static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
static const size_t MOVE_STEP_SIZE = 64;
static const size_t CREATE_SPLITS_STEP_SIZE = 128;
__forceinline HeuristicArrayOpenMergeSAH ()
  : prims0(nullptr) {}

/*! remember prim array, node-opening functor and maximum node arity */
__forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
  : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
{
  assert(max_open_size <= MAX_OPENED_CHILD_NODES);
}
/*! decides whether a build reference should be opened (replaced by its children):
 *  only non-leaf references whose extent along the dominant axis exceeds
 *  MAX_EXTEND_THRESHOLD of the total extent are opened */
struct OpenHeuristic
{
  __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
  {
    const Vec3fa diag = pinfo.geomBounds.size();
    dim = maxDim(diag); // dominant axis of the geometry bounds
    assert(diag[dim] > 0.0f);
    inv_max_extend = 1.0f / diag[dim];
  }

  /*! returns true when the given reference should be opened */
  __forceinline bool operator () ( PrimRef& prim ) const {
    return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
  }

private:
  size_t dim;            // dominant axis index
  float inv_max_extend;  // reciprocal of the total extent along that axis
};
/*! compute extended ranges: distributes the spare (extended) space of the parent
 *  set over both children, proportionally to the given weights */
__forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
{
  assert(set.ext_range_size() > 0);
  const float left_factor = (float)lweight / (lweight + rweight);
  const size_t ext_range_size = set.ext_range_size();
  const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
  const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
  lset.set_ext_range(lset.end() + left_ext_range_size);
  rset.set_ext_range(rset.end() + right_ext_range_size);
}
/*! move ranges: shifts the right child's primitives towards the end of the array
 *  so that the left child's extended range fits in between */
__forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t left_ext_range_size = lset.ext_range_size();
  const size_t right_size = rset.size();

  /* has the left child an extended range? */
  if (left_ext_range_size > 0)
  {
    /* left extended range smaller than right range ? */
    if (left_ext_range_size < right_size)
    {
      /* only move a small part of the beginning of the right range to the end */
      parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
        for (size_t i=r.begin(); i<r.end(); i++)
          prims0[i+right_size] = prims0[i];
      });
    }
    else
    {
      /* no overlap, move entire right range to new location, can be made fully parallel */
      parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
        for (size_t i=r.begin(); i<r.end(); i++)
          prims0[i+left_ext_range_size] = prims0[i];
      });
    }
    /* update right range */
    assert(rset.ext_end() + left_ext_range_size == set.ext_end());
    rset.move_right(left_ext_range_size);
  }
}
/* estimates the extra space required when opening, and checks if all primitives are from same geometry */
__noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
{
  const OpenHeuristic heuristic(set);
  const unsigned int geomID = prims0[set.begin()].geomID(); // reference geometry for the common-geomID check

  /* per-block scan: count extra slots needed for opening and track geomID equality */
  auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
    bool commonGeomID = true;
    size_t opens = 0;
    for (size_t i=r.begin(); i<r.end(); i++) {
      commonGeomID &= prims0[i].geomID() == geomID;
      if (heuristic(prims0[i]))
        opens += prims0[i].node.getN()-1; // coarse approximation
    }
    return std::pair<size_t,bool>(opens,commonGeomID);
  };
  /* combine block results: sum the open counts, AND the geomID flags */
  auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
    return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
  };
  return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
}
// FIXME: should consider maximum available extended size
/*! replaces each openable build reference by its children; the first child
 *  overwrites the opened reference in place, the remaining children are
 *  appended into the extended range of the set */
__noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
{
  const OpenHeuristic heuristic(set);
  const size_t ext_range_start = set.end();

  /* serial path — currently disabled by the 'false &&' condition below */
  if (false && set.size() < PARALLEL_THRESHOLD)
  {
    size_t extra_elements = 0;
    for (size_t i=set.begin(); i<set.end(); i++)
    {
      if (heuristic(prims0[i]))
      {
        PrimRef tmp[MAX_OPENED_CHILD_NODES];
        const size_t n = nodeOpenerFunc(prims0[i],tmp);
        assert(extra_elements + n-1 <= set.ext_range_size());
        for (size_t j=0; j<n; j++)
          set.extend_center2(tmp[j]);

        /* first child replaces the opened reference, the rest go to the extended range */
        prims0[i] = tmp[0];
        for (size_t j=1; j<n; j++)
          prims0[ext_range_start+extra_elements+j-1] = tmp[j];
        extra_elements += n-1;
      }
    }
    set._end += extra_elements;
  }
  else
  {
    /* parallel path: an atomic counter hands out slots in the extended range */
    std::atomic<size_t> ext_elements;
    ext_elements.store(0);
    PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
        PrimInfo info(empty);
        for (size_t i=r.begin(); i<r.end(); i++)
          if (heuristic(prims0[i]))
          {
            PrimRef tmp[MAX_OPENED_CHILD_NODES];
            const size_t n = nodeOpenerFunc(prims0[i],tmp);
            const size_t ID = ext_elements.fetch_add(n-1); // reserve n-1 slots
            assert(ID + n-1 <= set.ext_range_size());

            for (size_t j=0; j<n; j++)
              info.extend_center2(tmp[j]);

            /* first child replaces the opened reference, the rest go to the extended range */
            prims0[i] = tmp[0];
            for (size_t j=1; j<n; j++)
              prims0[ext_range_start+ID+j-1] = tmp[j];
          }
        return info;
      }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
    set.centBounds.extend(info.centBounds);
    assert(ext_elements.load() <= set.ext_range_size());
    set._end += ext_elements.load();
  }
}
/*! repeatedly opens nodes until no opened child exceeds the extent threshold,
 *  or the extended range cannot hold the next iteration's estimated children */
__noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
{
  const OpenHeuristic heuristic(set);
  size_t next_iteration_extra_elements = est_new_elements;

  while (next_iteration_extra_elements <= set.ext_range_size())
  {
    next_iteration_extra_elements = 0;
    size_t extra_elements = 0;
    const size_t ext_range_start = set.end();

    for (size_t i=set.begin(); i<set.end(); i++)
    {
      if (heuristic(prims0[i]))
      {
        PrimRef tmp[MAX_OPENED_CHILD_NODES];
        const size_t n = nodeOpenerFunc(prims0[i],tmp);
        assert(extra_elements + n-1 <= set.ext_range_size());
        for (size_t j=0;j<n;j++)
          set.extend_center2(tmp[j]);

        /* first child replaces the opened reference, the rest go to the extended range */
        prims0[i] = tmp[0];
        for (size_t j=1;j<n;j++)
          prims0[ext_range_start+extra_elements+j-1] = tmp[j];
        extra_elements += n-1;

        /* estimate how many extra slots the next iteration would need */
        for (size_t j=0; j<n; j++)
          if (heuristic(tmp[j]))
            next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
      }
    }
    assert( extra_elements <= set.ext_range_size());
    set._end += extra_elements;

    for (size_t i=set.begin();i<set.end();i++)
      assert(prims0[i].numPrimitives() > 0);

    if (unlikely(next_iteration_extra_elements == 0)) break;
  }
}
/*! Finds the best split for the given range. Before binning, node opening
 *  may be performed (or disabled) depending on the extended-range state:
 *  opening is disabled for small fully-disjoint sets, for sets where all
 *  primitives come from the same geometry, and when the remaining
 *  extended space is too small to open another node. */
__noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
{
  /* single element */
  if (set.size() <= 1)
    return Split();
  /* disable opening if there is no overlap */
  const size_t D = 4;
  if (unlikely(set.has_ext_range() && set.size() <= D))
  {
    /* test all unordered pairs (j,i) with i > j. BUGFIX: the previous
       code started the inner loop at set.begin()+1 for every j, which
       compared elements with themselves; conjoint(b,b) is always true,
       so 'disjoint' was false for any set of size >= 3 and this
       optimization never triggered. The outer loop now also stops as
       soon as an overlap is found. */
    bool disjoint = true;
    for (size_t j=set.begin(); disjoint && j<set.end()-1; j++) {
      for (size_t i=j+1; i<set.end(); i++) {
        if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
          disjoint = false; break;
        }
      }
    }
    if (disjoint) set.set_ext_range(set.end()); /* disables opening */
  }
  std::pair<size_t,bool> p(0,false);
  /* disable opening when all primitives are from same geometry */
  if (unlikely(set.has_ext_range()))
  {
    p = getProperties(set);
#if EQUAL_GEOMID_STOP_CRITERIA == 1
    if (p.second) set.set_ext_range(set.end()); /* disable opening */
#endif
  }
  /* open nodes when we have sufficient space available */
  if (unlikely(set.has_ext_range()))
  {
#if USE_LOOP_OPENING == 1
    openNodesBasedOnExtendLoop(set,p.first);
#else
    if (p.first <= set.ext_range_size())
      openNodesBasedOnExtend(set);
#endif
    /* disable opening when insufficient space for opening a node available */
    if (set.ext_range_size() < max_open_size-1)
      set.set_ext_range(set.end()); /* disable opening */
  }
  /* find best split */
  return object_find(set,logBlockSize);
}
/*! dispatches the object-split search to the sequential or the parallel
 *  implementation, depending on the number of primitives in the range */
__forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
{
  const bool small_range = set.size() < PARALLEL_THRESHOLD;
  return small_range ? sequential_object_find(set,logBlockSize)
                     : parallel_object_find(set,logBlockSize);
}
/*! finds the best object split */
__noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
{
Binner binner(empty);
const BinMapping<OBJECT_BINS> mapping(set.centBounds);
binner.bin(prims0,set.begin(),set.end(),mapping);
return binner.best(mapping,logBlockSize);
}
/*! finds the best split in parallel: bins chunks of the range
 *  independently and merges the per-chunk bin statistics */
__noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
{
  Binner binner(empty);
  const BinMapping<OBJECT_BINS> mapping(set.centBounds);
  const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
  /* per-chunk binning */
  auto body = [&] (const range<size_t>& r) -> Binner {
    Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
  };
  /* merge of two partial bin infos */
  auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
    Binner r = b0; r.merge(b1,_mapping.size()); return r;
  };
  binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
  return binner.best(mapping,logBlockSize);
}
/*! array partitioning: partitions 'set_i' into 'lset' and 'rset'
 *  according to the given split; falls back to a deterministic median
 *  split when the split is invalid */
__noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  PrimInfoExtRange set = set_i;
  /* valid split */
  if (unlikely(!split.valid())) {
    deterministic_order(set);
    splitFallback(set,lset,rset);
    return;
  }
  std::pair<size_t,size_t> ext_weights(0,0);
  /* object split */
  if (likely(set.size() < PARALLEL_THRESHOLD))
    ext_weights = sequential_object_split(split,set,lset,rset);
  else
    ext_weights = parallel_object_split(split,set,lset,rset);
  /* if we have an extended range, set extended child ranges and move right split range */
  if (unlikely(set.has_ext_range()))
  {
    setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
    moveExtentedRange(set,lset,rset);
  }
}
/*! array partitioning (sequential); returns the primitive counts of both
 *  sides, used later to distribute the extended range */
std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t begin = set.begin();
  const size_t end   = set.end();
  PrimInfo local_left(empty);
  PrimInfo local_right(empty);
  const unsigned int splitPos = split.pos;
  const unsigned int splitDim = split.dim;
  const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  /* SIMD constants for the binning-based left/right classification */
  const vint4 vSplitPos(splitPos);
  const vbool4 vSplitMask( (int)splitDimMask );
  size_t center = serial_partitioning(prims0,
                                      begin,end,local_left,local_right,
                                      [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
                                      [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });
  /* construct child ranges in place (ext ranges are assigned by the caller) */
  new (&lset) PrimInfoExtRange(begin,center,center,local_left);
  new (&rset) PrimInfoExtRange(center,end,end,local_right);
  assert(area(lset.geomBounds) >= 0.0f);
  assert(area(rset.geomBounds) >= 0.0f);
  return std::pair<size_t,size_t>(local_left.size(),local_right.size());
}
/*! array partitioning (parallel variant of sequential_object_split) */
__noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t begin = set.begin();
  const size_t end   = set.end();
  PrimInfo left(empty);
  PrimInfo right(empty);
  const unsigned int splitPos = split.pos;
  const unsigned int splitDim = split.dim;
  const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  /* SIMD constants for the binning-based left/right classification */
  const vint4 vSplitPos(splitPos);
  const vbool4 vSplitMask( (int)splitDimMask );
  auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
  const size_t center = parallel_partitioning(
    prims0,begin,end,EmptyTy(),left,right,isLeft,
    [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
    [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
    PARALLEL_PARTITION_BLOCK_SIZE);
  /* construct child ranges in place (ext ranges are assigned by the caller) */
  new (&lset) PrimInfoExtRange(begin,center,center,left);
  new (&rset) PrimInfoExtRange(center,end,end,right);
  assert(area(lset.geomBounds) >= 0.0f);
  assert(area(rset.geomBounds) >= 0.0f);
  return std::pair<size_t,size_t>(left.size(),right.size());
}
void deterministic_order(const extended_range<size_t>& set)
{
/* required as parallel partition destroys original primitive order */
std::sort(&prims0[set.begin()],&prims0[set.end()]);
}
/*! fallback median split: cuts the range in half at its midpoint and
 *  accumulates the bounds of each half */
__forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t first = set.begin();
  const size_t last  = set.end();
  const size_t mid   = (first + last)/2;

  /* accumulate info for the left half */
  PrimInfo linfo(empty);
  for (size_t i=first; i<mid; i++)
    linfo.add_center2(prims0[i]);
  const size_t lweight = linfo.end;

  /* accumulate info for the right half */
  PrimInfo rinfo(empty);
  for (size_t i=mid; i<last; i++)
    rinfo.add_center2(prims0[i]);
  const size_t rweight = rinfo.end;

  new (&lset) PrimInfoExtRange(first,mid,mid,linfo);
  new (&rset) PrimInfoExtRange(mid,last,last,rinfo);

  /* distribute the parent's extended range over both children */
  if (set.has_ext_range())
  {
    setExtentedRanges(set,lset,rset,lweight,rweight);
    moveExtentedRange(set,lset,rset);
  }
}
private:
PrimRef* const prims0;
const NodeOpenerFunc& nodeOpenerFunc;
size_t max_open_size;
};
}
}

View file

@ -0,0 +1,366 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "priminfo.h"
namespace embree
{
static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5;
namespace isa
{
/*! mapping into bins: linear map from world-space position to bin index
 *  along each of the three dimensions */
template<size_t BINS>
struct SpatialBinMapping
{
public:
  __forceinline SpatialBinMapping() {}

  /*! calculates the mapping from the geometry bounds; dimensions whose
   *  extent is below an ulp-based epsilon get scale 0 and must be
   *  skipped via invalid() */
  __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
  {
    const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower;
    const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper;
    const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper));
    const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size());
    scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag);
    ofs  = (vfloat4) pinfo.geomBounds.lower;
    /* NOTE(review): for degenerate dimensions scale==0, so inv_scale is
       inf there; pos()/posN() are only meaningful where invalid(dim) is
       false */
    inv_scale = 1.0f / scale;
  }

  /*! slower but safe binning (clamps to the valid bin range) */
  __forceinline vint4 bin(const Vec3fa& p) const
  {
    const vint4 i = floori((vfloat4(p)-ofs)*scale);
    return clamp(i,vint4(0),vint4(BINS-1));
  }

  /*! bins both corners of a box; returns (lower bins, upper bins) */
  __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
  {
#if defined(__AVX__)
    /* bin lower and upper corner with one 8-wide operation */
    const vfloat8 ofs8(ofs);
    const vfloat8 scale8(scale);
    const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8);
    const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1));
    return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu));
#else
    const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale);
    const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale);
    const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1));
    const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1));
    return std::pair<vint4,vint4>(c_lower,c_upper);
#endif
  }

  /*! calculates left spatial position of bin */
  __forceinline float pos(const size_t bin, const size_t dim) const {
    return madd(float(bin),inv_scale[dim],ofs[dim]);
  }

  /*! calculates left spatial position of bin (SIMD variant) */
  template<size_t N>
  __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const {
    return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim]));
  }

  /*! returns true if the mapping is invalid in some dimension */
  __forceinline bool invalid(const size_t dim) const {
    return scale[dim] == 0.0f;
  }

public:
  vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
};
/*! stores all information required to perform some split */
template<size_t BINS>
struct SpatialBinSplit
{
  /*! construct an invalid split by default (dim == -1, infinite SAH) */
  __forceinline SpatialBinSplit()
    : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}

  /*! constructs specified split (left/right counts unknown) */
  __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
    : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}

  /*! constructs specified split with known left/right counts */
  __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
    : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}

  /*! tests if this split is valid */
  __forceinline bool valid() const { return dim != -1; }

  /*! calculates surface area heuristic for performing the split */
  __forceinline float splitSAH() const { return sah; }

  /*! stream output */
  friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
    return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
  }

public:
  float sah;    //!< SAH cost of the split
  int   dim;    //!< split dimension
  int   pos;    //!< split position
  int   left;   //!< number of elements on the left side
  int   right;  //!< number of elements on the right side
  float factor; //!< factor splitting the extended range
  SpatialBinMapping<BINS> mapping; //!< mapping into bins
};
/*! stores all binning information: per-bin bounds plus, for each
 *  dimension, how many primitives start and end in each bin */
template<size_t BINS, typename PrimRef>
struct __aligned(64) SpatialBinInfo
{
  SpatialBinInfo() {
  }

  __forceinline SpatialBinInfo(EmptyTy) {
    clear();
  }

  /*! clears the bin info */
  __forceinline void clear()
  {
    for (size_t i=0; i<BINS; i++) {
      bounds[i][0] = bounds[i][1] = bounds[i][2] = empty;
      numBegin[i] = numEnd[i] = 0;
    }
  }

  /*! adds binning data: a primitive (fragment) that starts in bin
   *  'beginID' and ends in bin 'endID' along dimension 'dim', extending
   *  the bounds of bin 'binID'; 'n' is the primitive count to add */
  __forceinline void add(const size_t dim,
                         const size_t beginID,
                         const size_t endID,
                         const size_t binID,
                         const BBox3fa &b,
                         const size_t n = 1)
  {
    assert(beginID < BINS);
    assert(endID < BINS);
    assert(binID < BINS);
    numBegin[beginID][dim]+=(unsigned int)n;
    numEnd  [endID][dim]+=(unsigned int)n;
    bounds  [binID][dim].extend(b);
  }

  /*! extends binning bounds only (no counts) */
  __forceinline void extend(const size_t dim,
                            const size_t binID,
                            const BBox3fa &b)
  {
    assert(binID < BINS);
    bounds [binID][dim].extend(b);
  }

  /*! bins an array of primitives, clipping each primitive at every bin
   *  boundary it spans (spatial binning); the upper geomID bits carry
   *  the remaining split budget of a primref */
  template<typename PrimitiveSplitterFactory>
  __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
  {
    for (size_t i=begin; i<end; i++)
    {
      const PrimRef& prim = source[i];
      unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);

      if (unlikely(splits <= 1))
      {
        /* no split budget left: bin by centroid only */
        const vint4 bin = mapping.bin(center(prim.bounds()));
        for (size_t dim=0; dim<3; dim++)
        {
          assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
          add(dim,bin[dim],bin[dim],bin[dim],prim.bounds());
        }
      }
      else
      {
        const vint4 bin0 = mapping.bin(prim.bounds().lower);
        const vint4 bin1 = mapping.bin(prim.bounds().upper);

        for (size_t dim=0; dim<3; dim++)
        {
          if (unlikely(mapping.invalid(dim)))
            continue;

          size_t bin;
          size_t l = bin0[dim];
          size_t r = bin1[dim];

          // same bin optimization
          if (likely(l == r))
          {
            add(dim,l,l,l,prim.bounds());
            continue;
          }
          size_t bin_start = bin0[dim];
          size_t bin_end   = bin1[dim];
          BBox3fa rest = prim.bounds();

          /* assure that split position always overlaps the primitive bounds */
          while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++;
          while (bin_start < bin_end && mapping.pos(bin_end  ,dim) >= rest.upper[dim]) bin_end--;

          /* chop the primitive at every bin boundary it spans; 'rest'
             carries the remaining right part; empty fragments shift the
             effective start/end bins (l/r) */
          const auto splitter = splitterFactory(prim);
          for (bin=bin_start; bin<bin_end; bin++)
          {
            const float pos = mapping.pos(bin+1,dim);
            BBox3fa left,right;
            splitter(rest,dim,pos,left,right);
            if (unlikely(left.empty())) l++;
            extend(dim,bin,left);
            rest = right;
          }
          if (unlikely(rest.empty())) r--;
          add(dim,l,r,bin,rest);
        }
      }
    }
  }

  /*! bins an array of subtree references; the primID of each ref holds
   *  the number of primitives represented by that subtree */
  __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
  {
    for (size_t i=begin; i<end; i++)
    {
      const PrimRef &prim = source[i];
      const vint4 bin0 = mapping.bin(prim.bounds().lower);
      const vint4 bin1 = mapping.bin(prim.bounds().upper);

      for (size_t dim=0; dim<3; dim++)
      {
        if (unlikely(mapping.invalid(dim)))
          continue;

        const size_t l = bin0[dim];
        const size_t r = bin1[dim];

        /* number of primitives in the subtree */
        const unsigned int n = prim.primID();

        // same bin optimization
        if (likely(l == r))
        {
          add(dim,l,l,l,prim.bounds(),n);
          continue;
        }
        const size_t bin_start = bin0[dim];
        const size_t bin_end   = bin1[dim];
        for (size_t bin=bin_start; bin<bin_end; bin++)
          add(dim,l,r,bin,prim.bounds(),n);
      }
    }
  }

  /*! merges in other binning information */
  void merge (const SpatialBinInfo& other)
  {
    for (size_t i=0; i<BINS; i++)
    {
      numBegin[i] += other.numBegin[i];
      numEnd  [i] += other.numEnd  [i];
      bounds[i][0].extend(other.bounds[i][0]);
      bounds[i][1].extend(other.bounds[i][1]);
      bounds[i][2].extend(other.bounds[i][2]);
    }
  }

  /*! merges in other binning information */
  static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
  {
    SpatialBinInfo c(empty);
    for (size_t i=0; i<BINS; i++)
    {
      c.numBegin[i] += a.numBegin[i]+b.numBegin[i];
      c.numEnd  [i] += a.numEnd  [i]+b.numEnd  [i];
      c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]);
      c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]);
      c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]);
    }
    return c;
  }

  /*! finds the best split by scanning binning information */
  SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
  {
    /* sweep from right to left and compute parallel prefix of merged bounds */
    vfloat4 rAreas[BINS];
    vuint4 rCounts[BINS];
    vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
    for (size_t i=BINS-1; i>0; i--)
    {
      count += numEnd[i];
      rCounts[i] = count;
      bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
      by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
      bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
      rAreas[i][3] = 0.0f;
    }

    /* sweep from left to right and compute SAH for all three dimensions at once */
    vuint4 blocks_add = (1 << blocks_shift)-1;
    vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
    count = 0; bx = empty; by = empty; bz = empty;
    for (size_t i=1; i<BINS; i++, ii+=1)
    {
      count += numBegin[i-1];
      bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
      by.extend(bounds[i-1][1]); float Ay = halfArea(by);
      bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
      const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
      const vfloat4 rArea = rAreas[i];
      /* round primitive counts up to block granularity */
      const vuint4 lCount = (count     +blocks_add) >> (unsigned int)(blocks_shift);
      const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
      const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
      // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
      const vbool4 mask = sah < vbestSAH;
      vbestPos    = select(mask,ii ,vbestPos);
      vbestSAH    = select(mask,sah,vbestSAH);
      vbestlCount = select(mask,count,vbestlCount);
      vbestrCount = select(mask,rCounts[i],vbestrCount);
    }

    /* find best dimension */
    float bestSAH = inf;
    int   bestDim = -1;
    int   bestPos = 0;
    unsigned int bestlCount = 0;
    unsigned int bestrCount = 0;
    for (int dim=0; dim<3; dim++)
    {
      /* ignore zero sized dimensions */
      if (unlikely(mapping.invalid(dim)))
        continue;

      /* test if this is a better dimension */
      if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
        bestDim = dim;
        bestPos = vbestPos[dim];
        bestSAH = vbestSAH[dim];
        bestlCount = vbestlCount[dim];
        bestrCount = vbestrCount[dim];
      }
    }
    assert(bestSAH >= 0.0f);

    /* return invalid split if no split found */
    if (bestDim == -1)
      return SpatialBinSplit<BINS>(inf,-1,0,mapping);

    /* return best found split */
    return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
  }

private:
  BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
  vuint4 numBegin[BINS];   //!< number of primitives starting in bin
  vuint4 numEnd[BINS];     //!< number of primitives ending in bin
};
}
}

View file

@ -0,0 +1,547 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "heuristic_binning.h"
#include "heuristic_spatial.h"
namespace embree
{
namespace isa
{
#if 0
#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f
#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f
#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f
#else
#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f
#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f
#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f
#endif
/*! a primitive range with centroid/geometry bounds and an extended
 *  (reserve) part used for node opening and spatial splits */
struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t>
{
  __forceinline PrimInfoExtRange() {}

  __forceinline PrimInfoExtRange(EmptyTy)
    : CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {}

  __forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds)
    : CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {}

  /*! SAH cost of keeping the whole range in a single leaf */
  __forceinline float leafSAH() const {
    const float half_area = expectedApproxHalfArea(geomBounds);
    return half_area*float(size());
  }

  /*! SAH cost when primitives are packed into blocks of 2^block_shift */
  __forceinline float leafSAH(size_t block_shift) const {
    const float half_area = expectedApproxHalfArea(geomBounds);
    const size_t num_blocks = (size()+(size_t(1)<<block_shift)-1) >> block_shift;
    return half_area*float(num_blocks);
  }
};
/*! tagged union that stores either an object split or a spatial split in
 *  one aligned raw buffer; the 'spatial' flag selects the active member */
template<typename ObjectSplit, typename SpatialSplit>
struct Split2
{
  __forceinline Split2 () {}

  /* copy construction copies only the member selected by 'spatial' */
  __forceinline Split2 (const Split2& other)
  {
    spatial = other.spatial;
    sah = other.sah;
    if (spatial) spatialSplit() = other.spatialSplit();
    else objectSplit() = other.objectSplit();
  }

  /* assignment likewise copies only the active member */
  __forceinline Split2& operator= (const Split2& other)
  {
    spatial = other.spatial;
    sah = other.sah;
    if (spatial) spatialSplit() = other.spatialSplit();
    else objectSplit() = other.objectSplit();
    return *this;
  }

  /* typed accessors into the raw storage; only valid for the active member */
  __forceinline       ObjectSplit& objectSplit()        { return *(      ObjectSplit*)data; }
  __forceinline const ObjectSplit& objectSplit() const  { return *(const ObjectSplit*)data; }
  __forceinline       SpatialSplit& spatialSplit()       { return *(      SpatialSplit*)data; }
  __forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; }

  /*! constructs an object split */
  __forceinline Split2 (const ObjectSplit& objectSplit, float sah)
    : spatial(false), sah(sah)
  {
    new (data) ObjectSplit(objectSplit);
  }

  /*! constructs a spatial split */
  __forceinline Split2 (const SpatialSplit& spatialSplit, float sah)
    : spatial(true), sah(sah)
  {
    new (data) SpatialSplit(spatialSplit);
  }

  __forceinline float splitSAH() const {
    return sah;
  }

  __forceinline bool valid() const {
    return sah < float(inf);
  }

public:
  __aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)];
  bool spatial;  //!< true if 'data' holds a SpatialSplit, false for ObjectSplit
  float sah;     //!< SAH cost of the stored split
};
/*! Performs standard object binning */
template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS>
struct HeuristicArraySpatialSAH
{
typedef BinSplit<OBJECT_BINS> ObjectSplit;
typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner;
typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit;
typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner;
//typedef extended_range<size_t> Set;
typedef Split2<ObjectSplit,SpatialSplit> Split;
static const size_t PARALLEL_THRESHOLD = 3*1024;
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
static const size_t MOVE_STEP_SIZE = 64;
static const size_t CREATE_SPLITS_STEP_SIZE = 64;
__forceinline HeuristicArraySpatialSAH ()
: prims0(nullptr) {}
/*! remember prim array */
__forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info)
: prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {}
/*! distributes the parent's extended (reserve) range over both children,
 *  proportionally to their primitive weights */
__noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
{
  assert(set.ext_range_size() > 0);
  const float w_left    = (float)lweight / (lweight + rweight);
  const size_t ext_total = set.ext_range_size();
  const size_t ext_left  = min((size_t)(floorf(w_left * ext_total)),ext_total);
  const size_t ext_right = ext_total - ext_left;
  lset.set_ext_range(lset.end() + ext_left);
  rset.set_ext_range(rset.end() + ext_right);
}
/*! moves the right child's primitives towards the end of the buffer so
 *  the left child's extended range becomes contiguous free space */
__noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t left_ext_range_size = lset.ext_range_size();
  const size_t right_size = rset.size();

  /* has the left child an extended range? */
  if (left_ext_range_size > 0)
  {
    /* left extended range smaller than right range ? */
    if (left_ext_range_size < right_size)
    {
      /* only move a small part of the beginning of the right range to the end */
      /* (source and destination overlap, but each copied index i is read
         before index i+right_size is ever written, so this is safe) */
      parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
        for (size_t i=r.begin(); i<r.end(); i++)
          prims0[i+right_size] = prims0[i];
      });
    }
    else
    {
      /* no overlap, move entire right range to new location, can be made fully parallel */
      parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
        for (size_t i=r.begin(); i<r.end(); i++)
          prims0[i+left_ext_range_size] = prims0[i];
      });
    }
    /* update right range */
    assert(rset.ext_end() + left_ext_range_size == set.ext_end());
    rset.move_right(left_ext_range_size);
  }
}
/*! finds the best split: always evaluates the object split and, when the
 *  two object-split children overlap sufficiently and extended space is
 *  available, additionally evaluates a spatial split and prefers it if
 *  its SAH is clearly better and its fragments fit the extended range */
const Split find(const PrimInfoExtRange& set, const size_t logBlockSize)
{
  SplitInfo oinfo;
  const ObjectSplit object_split = object_find(set,logBlockSize,oinfo);
  const float object_split_sah = object_split.splitSAH();

  if (unlikely(set.has_ext_range()))
  {
    const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds);

    /* do only spatial splits if the child bounds overlap */
    if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) &&
        safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds))
    {
      const SpatialSplit spatial_split = spatial_find(set, logBlockSize);
      const float spatial_split_sah = spatial_split.splitSAH();

      /* valid spatial split, better SAH and number of splits do not exceed extended range */
      if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah &&
          spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size())
      {
        return Split(spatial_split,spatial_split_sah);
      }
    }
  }
  return Split(object_split,object_split_sah);
}
/*! dispatches the object-split search (with SplitInfo output) to the
 *  sequential or the parallel implementation based on range size */
__forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
{
  const bool small_range = set.size() < PARALLEL_THRESHOLD;
  return small_range ? sequential_object_find(set,logBlockSize,info)
                     : parallel_object_find(set,logBlockSize,info);
}
/*! computes the best object split sequentially and fills 'info' with the
 *  bounds of both resulting sides */
__noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
{
  ObjectBinner bin_info(empty);
  const BinMapping<OBJECT_BINS> bin_mapping(set);
  bin_info.bin(prims0,set.begin(),set.end(),bin_mapping);
  ObjectSplit best_split = bin_info.best(bin_mapping,logBlockSize);
  bin_info.getSplitInfo(bin_mapping, best_split, info);
  return best_split;
}
/*! finds the best object split in parallel: bins chunks of the range and
 *  merges the per-chunk bin statistics, then fills 'info' with the
 *  bounds of both resulting sides */
__noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
{
  ObjectBinner binner(empty);
  const BinMapping<OBJECT_BINS> mapping(set);
  const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
  binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
                           [&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; },
                           [&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; });
  ObjectSplit s = binner.best(mapping,logBlockSize);
  binner.getSplitInfo(mapping, s, info);
  return s;
}
/*! dispatches the spatial-split search to the sequential or the parallel
 *  implementation based on range size */
__forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
{
  const bool small_range = set.size() < PARALLEL_THRESHOLD;
  return small_range ? sequential_spatial_find(set, logBlockSize)
                     : parallel_spatial_find(set, logBlockSize);
}
/*! computes the best spatial split sequentially by clipping primitives
 *  into spatial bins and evaluating the SAH */
__noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
{
  SpatialBinner bin_info(empty);
  const SpatialBinMapping<SPATIAL_BINS> bin_mapping(set);
  bin_info.bin2(splitterFactory,prims0,set.begin(),set.end(),bin_mapping);
  /* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
  return bin_info.best(bin_mapping,logBlockSize); //,set.ext_size());
}
/*! computes the best spatial split in parallel: each task spatially bins
 *  a chunk of the range, partial bin infos are merged via reduce() */
__noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
{
  SpatialBinner binner(empty);
  const SpatialBinMapping<SPATIAL_BINS> mapping(set);
  const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
  binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
                           [&] (const range<size_t>& r) -> SpatialBinner {
                             SpatialBinner binner(empty);
                             binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);
                             return binner; },
                           [&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });
  /* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
  return binner.best(mapping,logBlockSize); //,set.ext_size());
}
/*! subdivides primitives based on a spatial split: every primref that
 *  straddles the split plane and still has split budget (carried in the
 *  upper geomID bits) is split in two; the left part overwrites the
 *  original, the right part is appended into the extended range */
__noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping)
{
  assert(set.has_ext_range());
  const size_t max_ext_range_size = set.ext_range_size();
  const size_t ext_range_start = set.end();

  /* atomic counter for number of primref splits */
  std::atomic<size_t> ext_elements;
  ext_elements.store(0);

  /* world-space position of the split plane */
  const float fpos = split.mapping.pos(split.pos,split.dim);

  /* mask that clears the split-budget bits in the upper geomID bits */
  const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;

  parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) {
    for (size_t i=r.begin();i<r.end();i++)
    {
      const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);

      if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */

      //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
      //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
      //if (unlikely(bin0 < split.pos && bin1 >= split.pos))
      if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
      {
        assert(splits > 1);

        PrimRef left,right;
        const auto splitter = splitterFactory(prims0[i]);
        splitter(prims0[i],split.dim,fpos,left,right);

        // no empty splits
        if (unlikely(left.bounds().empty() || right.bounds().empty())) continue;

        /* both halves inherit a decremented split budget */
        left.lower.u = (left.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
        right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));

        const size_t ID = ext_elements.fetch_add(1);

        /* break if the number of subdivided elements are greater than the maximum allowed size */
        /* (the counter may end up above the maximum; it is clamped below) */
        if (unlikely(ID >= max_ext_range_size))
          break;

        /* only write within the correct bounds */
        assert(ID < max_ext_range_size);
        prims0[i] = left;
        prims0[ext_range_start+ID] = right;
      }
    }
  });

  const size_t numExtElements = min(max_ext_range_size,ext_elements.load());
  assert(set.end()+numExtElements<=set.ext_end());
  set._end += numExtElements;
}
/*! array partitioning: applies the chosen split (spatial or object) to
 *  'set_i', producing the child ranges 'lset' and 'rset'; a spatial
 *  split first subdivides the straddling primrefs */
void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  PrimInfoExtRange set = set_i;

  /* valid split */
  if (unlikely(!split.valid())) {
    deterministic_order(set);
    return splitFallback(set,lset,rset);
  }

  std::pair<size_t,size_t> ext_weights(0,0);

  if (unlikely(split.spatial))
  {
    create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping);

    /* spatial split */
    if (likely(set.size() < PARALLEL_THRESHOLD))
      ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset);
    else
      ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset);
  }
  else
  {
    /* object split */
    if (likely(set.size() < PARALLEL_THRESHOLD))
      ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset);
    else
      ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset);
  }

  /* if we have an extended range, set extended child ranges and move right split range */
  if (unlikely(set.has_ext_range()))
  {
    setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
    moveExtentedRange(set,lset,rset);
  }
}
/*! array partitioning for an object split (sequential); the returned
 *  weights are the PrimInfo 'end' accumulators, which here include the
 *  per-ref split budget passed to add_center2 */
std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t begin = set.begin();
  const size_t end   = set.end();
  PrimInfo local_left(empty);
  PrimInfo local_right(empty);
  const unsigned int splitPos = split.pos;
  const unsigned int splitDim = split.dim;
  const unsigned int splitDimMask = (unsigned int)1 << splitDim;

  /* SIMD constants for the binning-based left/right classification */
  const typename ObjectBinner::vint vSplitPos(splitPos);
  const typename ObjectBinner::vbool vSplitMask(splitDimMask);
  size_t center = serial_partitioning(prims0,
                                      begin,end,local_left,local_right,
                                      [&] (const PrimRef& ref) {
                                        return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask);
                                      },
                                      [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
  const size_t left_weight = local_left.end;
  const size_t right_weight = local_right.end;

  new (&lset) PrimInfoExtRange(begin,center,center,local_left);
  new (&rset) PrimInfoExtRange(center,end,end,local_right);

  assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
  assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
  return std::pair<size_t,size_t>(left_weight,right_weight);
}
/*! array partitioning for a spatial split (sequential); classifies each
 *  primref by the spatial bin of its bounds center relative to the
 *  split plane */
__noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t begin = set.begin();
  const size_t end   = set.end();
  PrimInfo local_left(empty);
  PrimInfo local_right(empty);
  const unsigned int splitPos = split.pos;
  const unsigned int splitDim = split.dim;
  const unsigned int splitDimMask = (unsigned int)1 << splitDim;

  /* init spatial mapping */
  const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping;
  const vint4 vSplitPos(splitPos);
  const vbool4 vSplitMask( (int)splitDimMask );

  size_t center = serial_partitioning(prims0,
                                      begin,end,local_left,local_right,
                                      [&] (const PrimRef& ref) {
                                        const Vec3fa c = ref.bounds().center();
                                        return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);
                                      },
                                      [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });

  const size_t left_weight = local_left.end;
  const size_t right_weight = local_right.end;

  new (&lset) PrimInfoExtRange(begin,center,center,local_left);
  new (&rset) PrimInfoExtRange(center,end,end,local_right);

  assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
  assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
  return std::pair<size_t,size_t>(left_weight,right_weight);
}
/*! array partitioning (parallel, object split):
 *  same semantics as sequential_object_split but uses the parallel
 *  partitioner for large ranges; returns (left,right) weights. */
__noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t begin = set.begin();
  const size_t end   = set.end();
  PrimInfo left(empty);
  PrimInfo right(empty);
  const unsigned int splitPos = split.pos;
  const unsigned int splitDim = split.dim;
  const unsigned int splitDimMask = (unsigned int)1 << splitDim;

  const typename ObjectBinner::vint vSplitPos(splitPos);
  const typename ObjectBinner::vbool vSplitMask(splitDimMask);
  auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };

  const size_t center = parallel_partitioning(
    prims0,begin,end,EmptyTy(),left,right,isLeft,
    /* per-thread extension; weight taken from the top geomID bits */
    [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
    /* reduction of per-thread PrimInfos */
    [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
    PARALLEL_PARTITION_BLOCK_SIZE);

  /* capture weights before begin/end are overwritten with range indices below */
  const size_t left_weight  = left.end;
  const size_t right_weight = right.end;

  left.begin  = begin;  left.end  = center;
  right.begin = center; right.end = end;

  new (&lset) PrimInfoExtRange(begin,center,center,left);
  new (&rset) PrimInfoExtRange(center,end,end,right);

  assert(area(left.geomBounds) >= 0.0f);
  assert(area(right.geomBounds) >= 0.0f);
  return std::pair<size_t,size_t>(left_weight,right_weight);
}
/*! array partitioning (parallel, spatial split):
 *  same semantics as sequential_spatial_split but uses the parallel
 *  partitioner for large ranges; returns (left,right) weights. */
__noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
{
  const size_t begin = set.begin();
  const size_t end   = set.end();
  PrimInfo left(empty);
  PrimInfo right(empty);
  const unsigned int splitPos = split.pos;
  const unsigned int splitDim = split.dim;
  const unsigned int splitDimMask = (unsigned int)1 << splitDim;

  /* init spatial mapping */
  const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping;
  const vint4 vSplitPos(splitPos);
  const vbool4 vSplitMask( (int)splitDimMask );

  /* left-side predicate: spatial bin of the bounds center in the split dimension */
  auto isLeft = [&] (const PrimRef &ref) {
    const Vec3fa c = ref.bounds().center();
    return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); };

  const size_t center = parallel_partitioning(
    prims0,begin,end,EmptyTy(),left,right,isLeft,
    /* per-thread extension; weight taken from the top geomID bits */
    [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },
    /* reduction of per-thread PrimInfos */
    [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
    PARALLEL_PARTITION_BLOCK_SIZE);

  /* capture weights before begin/end are overwritten with range indices below */
  const size_t left_weight  = left.end;
  const size_t right_weight = right.end;

  left.begin  = begin;  left.end  = center;
  right.begin = center; right.end = end;

  new (&lset) PrimInfoExtRange(begin,center,center,left);
  new (&rset) PrimInfoExtRange(center,end,end,right);

  assert(area(left.geomBounds) >= 0.0f);
  assert(area(right.geomBounds) >= 0.0f);
  return std::pair<size_t,size_t>(left_weight,right_weight);
}
void deterministic_order(const PrimInfoExtRange& set)
{
/* required as parallel partition destroys original primitive order */
std::sort(&prims0[set.begin()],&prims0[set.end()]);
}
/*! Fallback split: cuts the range in half by index (median split),
 *  used when no valid SAH split was found. Rebuilds left/right PrimInfos
 *  and, if the range carries extended storage, redistributes it. */
void splitFallback(const PrimInfoExtRange& set,
                   PrimInfoExtRange& lset,
                   PrimInfoExtRange& rset)
{
  const size_t begin = set.begin();
  const size_t end = set.end();
  const size_t center = (begin + end)/2;

  /* accumulate left-side info; weight taken from the top geomID bits */
  PrimInfo left(empty);
  for (size_t i=begin; i<center; i++) {
    left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
  }
  const size_t lweight = left.end;

  /* accumulate right-side info */
  PrimInfo right(empty);
  for (size_t i=center; i<end; i++) {
    right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));
  }
  const size_t rweight = right.end;

  /* construct child ranges in place */
  new (&lset) PrimInfoExtRange(begin,center,center,left);
  new (&rset) PrimInfoExtRange(center,end,end,right);

  /* if we have an extended range, distribute it between the children */
  if (set.has_ext_range()) {
    setExtentedRanges(set,lset,rset,lweight,rweight);
    moveExtentedRange(set,lset,rset);
  }
}
private:
  PrimRef* const prims0;                           //!< primitive array partitioned in place by this builder
  const PrimitiveSplitterFactory& splitterFactory; //!< factory for per-primitive splitters (spatial splits)
  const CentGeomBBox3fa& root_info;                //!< bounds info of the root build range
};
}
}

View file

@ -0,0 +1,188 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "priminfo.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_partition.h"
namespace embree
{
namespace isa
{
/*! Strand-split heuristic for hair/curve builders: partitions curves into
 *  two sets ("strands") by their dominant direction, so each child can use
 *  a tightly-fitting oriented bounding space. */
struct HeuristicStrandSplit
{
  typedef range<size_t> Set;

  static const size_t PARALLEL_THRESHOLD = 10000;
  static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
  static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;

  /*! stores all information to perform some split */
  struct Split
  {
    /*! construct an invalid split by default */
    __forceinline Split()
      : sah(inf), axis0(zero), axis1(zero) {}

    /*! constructs specified split */
    __forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
      : sah(sah), axis0(axis0), axis1(axis1) {}

    /*! calculates standard surface area heuristic for the split */
    __forceinline float splitSAH() const { return sah; }

    /*! test if this split is valid (an invalid split carries sah == inf) */
    __forceinline bool valid() const { return sah != float(inf); }

  public:
    float sah;           //!< SAH cost of the split
    Vec3fa axis0, axis1; //!< axis the two strands are aligned into
  };

  __forceinline HeuristicStrandSplit () // FIXME: required?
    : scene(nullptr), prims(nullptr) {}

  /*! remember prim array */
  __forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
    : scene(scene), prims(prims) {}

  /*! direction of the curve referenced by the primitive */
  __forceinline const Vec3fa direction(const PrimRef& prim) {
    return scene->get(prim.geomID())->computeDirection(prim.primID());
  }

  /*! world-space bounds of the referenced curve */
  __forceinline const BBox3fa bounds(const PrimRef& prim) {
    return scene->get(prim.geomID())->vbounds(prim.primID());
  }

  /*! bounds of the referenced curve in the given (oriented) space */
  __forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim) {
    return scene->get(prim.geomID())->vbounds(space,prim.primID());
  }

  /*! finds the best split; deterministic regardless of primitive order,
   *  because both axes are chosen via minimum-ID tie-breaking */
  const Split find(const range<size_t>& set, size_t logBlockSize)
  {
    Vec3fa axis0(0,0,1);
    uint64_t bestGeomPrimID = -1;

    /* curve with minimum ID determines first axis */
    for (size_t i=set.begin(); i<set.end(); i++)
    {
      const uint64_t geomprimID = prims[i].ID64();
      if (geomprimID >= bestGeomPrimID) continue;
      const Vec3fa axis = direction(prims[i]);
      if (sqr_length(axis) > 1E-18f) { // skip degenerate (near zero-length) curves
        axis0 = normalize(axis);
        bestGeomPrimID = geomprimID;
      }
    }

    /* find 2nd axis that is most misaligned with first axis and has minimum ID */
    float bestCos = 1.0f;
    Vec3fa axis1 = axis0;
    bestGeomPrimID = -1;
    for (size_t i=set.begin(); i<set.end(); i++)
    {
      const uint64_t geomprimID = prims[i].ID64();
      Vec3fa axisi = direction(prims[i]);
      float leni = length(axisi);
      if (leni == 0.0f) continue;
      axisi /= leni;
      float cos = abs(dot(axisi,axis0));
      /* smaller |cos| = more misaligned; ties broken by minimum ID */
      if ((cos == bestCos && (geomprimID < bestGeomPrimID)) || cos < bestCos) {
        bestCos = cos; axis1 = axisi;
        bestGeomPrimID = geomprimID;
      }
    }

    /* partition the two strands: each curve goes to the axis it aligns with best */
    size_t lnum = 0, rnum = 0;
    BBox3fa lbounds = empty, rbounds = empty;
    const LinearSpace3fa space0 = frame(axis0).transposed();
    const LinearSpace3fa space1 = frame(axis1).transposed();

    for (size_t i=set.begin(); i<set.end(); i++)
    {
      PrimRef& prim = prims[i];
      const Vec3fa axisi = normalize(direction(prim));
      const float cos0 = abs(dot(axisi,axis0));
      const float cos1 = abs(dot(axisi,axis1));

      if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
      else             { rnum++; rbounds.extend(bounds(space1,prim)); }
    }

    /*! return an invalid split if we do not partition */
    if (lnum == 0 || rnum == 0)
      return Split(inf,axis0,axis1);

    /*! calculate sah for the split (block counts rounded up by logBlockSize) */
    const size_t lblocks = (lnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
    const size_t rblocks = (rnum+(1ull<<logBlockSize)-1ull) >> logBlockSize;
    const float sah = madd(float(lblocks),halfArea(lbounds),float(rblocks)*halfArea(rbounds));
    return Split(sah,axis0,axis1);
  }

  /*! array partitioning: moves primitives aligned with split.axis0 to the
   *  left side and the rest to the right side, rebuilding both PrimInfos */
  void split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  {
    /* invalid split -> deterministic median fallback */
    if (!split.valid()) {
      deterministic_order(set);
      return splitFallback(set,lset,rset);
    }

    const size_t begin = set.begin();
    const size_t end = set.end();
    CentGeomBBox3fa local_left(empty);
    CentGeomBBox3fa local_right(empty);

    auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
      const Vec3fa axisi = normalize(direction(prim));
      const float cos0 = abs(dot(axisi,split.axis0));
      const float cos1 = abs(dot(axisi,split.axis1));
      return cos0 > cos1;
    };

    auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
      pinfo.extend(bounds(ref));
    };

    size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);

    new (&lset) PrimInfoRange(begin,center,local_left);
    new (&rset) PrimInfoRange(center,end,local_right);
    assert(area(lset.geomBounds) >= 0.0f);
    assert(area(rset.geomBounds) >= 0.0f);
  }

  void deterministic_order(const Set& set)
  {
    /* required as parallel partition destroys original primitive order */
    std::sort(&prims[set.begin()],&prims[set.end()]);
  }

  /*! median fallback split: halves the range by index */
  void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
  {
    const size_t begin = set.begin();
    const size_t end = set.end();
    const size_t center = (begin + end)/2;

    CentGeomBBox3fa left(empty);
    for (size_t i=begin; i<center; i++)
      left.extend(bounds(prims[i]));
    new (&lset) PrimInfoRange(begin,center,left);

    CentGeomBBox3fa right(empty);
    for (size_t i=center; i<end; i++)
      right.extend(bounds(prims[i]));
    new (&rset) PrimInfoRange(center,end,right);
  }

private:
  Scene* const scene; //!< scene used to resolve geomID/primID into curves
  PrimRef* const prims; //!< primitive array partitioned in place
};
}
}

View file

@ -0,0 +1,237 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../builders/primref_mb.h"
#include "../../common/algorithms/parallel_filter.h"
#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f
namespace embree
{
namespace isa
{
/*! Temporal split heuristic for motion-blur BVH builds: bins primitives
 *  over candidate time-split positions and, on split, produces two
 *  primitive sets covering the two half time ranges. */
template<typename PrimRefMB, typename RecalculatePrimRef, size_t BINS>
struct HeuristicMBlurTemporalSplit
{
  typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
  typedef mvector<PrimRefMB>* PrimRefVector;
  typedef typename PrimRefMB::BBox BBox;

  static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;

  HeuristicMBlurTemporalSplit (MemoryMonitorInterface* device, const RecalculatePrimRef& recalculatePrimRef)
    : device(device), recalculatePrimRef(recalculatePrimRef) {}

  /*! per-candidate-split counters and bounds (BINS-1 candidate positions) */
  struct TemporalBinInfo
  {
    __forceinline TemporalBinInfo () {
    }

    /*! clears all counters and bounds */
    __forceinline TemporalBinInfo (EmptyTy)
    {
      for (size_t i=0; i<BINS-1; i++)
      {
        count0[i] = count1[i] = 0;
        bounds0[i] = bounds1[i] = empty;
      }
    }

    /*! bins prims[begin,end) against every candidate time split */
    void bin(const PrimRefMB* prims, size_t begin, size_t end, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
    {
      for (int b=0; b<BINS-1; b++)
      {
        /* candidate split time, snapped to the time-segment grid */
        const float t = float(b+1)/float(BINS);
        const float ct = lerp(time_range.lower,time_range.upper,t);
        const float center_time = set.align_time(ct);
        if (center_time <= time_range.lower) continue; // snapping collapsed the candidate
        if (center_time >= time_range.upper) continue;
        const BBox1f dt0(time_range.lower,center_time);
        const BBox1f dt1(center_time,time_range.upper);

        /* find linear bounds for both time segments */
        for (size_t i=begin; i<end; i++)
        {
          if (prims[i].time_range_overlap(dt0))
          {
            const LBBox3fa bn0 = recalculatePrimRef.linearBounds(prims[i],dt0);
#if MBLUR_BIN_LBBOX
            bounds0[b].extend(bn0);
#else
            bounds0[b].extend(bn0.interpolate(0.5f));
#endif
            count0[b] += prims[i].timeSegmentRange(dt0).size();
          }

          if (prims[i].time_range_overlap(dt1))
          {
            const LBBox3fa bn1 = recalculatePrimRef.linearBounds(prims[i],dt1);
#if MBLUR_BIN_LBBOX
            bounds1[b].extend(bn1);
#else
            bounds1[b].extend(bn1.interpolate(0.5f));
#endif
            count1[b] += prims[i].timeSegmentRange(dt1).size();
          }
        }
      }
    }

    /*! bins sequentially below the threshold, otherwise via parallel_reduce */
    __forceinline void bin_parallel(const PrimRefMB* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, BBox1f time_range, const SetMB& set, const RecalculatePrimRef& recalculatePrimRef)
    {
      if (likely(end-begin < parallelThreshold)) {
        bin(prims,begin,end,time_range,set,recalculatePrimRef);
      }
      else
      {
        auto bin = [&](const range<size_t>& r) -> TemporalBinInfo {
          TemporalBinInfo binner(empty); binner.bin(prims, r.begin(), r.end(), time_range, set, recalculatePrimRef); return binner;
        };
        *this = parallel_reduce(begin,end,blockSize,TemporalBinInfo(empty),bin,merge2);
      }
    }

    /*! merges in other binning information */
    __forceinline void merge (const TemporalBinInfo& other)
    {
      for (size_t i=0; i<BINS-1; i++)
      {
        count0[i] += other.count0[i];
        count1[i] += other.count1[i];
        bounds0[i].extend(other.bounds0[i]);
        bounds1[i].extend(other.bounds1[i]);
      }
    }

    /*! value-returning merge used as parallel_reduce reduction */
    static __forceinline const TemporalBinInfo merge2(const TemporalBinInfo& a, const TemporalBinInfo& b) {
      TemporalBinInfo r = a; r.merge(b); return r;
    }

    /*! selects the candidate with minimum SAH; the returned cost is scaled
     *  by MBLUR_TIME_SPLIT_THRESHOLD to bias against temporal splits */
    Split best(int logBlockSize, BBox1f time_range, const SetMB& set)
    {
      float bestSAH = inf;
      float bestPos = 0.0f;
      for (int b=0; b<BINS-1; b++)
      {
        float t = float(b+1)/float(BINS);
        float ct = lerp(time_range.lower,time_range.upper,t);
        const float center_time = set.align_time(ct);
        if (center_time <= time_range.lower) continue;
        if (center_time >= time_range.upper) continue;

        const BBox1f dt0(time_range.lower,center_time);
        const BBox1f dt1(center_time,time_range.upper);

        /* calculate sah */
        const size_t lCount = (count0[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
        const size_t rCount = (count1[b]+(size_t(1) << logBlockSize)-1) >> int(logBlockSize);
        float sah0 = expectedApproxHalfArea(bounds0[b])*float(lCount)*dt0.size();
        float sah1 = expectedApproxHalfArea(bounds1[b])*float(rCount)*dt1.size();
        if (unlikely(lCount == 0)) sah0 = 0.0f; // happens for initial splits when objects not alive over entire shutter time
        if (unlikely(rCount == 0)) sah1 = 0.0f;
        const float sah = sah0+sah1;
        if (sah < bestSAH) {
          bestSAH = sah;
          bestPos = center_time;
        }
      }
      return Split(bestSAH*MBLUR_TIME_SPLIT_THRESHOLD,(unsigned)Split::SPLIT_TEMPORAL,0,bestPos);
    }

  public:
    size_t count0[BINS-1]; //!< time-segment counts left of each candidate split
    size_t count1[BINS-1]; //!< time-segment counts right of each candidate split
    BBox bounds0[BINS-1];  //!< bounds left of each candidate split
    BBox bounds1[BINS-1];  //!< bounds right of each candidate split
  };

  /*! finds the best temporal split for the set */
  const Split find(const SetMB& set, const size_t logBlockSize)
  {
    assert(set.size() > 0);
    TemporalBinInfo binner(empty);
    binner.bin_parallel(set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,set.time_range,set,recalculatePrimRef);
    Split tsplit = binner.best((int)logBlockSize,set.time_range,set);
    if (!tsplit.valid()) tsplit.data = Split::SPLIT_FALLBACK; // use fallback split
    return tsplit;
  }

  /*! performs the temporal split: the left half-time primrefs go into a
   *  freshly allocated vector (returned so the caller keeps it alive),
   *  the right half-time primrefs are recalculated in place */
  __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& tsplit, const SetMB& set, SetMB& lset, SetMB& rset)
  {
    assert(tsplit.sah != float(inf));
    assert(tsplit.fpos > set.time_range.lower);
    assert(tsplit.fpos < set.time_range.upper);

    float center_time = tsplit.fpos;
    const BBox1f time_range0(set.time_range.lower,center_time);
    const BBox1f time_range1(center_time,set.time_range.upper);
    mvector<PrimRefMB>& prims = *set.prims;

    /* calculate primrefs for first time range */
    std::unique_ptr<mvector<PrimRefMB>> new_vector(new mvector<PrimRefMB>(device, set.size()));
    PrimRefVector lprims = new_vector.get();

    auto reduction_func0 = [&] (const range<size_t>& r) {
      PrimInfoMB pinfo = empty;
      for (size_t i=r.begin(); i<r.end(); i++)
      {
        if (likely(prims[i].time_range_overlap(time_range0)))
        {
          const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range0);
          (*lprims)[i-set.begin()] = prim;
          pinfo.add_primref(prim);
        }
        else
        {
          /* non-overlapping primrefs are copied unchanged and filtered out below */
          (*lprims)[i-set.begin()] = prims[i];
        }
      }
      return pinfo;
    };
    PrimInfoMB linfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func0,PrimInfoMB::merge2);

    /* primrefs for first time range are in lprims[0 .. set.size()) */
    /* some primitives may need to be filtered out */
    if (linfo.size() != set.size())
      linfo.object_range._end = parallel_filter(lprims->data(), size_t(0), set.size(), size_t(1024),
                                                [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range0); });

    lset = SetMB(linfo,lprims,time_range0);

    /* calculate primrefs for second time range */
    auto reduction_func1 = [&] (const range<size_t>& r) {
      PrimInfoMB pinfo = empty;
      for (size_t i=r.begin(); i<r.end(); i++)
      {
        if (likely(prims[i].time_range_overlap(time_range1)))
        {
          const PrimRefMB& prim = recalculatePrimRef(prims[i],time_range1);
          prims[i] = prim;
          pinfo.add_primref(prim);
        }
      }
      return pinfo;
    };
    PrimInfoMB rinfo = parallel_reduce(set.object_range,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD,PrimInfoMB(empty),reduction_func1,PrimInfoMB::merge2);
    rinfo.object_range = range<size_t>(set.begin(), set.begin() + rinfo.size());

    /* primrefs for second time range are in prims[set.begin() .. set.end()) */
    /* some primitives may need to be filtered out */
    if (rinfo.size() != set.size())
      rinfo.object_range._end = parallel_filter(prims.data(), set.begin(), set.end(), size_t(1024),
                                                [&](const PrimRefMB& prim) { return prim.time_range_overlap(time_range1); });

    rset = SetMB(rinfo,&prims,time_range1);

    return new_vector;
  }

private:
  MemoryMonitorInterface* device; // device to report memory usage to
  const RecalculatePrimRef recalculatePrimRef; //!< functor recalculating primref bounds for a sub time range
};
}
}

View file

@ -0,0 +1,167 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primref.h"
namespace embree
{
// FIXME: maybe there's a better place for this util fct
/*! Sum of the absolute components of the triangle's edge cross product,
 *  i.e. twice the triangle's area projected onto the three axis planes. */
__forceinline float areaProjectedTriangle(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2)
{
  const Vec3fa edge01 = v1 - v0;
  const Vec3fa edge02 = v2 - v0;
  const Vec3fa n = cross(edge01, edge02);
  return fabs(n.x) + fabs(n.y) + fabs(n.z);
}
//namespace isa
//{
/*! Accumulates geometry bounds and centroid bounds for a set of primitives. */
template<typename BBox>
class CentGeom
{
public:
  __forceinline CentGeom () {}

  __forceinline CentGeom (EmptyTy)
    : geomBounds(empty), centBounds(empty) {}

  __forceinline CentGeom (const BBox& geomBounds, const BBox3fa& centBounds)
    : geomBounds(geomBounds), centBounds(centBounds) {}

  /*! extends by a primitive using its binning bounds and center */
  template<typename PrimRef>
  __forceinline void extend_primref(const PrimRef& prim)
  {
    BBox bounds; Vec3fa center;
    prim.binBoundsAndCenter(bounds,center);
    geomBounds.extend(bounds);
    centBounds.extend(center);
  }

  /*! free-function form of extend_primref (usable as a reduction functor) */
  static void extend_ref (CentGeom& pinfo, const PrimRef& ref) {
    pinfo.extend_primref(ref);
  };

  /*! extends by a primitive; centroid bounds track 2x centers (no division) */
  template<typename PrimRef>
  __forceinline void extend_center2(const PrimRef& prim)
  {
    BBox3fa bounds = prim.bounds();
    geomBounds.extend(bounds);
    centBounds.extend(bounds.center2());
  }

  /*! extends by a plain bounding box */
  __forceinline void extend(const BBox& geomBounds_) {
    geomBounds.extend(geomBounds_);
    centBounds.extend(center2(geomBounds_));
  }

  /*! merges in another accumulator */
  __forceinline void merge(const CentGeom& other)
  {
    geomBounds.extend(other.geomBounds);
    centBounds.extend(other.centBounds);
  }

  /*! value-returning merge used as parallel reduction */
  static __forceinline const CentGeom merge2(const CentGeom& a, const CentGeom& b) {
    CentGeom r = a; r.merge(b); return r;
  }

public:
  BBox geomBounds;    //!< geometry bounds of primitives
  BBox3fa centBounds; //!< centroid bounds of primitives (stored as 2x center)
};
typedef CentGeom<BBox3fa> CentGeomBBox3fa;
/*! stores bounding information for a set of primitives, plus the
 *  [begin,end) index range (or primitive count) of the set */
template<typename BBox>
class PrimInfoT : public CentGeom<BBox>
{
public:
  using CentGeom<BBox>::geomBounds;
  using CentGeom<BBox>::centBounds;

  __forceinline PrimInfoT () {}

  __forceinline PrimInfoT (EmptyTy)
    : CentGeom<BBox>(empty), begin(0), end(0) {}

  __forceinline PrimInfoT (size_t N)
    : CentGeom<BBox>(empty), begin(0), end(N) {}

  __forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
    : CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {}

  /*! adds one primitive using its binning bounds/center */
  template<typename PrimRef>
  __forceinline void add_primref(const PrimRef& prim)
  {
    CentGeom<BBox>::extend_primref(prim);
    end++;
  }

  /*! adds one primitive using 2x-center centroid bounds */
  template<typename PrimRef>
  __forceinline void add_center2(const PrimRef& prim) {
    CentGeom<BBox>::extend_center2(prim);
    end++;
  }

  /*! adds one primitive with an explicit count/weight i */
  template<typename PrimRef>
  __forceinline void add_center2(const PrimRef& prim, const size_t i) {
    CentGeom<BBox>::extend_center2(prim);
    end+=i;
  }

  /*__forceinline void add(const BBox& geomBounds_) {
    CentGeom<BBox>::extend(geomBounds_);
    end++;
  }

  __forceinline void add(const BBox& geomBounds_, const size_t i) {
    CentGeom<BBox>::extend(geomBounds_);
    end+=i;
  }*/

  /*! merges in another PrimInfo; note begin/end are added like counters,
   *  which assumes per-thread infos start at begin == 0 — TODO confirm */
  __forceinline void merge(const PrimInfoT& other)
  {
    CentGeom<BBox>::merge(other);
    begin += other.begin;
    end += other.end;
  }

  static __forceinline const PrimInfoT merge(const PrimInfoT& a, const PrimInfoT& b) {
    PrimInfoT r = a; r.merge(b); return r;
  }

  /*! returns the number of primitives */
  __forceinline size_t size() const {
    return end-begin;
  }

  /*! expected half-surface-area of the geometry bounds */
  __forceinline float halfArea() {
    return expectedApproxHalfArea(geomBounds);
  }

  /*! SAH leaf cost: half area times primitive count */
  __forceinline float leafSAH() const {
    return expectedApproxHalfArea(geomBounds)*float(size());
    //return halfArea(geomBounds)*blocks(num);
  }

  /*! SAH leaf cost with counts rounded up to blocks of 2^block_shift */
  __forceinline float leafSAH(size_t block_shift) const {
    return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
    //return halfArea(geomBounds)*float((num+3) >> 2);
    //return halfArea(geomBounds)*blocks(num);
  }

  /*! stream output */
  friend embree_ostream operator<<(embree_ostream cout, const PrimInfoT& pinfo) {
    return cout << "PrimInfo { begin = " << pinfo.begin << ", end = " << pinfo.end << ", geomBounds = " << pinfo.geomBounds << ", centBounds = " << pinfo.centBounds << "}";
  }

public:
  size_t begin,end; //!< index range of primitives (end-begin = count)
};
typedef PrimInfoT<BBox3fa> PrimInfo;
//typedef PrimInfoT<LBBox3fa> PrimInfoMB;
//}
}

View file

@ -0,0 +1,210 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primref_mb.h"
namespace embree
{
/*! stores bounding information for a set of motion-blur primitives,
 *  including time-segment statistics used for temporal splitting */
template<typename BBox>
class PrimInfoMBT : public CentGeom<BBox>
{
public:
  using CentGeom<BBox>::geomBounds;
  using CentGeom<BBox>::centBounds;

  __forceinline PrimInfoMBT () {
  }

  __forceinline PrimInfoMBT (EmptyTy)
    : CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}

  __forceinline PrimInfoMBT (size_t begin, size_t end)
    : CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}

  /*! adds one primref, updating bounds, time range, and segment statistics */
  template<typename PrimRef>
  __forceinline void add_primref(const PrimRef& prim)
  {
    CentGeom<BBox>::extend_primref(prim);
    time_range.extend(prim.time_range);
    object_range._end++;
    num_time_segments += prim.size();
    /* track the primitive with the finest time-segment resolution */
    if (max_num_time_segments < prim.totalTimeSegments()) {
      max_num_time_segments = prim.totalTimeSegments();
      max_time_range = prim.time_range;
    }
  }

  /*! merges in another info; note range begin/end are added like counters,
   *  which assumes per-thread infos start at (0,0) — TODO confirm */
  __forceinline void merge(const PrimInfoMBT& other)
  {
    CentGeom<BBox>::merge(other);
    time_range.extend(other.time_range);
    object_range._begin += other.object_range.begin();
    object_range._end += other.object_range.end();
    num_time_segments += other.num_time_segments;
    if (max_num_time_segments < other.max_num_time_segments) {
      max_num_time_segments = other.max_num_time_segments;
      max_time_range = other.max_time_range;
    }
  }

  /*! value-returning merge used as parallel reduction */
  static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
    PrimInfoMBT r = a; r.merge(b); return r;
  }

  __forceinline size_t begin() const {
    return object_range.begin();
  }

  __forceinline size_t end() const {
    return object_range.end();
  }

  /*! returns the number of primitives */
  __forceinline size_t size() const {
    return object_range.size();
  }

  /*! half area scaled by the covered time-range length */
  __forceinline float halfArea() const {
    return time_range.size()*expectedApproxHalfArea(geomBounds);
  }

  /*! SAH leaf cost: time-scaled half area times number of time segments */
  __forceinline float leafSAH() const {
    return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
  }

  /*! SAH leaf cost with segment counts rounded up to blocks of 2^block_shift */
  __forceinline float leafSAH(size_t block_shift) const {
    return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
  }

  /*! snaps time ct to the time-segment grid of the primitive with the
   *  maximal number of time segments */
  __forceinline float align_time(float ct) const
  {
    //return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
    float t0 = (ct-max_time_range.lower)/max_time_range.size();
    float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
    return t1*max_time_range.size()+max_time_range.lower;
  }

  /*! stream output */
  friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
  {
    return cout << "PrimInfo { " <<
      "object_range = " << pinfo.object_range <<
      ", time_range = " << pinfo.time_range <<
      ", time_segments = " << pinfo.num_time_segments <<
      ", geomBounds = " << pinfo.geomBounds <<
      ", centBounds = " << pinfo.centBounds <<
      "}";
  }

public:
  range<size_t> object_range;   //!< primitive range
  size_t num_time_segments;     //!< total number of time segments of all added primrefs
  size_t max_num_time_segments; //!< maximum number of time segments of a primitive
  BBox1f max_time_range;        //!< time range of primitive with max_num_time_segments
  BBox1f time_range;            //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
};
typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
/*! A PrimInfoMB together with the primref vector it describes; the
 *  working set passed around by the motion-blur builders. */
struct SetMB : public PrimInfoMB
{
  static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;

  typedef mvector<PrimRefMB>* PrimRefVector;

  __forceinline SetMB() {}

  __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
    : PrimInfoMB(pinfo_i), prims(prims) {}

  /*! constructs a set over an explicit sub-range; the stored time range
   *  is the intersection with time_range_in */
  __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
    : PrimInfoMB(pinfo_i), prims(prims)
  {
    object_range = object_range_in;
    time_range = intersect(time_range,time_range_in);
  }

  __forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
    : PrimInfoMB(pinfo_i), prims(prims)
  {
    time_range = intersect(time_range,time_range_in);
  }

  void deterministic_order() const
  {
    /* required as parallel partition destroys original primitive order */
    PrimRefMB* prim = prims->data();
    std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
  }

  /*! linear bounds of all primitives over this set's time range */
  template<typename RecalculatePrimRef>
  __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
  {
    auto reduce = [&](const range<size_t>& r) -> LBBox3fa
    {
      LBBox3fa cbounds(empty);
      for (size_t j = r.begin(); j < r.end(); j++)
      {
        PrimRefMB& ref = (*prims)[j];
        const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
        cbounds.extend(bn);
      };
      return cbounds;
    };

    return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
                           reduce,
                           [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
  }

  /*! linear bounds computed in the given (oriented) space */
  template<typename RecalculatePrimRef>
  __forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
  {
    auto reduce = [&](const range<size_t>& r) -> LBBox3fa
    {
      LBBox3fa cbounds(empty);
      for (size_t j = r.begin(); j < r.end(); j++)
      {
        PrimRefMB& ref = (*prims)[j];
        const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
        cbounds.extend(bn);
      };
      return cbounds;
    };

    return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
                           reduce,
                           [&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
  }

  /*! recomputes the PrimInfoMB of this set in the given space (primrefs
   *  themselves are not modified) */
  template<typename RecalculatePrimRef>
  const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
  {
    auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
    {
      PrimInfoMB pinfo(empty);
      for (size_t j=r.begin(); j<r.end(); j++)
      {
        PrimRefMB& ref = (*prims)[j];
        PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
        pinfo.add_primref(ref1);
      };
      return pinfo;
    };

    const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
                                             PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);

    return SetMB(pinfo,prims,object_range,time_range);
  }

public:
  PrimRefVector prims; //!< the primref vector this set indexes into
};
//}
}

View file

@ -0,0 +1,139 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/default.h"
namespace embree
{
/*! A primitive reference stores the bounds of the primitive and its ID.
 *  geomID/primID are packed into the unused 4th components of the two
 *  Vec3fx bounds vectors, keeping the struct at 32 bytes. */
struct __aligned(32) PrimRef
{
  __forceinline PrimRef () {}

#if defined(__AVX__)
  /* 32-byte copy via a single AVX load/store */
  __forceinline PrimRef(const PrimRef& v) {
    vfloat8::store((float*)this,vfloat8::load((float*)&v));
  }
  __forceinline PrimRef& operator=(const PrimRef& v) {
    vfloat8::store((float*)this,vfloat8::load((float*)&v)); return *this;
  }
#endif

  __forceinline PrimRef (const BBox3fa& bounds, unsigned int geomID, unsigned int primID)
  {
    lower = Vec3fx(bounds.lower, geomID);
    upper = Vec3fx(bounds.upper, primID);
  }

  /* stores a size_t id split across the two 32-bit slots */
  __forceinline PrimRef (const BBox3fa& bounds, size_t id)
  {
#if defined(__64BIT__)
    lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF));
    upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF));
#else
    lower = Vec3fx(bounds.lower, (unsigned)id);
    upper = Vec3fx(bounds.upper, (unsigned)0);
#endif
  }

  /*! calculates twice the center of the primitive */
  __forceinline const Vec3fa center2() const {
    return lower+upper;
  }

  /*! return the bounding box of the primitive */
  __forceinline const BBox3fa bounds() const {
    return BBox3fa(lower,upper);
  }

  /*! size for bin heuristic is 1 */
  __forceinline unsigned size() const {
    return 1;
  }

  /*! returns bounds and centroid used for binning */
  __forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
  {
    bounds_o = bounds();
    center_o = embree::center2(bounds_o);
  }

  __forceinline unsigned& geomIDref() { // FIXME: remove !!!!!!!
    return lower.u;
  }
  __forceinline unsigned& primIDref() { // FIXME: remove !!!!!!!
    return upper.u;
  }

  /*! returns the geometry ID */
  __forceinline unsigned geomID() const {
    return lower.a;
  }

  /*! returns the primitive ID */
  __forceinline unsigned primID() const {
    return upper.a;
  }

  /*! returns an size_t sized ID (inverse of the (bounds,id) constructor) */
  __forceinline size_t ID() const {
#if defined(__64BIT__)
    return size_t(lower.u) + (size_t(upper.u) << 32);
#else
    return size_t(lower.u);
#endif
  }

  /*! special function for operator<; primID forms the high bits so sorting
   *  orders by primID first, then geomID */
  __forceinline uint64_t ID64() const {
    return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
  }

  /*! allows sorting the primrefs by ID */
  friend __forceinline bool operator<(const PrimRef& p0, const PrimRef& p1) {
    return p0.ID64() < p1.ID64();
  }

  /*! Outputs primitive reference to a stream. */
  friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRef& ref) {
    return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << " }";
  }

public:
  Vec3fx lower; //!< lower bounds and geomID
  Vec3fx upper; //!< upper bounds and primID
};
/*! fast exchange for PrimRefs (32-byte AVX swap when available) */
__forceinline void xchg(PrimRef& a, PrimRef& b)
{
#if defined(__AVX__)
  const vfloat8 tmp = vfloat8::load((float*)&a);
  vfloat8::store((float*)&a, vfloat8::load((float*)&b));
  vfloat8::store((float*)&b, tmp);
#else
  std::swap(a,b);
#endif
}
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! per-subgrid build record: grid-local start coordinates plus owning primitive ID */
struct SubGridBuildData {
  unsigned short sx,sy;  //!< subgrid start coordinates (15 usable bits each, see x()/y())
  unsigned int primID;   //!< ID of the grid primitive this subgrid belongs to
  __forceinline SubGridBuildData() {}
  __forceinline SubGridBuildData(const unsigned int sx, const unsigned int sy, const unsigned int primID)
    : sx(sx), sy(sy), primID(primID) {}
  /*! start coordinates with the top bit masked off */
  __forceinline size_t x() const { return size_t(sx & 0x7fff); }
  __forceinline size_t y() const { return size_t(sy & 0x7fff); }
};
}

View file

@ -0,0 +1,262 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/default.h"
#define MBLUR_BIN_LBBOX 1
namespace embree
{
#if MBLUR_BIN_LBBOX
/*! A primitive reference stores the bounds of the primitive and its ID. */
struct PrimRefMB
{
typedef LBBox3fa BBox;
/*! default constructor leaves all members uninitialized */
__forceinline PrimRefMB () {}
/*! constructs from linear bounds; geomID/primID and the active/total
 *  time-segment counts are packed into the integer channels (.a) of the
 *  four corner vectors of the linear bounds */
__forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
{
assert(activeTimeSegments > 0);
lbounds.bounds0.lower.a = geomID;
lbounds.bounds0.upper.a = primID;
lbounds.bounds1.lower.a = activeTimeSegments;
lbounds.bounds1.upper.a = totalTimeSegments;
}
/*! same, but stores one size_t ID split across the two 32-bit fields
 *  (signed .a channel variant) */
__forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
{
assert(activeTimeSegments > 0);
#if defined(__64BIT__)
lbounds.bounds0.lower.a = id & 0xFFFFFFFF;
lbounds.bounds0.upper.a = (id >> 32) & 0xFFFFFFFF;
#else
lbounds.bounds0.lower.a = id;
lbounds.bounds0.upper.a = 0;
#endif
lbounds.bounds1.lower.a = activeTimeSegments;
lbounds.bounds1.upper.a = totalTimeSegments;
}
/*! same, but writes the unsigned .u channel; ID() below reads .u back */
__forceinline PrimRefMB (const LBBox3fa& lbounds_i, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
: lbounds((LBBox3fx)lbounds_i), time_range(time_range)
{
assert(activeTimeSegments > 0);
#if defined(__64BIT__)
lbounds.bounds0.lower.u = id & 0xFFFFFFFF;
lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF;
#else
lbounds.bounds0.lower.u = id;
lbounds.bounds0.upper.u = 0;
#endif
lbounds.bounds1.lower.a = activeTimeSegments;
lbounds.bounds1.upper.a = totalTimeSegments;
}
/*! returns bounds for binning */
__forceinline LBBox3fa bounds() const {
return lbounds;
}
/*! returns the number of time segments of this primref */
__forceinline unsigned size() const {
return lbounds.bounds1.lower.a;
}
/*! returns the total number of time segments of the geometry */
__forceinline unsigned totalTimeSegments() const {
return lbounds.bounds1.upper.a;
}
/* calculate overlapping time segment range */
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
return getTimeSegmentRange(range,time_range,float(totalTimeSegments()));
}
/* returns time that corresponds to time step */
__forceinline float timeStep(const int i) const {
assert(i>=0 && i<=(int)totalTimeSegments());
return time_range.lower + time_range.size()*float(i)/float(totalTimeSegments());
}
/*! checks if time range overlaps */
__forceinline bool time_range_overlap(const BBox1f& range) const
{
/* the small scale factors make the test robust against touching-only ranges */
if (0.9999f*time_range.upper <= range.lower) return false;
if (1.0001f*time_range.lower >= range.upper) return false;
return true;
}
/*! returns center for binning (center2 of the bounds interpolated at t=0.5) */
__forceinline Vec3fa binCenter() const {
return center2(lbounds.interpolate(0.5f));
}
/*! returns bounds and centroid used for binning */
__forceinline void binBoundsAndCenter(LBBox3fa& bounds_o, Vec3fa& center_o) const
{
bounds_o = bounds();
center_o = binCenter();
}
/*! returns the geometry ID */
__forceinline unsigned geomID() const {
return lbounds.bounds0.lower.a;
}
/*! returns the primitive ID */
__forceinline unsigned primID() const {
return lbounds.bounds0.upper.a;
}
/*! returns an size_t sized ID */
__forceinline size_t ID() const {
#if defined(__64BIT__)
return size_t(lbounds.bounds0.lower.u) + (size_t(lbounds.bounds0.upper.u) << 32);
#else
return size_t(lbounds.bounds0.lower.u);
#endif
}
/*! special function for operator< */
__forceinline uint64_t ID64() const {
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
}
/*! allows sorting the primrefs by ID */
friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
return p0.ID64() < p1.ID64();
}
/*! Outputs primitive reference to a stream. */
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
return cout << "{ time_range = " << ref.time_range << ", bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
}
public:
LBBox3fx lbounds; //!< linear bounds; integer channels carry IDs and segment counts
BBox1f time_range; // entire geometry time range
};
#else
/*! A primitive reference stores the bounds of the primitive and its ID. */
struct __aligned(16) PrimRefMB
{
typedef BBox3fa BBox;
/*! default constructor leaves all members uninitialized */
__forceinline PrimRefMB () {}
/*! constructs from linear bounds; stores the bounds interpolated at t=0.5
 *  and packs geomID/primID into the integer channels of the box corners */
__forceinline PrimRefMB (const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, unsigned int geomID, unsigned int primID)
: bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
{
assert(activeTimeSegments > 0);
bbox.lower.a = geomID;
bbox.upper.a = primID;
}
/*! same, but stores one size_t ID split across the two 32-bit fields */
__forceinline PrimRefMB (EmptyTy empty, const LBBox3fa& bounds, unsigned int activeTimeSegments, BBox1f time_range, unsigned int totalTimeSegments, size_t id)
: bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range)
{
assert(activeTimeSegments > 0);
#if defined(__64BIT__)
bbox.lower.u = id & 0xFFFFFFFF;
bbox.upper.u = (id >> 32) & 0xFFFFFFFF;
#else
bbox.lower.u = id;
bbox.upper.u = 0;
#endif
}
/*! returns bounds for binning */
__forceinline BBox3fa bounds() const {
return bbox;
}
/*! returns the number of time segments of this primref */
__forceinline unsigned int size() const {
return _activeTimeSegments;
}
/*! returns the total number of time segments of the geometry */
__forceinline unsigned int totalTimeSegments() const {
return _totalTimeSegments;
}
/* calculate overlapping time segment range */
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
return getTimeSegmentRange(range,time_range,float(_totalTimeSegments));
}
/* returns time that corresponds to time step */
__forceinline float timeStep(const int i) const {
assert(i>=0 && i<=(int)_totalTimeSegments);
return time_range.lower + time_range.size()*float(i)/float(_totalTimeSegments);
}
/*! checks if time range overlaps */
__forceinline bool time_range_overlap(const BBox1f& range) const
{
/* the small scale factors make the test robust against touching-only ranges */
if (0.9999f*time_range.upper <= range.lower) return false;
if (1.0001f*time_range.lower >= range.upper) return false;
return true;
}
/*! returns center for binning */
__forceinline Vec3fa binCenter() const {
return center2(bounds());
}
/*! returns bounds and centroid used for binning */
__forceinline void binBoundsAndCenter(BBox3fa& bounds_o, Vec3fa& center_o) const
{
bounds_o = bounds();
center_o = center2(bounds());
}
/*! returns the geometry ID */
__forceinline unsigned int geomID() const {
return bbox.lower.a;
}
/*! returns the primitive ID */
__forceinline unsigned int primID() const {
return bbox.upper.a;
}
/*! returns an size_t sized ID */
__forceinline size_t ID() const {
#if defined(__64BIT__)
return size_t(bbox.lower.u) + (size_t(bbox.upper.u) << 32);
#else
return size_t(bbox.lower.u);
#endif
}
/*! special function for operator< */
__forceinline uint64_t ID64() const {
return (((uint64_t)primID()) << 32) + (uint64_t)geomID();
}
/*! allows sorting the primrefs by ID */
friend __forceinline bool operator<(const PrimRefMB& p0, const PrimRefMB& p1) {
return p0.ID64() < p1.ID64();
}
/*! Outputs primitive reference to a stream. */
friend __forceinline embree_ostream operator<<(embree_ostream cout, const PrimRefMB& ref) {
return cout << "{ bounds = " << ref.bounds() << ", geomID = " << ref.geomID() << ", primID = " << ref.primID() << ", active_segments = " << ref.size() << ", total_segments = " << ref.totalTimeSegments() << " }";
}
public:
BBox3fa bbox; // bounds, geomID, primID
unsigned int _activeTimeSegments;
unsigned int _totalTimeSegments;
BBox1f time_range; // entire geometry time range
};
#endif
}

View file

@ -0,0 +1,359 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "primrefgen.h"
#include "primrefgen_presplit.h"
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
namespace embree
{
namespace isa
{
/*! creates the PrimRef array for a single geometry and returns the merged
 *  PrimInfo; if primitives got filtered out in the first pass, a second
 *  pass compacts the array using the prefix sums */
PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
{
  ParallelPrefixSumState<PrimInfo> pstate;
  const auto mergePrimInfo = [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); };

  /* first pass: write each range at its own begin offset */
  progressMonitor(0);
  PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty),
    [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
      return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
    }, mergePrimInfo);

  /* some primitives got filtered out: rerun, writing at the prefix-sum offset */
  if (pinfo.size() != numPrimRefs)
  {
    progressMonitor(0);
    pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty),
      [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
        return geometry->createPrimRefArray(prims,r,base.size(),geomID);
      }, mergePrimInfo);
  }
  return pinfo;
}
/*! creates PrimRefs for all scene geometries matching the given type mask;
 *  a second, compacting pass runs when primitives were filtered out
 *  (detected via pinfo.size() != numPrimRefs) */
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
{
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator2 iter(scene,types,mblur);
/* first try */
progressMonitor(0);
pstate.init(iter,size_t(1024));
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
/* if we need to filter out geometry, run again */
if (pinfo.size() != numPrimRefs)
{
progressMonitor(0);
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
}
return pinfo;
}
/*! same as above, but additionally fills the per-subgrid build data array
 *  (grid-mesh path); uses the same two-pass filter/compact scheme */
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor)
{
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator2 iter(scene,types,mblur);
/* first try */
progressMonitor(0);
pstate.init(iter,size_t(1024));
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
return mesh->createPrimRefArray(prims,sgrids,r,k,(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
/* if we need to filter out geometry, run again */
if (pinfo.size() != numPrimRefs)
{
progressMonitor(0);
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
}
return pinfo;
}
/*! creates PrimRefs for a single motion-blur time step (itime) over all
 *  motion-blurred geometries; same two-pass filter/compact scheme */
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
{
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator2 iter(scene,types,true);
/* first try */
progressMonitor(0);
pstate.init(iter,size_t(1024));
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
return mesh->createPrimRefArrayMB(prims,itime,r,k,(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
/* if we need to filter out geometry, run again */
if (pinfo.size() != numPrimRefs)
{
progressMonitor(0);
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
return mesh->createPrimRefArrayMB(prims,itime,r,base.size(),(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
}
return pinfo;
}
/*! creates PrimRefMBs for multi-segment motion blur over time range t0t1;
 *  same two-pass filter/compact scheme as the non-MB variants */
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
{
ParallelForForPrefixSumState<PrimInfoMB> pstate;
Scene::Iterator2 iter(scene,types,true);
/* first try */
progressMonitor(0);
pstate.init(iter,size_t(1024));
PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
return mesh->createPrimRefMBArray(prims,t0t1,r,k,(unsigned)geomID);
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
/* if we need to filter out geometry, run again */
if (pinfo.size() != numPrimRefs)
{
progressMonitor(0);
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
return mesh->createPrimRefMBArray(prims,t0t1,r,base.size(),(unsigned)geomID);
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
}
/* the BVH starts with that time range, even though primitives might have smaller/larger time range */
pinfo.time_range = t0t1;
return pinfo;
}
/*! multi-segment motion-blur variant that additionally fills the per-subgrid
 *  build data array (grid-mesh path) */
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
{
ParallelForForPrefixSumState<PrimInfoMB> pstate;
Scene::Iterator2 iter(scene,types,true);
/* first try */
progressMonitor(0);
pstate.init(iter,size_t(1024));
PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,k,(unsigned)geomID);
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
/* if we need to filter out geometry, run again */
if (pinfo.size() != numPrimRefs)
{
progressMonitor(0);
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
}
/* the BVH starts with that time range, even though primitives might have smaller/larger time range */
pinfo.time_range = t0t1;
return pinfo;
}
/*! fills the morton array with build primitives (morton code + index) for
 *  all valid primitives of the mesh; returns the number generated */
template<typename Mesh>
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor)
{
size_t numPrimitives = morton.size();
/* compute scene bounds (bounds over the primitive centroids, counting valid prims) */
std::pair<size_t,BBox3fa> cb_empty(0,empty);
auto cb = parallel_reduce
( size_t(0), numPrimitives, size_t(1024), cb_empty, [&](const range<size_t>& r) -> std::pair<size_t,BBox3fa>
{
size_t num = 0;
BBox3fa bounds = empty;
for (size_t j=r.begin(); j<r.end(); j++)
{
BBox3fa prim_bounds = empty;
if (unlikely(!mesh->buildBounds(j,&prim_bounds))) continue;
bounds.extend(center2(prim_bounds));
num++;
}
return std::make_pair(num,bounds);
}, [] (const std::pair<size_t,BBox3fa>& a, const std::pair<size_t,BBox3fa>& b) {
return std::make_pair(a.first + b.first,merge(a.second,b.second));
});
size_t numPrimitivesGen = cb.first;
const BBox3fa centBounds = cb.second;
/* compute morton codes */
if (likely(numPrimitivesGen == numPrimitives))
{
/* fast path if all primitives were valid */
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
parallel_for( size_t(0), numPrimitives, size_t(1024), [&](const range<size_t>& r) -> void {
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
for (size_t j=r.begin(); j<r.end(); j++)
generator(mesh->bounds(j),unsigned(j));
});
}
else
{
/* slow path, fallback in case some primitives were invalid */
/* first pass counts valid primitives per range to build the prefix sum */
ParallelPrefixSumState<size_t> pstate;
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
size_t num = 0;
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[r.begin()]);
for (size_t j=r.begin(); j<r.end(); j++)
{
BBox3fa bounds = empty;
if (unlikely(!mesh->buildBounds(j,&bounds))) continue;
generator(bounds,unsigned(j));
num++;
}
return num;
}, std::plus<size_t>());
/* second pass writes each range at its prefix-sum offset (base), compacting the array */
parallel_prefix_sum( pstate, size_t(0), numPrimitives, size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t base) -> size_t {
size_t num = 0;
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton.data()[base]);
for (size_t j=r.begin(); j<r.end(); j++)
{
BBox3fa bounds = empty;
if (!mesh->buildBounds(j,&bounds)) continue;
generator(bounds,unsigned(j));
num++;
}
return num;
}, std::plus<size_t>());
}
return numPrimitivesGen;
}
// ====================================================================================================
// ====================================================================================================
// ====================================================================================================
// special variants for grid meshes
#if defined(EMBREE_GEOMETRY_GRID)
/*! creates PrimRefs and SubGridBuildData for all grid meshes of the scene.
 *  Pass 1 only counts subgrids of valid grids (bounds are irrelevant there),
 *  pass 2 writes the actual arrays at the prefix-sum offsets. */
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids)
{
  PrimInfo pinfo(empty);
  size_t numPrimitives = 0;

  /* first run to get #primitives */
  ParallelForForPrefixSumState<PrimInfo> pstate;
  Scene::Iterator<GridMesh,false> iter(scene);
  pstate.init(iter,size_t(1024));

  /* iterate over all meshes in the scene */
  pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
    PrimInfo pinfo(empty);
    for (size_t j=r.begin(); j<r.end(); j++)
    {
      if (!mesh->valid(j)) continue;   // fix: validity was checked twice before
      BBox3fa bounds = empty;          // bounds unused for counting
      const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j);
      pinfo.add_center2(prim,mesh->getNumSubGrids(j));
    }
    return pinfo;
  }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  numPrimitives = pinfo.size();

  /* resize arrays */
  sgrids.resize(numPrimitives);
  prims.resize(numPrimitives);

  /* second run to fill primrefs and SubGridBuildData arrays */
  pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
    return mesh->createPrimRefArray(prims,sgrids,r,base.size(),(unsigned)geomID); // explicit cast, consistent with the other overloads
  }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  assert(pinfo.size() == numPrimitives);
  return pinfo;
}
/*! creates PrimRefs and SubGridBuildData for all subgrids of one grid mesh;
 *  uses the max unsigned value as geomID placeholder (single-mesh variant —
 *  presumably patched by the caller; confirm against call sites) */
PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids)
{
unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
PrimInfo pinfo(empty);
size_t numPrimitives = 0;
ParallelPrefixSumState<PrimInfo> pstate;
/* iterate over all grids in a single mesh */
/* first pass only counts subgrids of valid grids (bounds left empty) */
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
{
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j)) continue;
BBox3fa bounds = empty;
const PrimRef prim(bounds,geomID_,unsigned(j));
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
}
return pinfo;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
numPrimitives = pinfo.size();
/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);
/* second run to fill primrefs and SubGridBuildData arrays */
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID_);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
return pinfo;
}
/*! creates PrimRefMBs and SubGridBuildData for all grid meshes over time
 *  range t0t1. Pass 1 only counts subgrids of grids valid in the overlapping
 *  time-segment range, pass 2 fills the arrays at the prefix-sum offsets. */
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
{
  /* first run to get #primitives */
  ParallelForForPrefixSumState<PrimInfoMB> pstate;
  Scene::Iterator<GridMesh,true> iter(scene);
  pstate.init(iter,size_t(1024));

  /* iterate over all meshes in the scene and count subgrids of valid grids */
  PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
    PrimInfoMB pinfoMB(empty);
    for (size_t j=r.begin(); j<r.end(); j++)
    {
      if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
      /* bounds are not needed for counting (removed unused LBBox3fa local) */
      PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
      pinfoMB.merge(gridMB);
    }
    return pinfoMB;
  }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });

  size_t numPrimitives = pinfoMB.size();
  if (numPrimitives == 0) return pinfoMB;

  /* resize arrays */
  sgrids.resize(numPrimitives);
  prims.resize(numPrimitives);

  /* second run to fill primrefs and SubGridBuildData arrays */
  pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
    return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
  }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
  assert(pinfoMB.size() == numPrimitives);
  pinfoMB.time_range = t0t1;
  return pinfoMB;
}
#endif
// ====================================================================================================
// ====================================================================================================
// ====================================================================================================
/* explicit template instantiations of createMortonCodeArray for all enabled geometry types */
IF_ENABLED_TRIS (template size_t createMortonCodeArray<TriangleMesh>(TriangleMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
IF_ENABLED_INSTANCE_ARRAY (template size_t createMortonCodeArray<InstanceArray>(InstanceArray* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
}
}

View file

@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/scene.h"
#include "priminfo.h"
#include "priminfo_mb.h"
#include "bvh_builder_morton.h"
namespace embree
{
namespace isa
{
/*! creates a PrimRef array for a single geometry */
PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
/*! creates a PrimRef array over all scene geometries matching the type mask */
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
/*! same, but additionally fills per-subgrid build data (grid meshes) */
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor);
/*! creates a PrimRef array for a single motion-blur time step itime */
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0);
/*! creates a PrimRefMB array for multi-segment motion blur over time range t0t1 */
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
/*! same, but additionally fills per-subgrid build data (grid meshes) */
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
/*! generates morton-code build primitives for all valid mesh primitives */
template<typename Mesh>
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor);
/* special variants for grids */
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids); // FIXME: remove
PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids);
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
}
}

View file

@ -0,0 +1,468 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_sort.h"
#include "../builders/heuristic_spatial.h"
#include "../builders/splitter.h"
#include "../../common/algorithms/parallel_partition.h"
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
#define DBG_PRESPLIT(x)
#define CHECK_PRESPLIT(x)
#define GRID_SIZE 1024
//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6
#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
//#define PRIORITY_CUTOFF_THRESHOLD 2.0f
#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
namespace embree
{
namespace isa
{
/*! uniform GRID_SIZE^3 grid over the scene bounds, used to derive octree
 *  split planes for pre-splitting primitives */
struct SplittingGrid
{
/*! sizes the grid cube by the largest edge of the given bounds */
__forceinline SplittingGrid(const BBox3fa& bounds)
{
base = bounds.lower;
const Vec3fa diag = bounds.size();
extend = max(diag.x,max(diag.y,diag.z));
scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend;
}
/*! computes dimension and world-space position of the octree split plane for
 *  a primitive; returns false when the primitive maps to a single grid cell
 *  (identical morton codes) and therefore cannot be split */
__forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const
{
/* compute morton code */
const Vec3fa lower = prim.lower;
const Vec3fa upper = prim.upper;
/* the +/-0.2 offsets pull both corners slightly inwards before snapping to cells */
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
Vec3ia ilower(floor(glower));
Vec3ia iupper(floor(gupper));
/* this ignores dimensions that are empty */
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
/* compute a morton code for the lower and upper grid coordinates. */
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
/* if all bits are equal then we cannot split */
if (unlikely(lower_code == upper_code))
return false;
/* compute octree level and dimension to perform the split in */
/* diff = index of the highest bit in which the two codes differ */
const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
const unsigned int level = diff / 3;
const unsigned int dim = diff % 3;
/* now we compute the grid position of the split */
const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
/* compute world space position of split */
const float inv_grid_size = 1.0f / GRID_SIZE;
const float fsplit = base[dim] + isplit * inv_grid_size * extend;
assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit);
dim_o = dim;
fsplit_o = fsplit;
return true;
}
/*! returns the morton codes of the snapped lower/upper grid coordinates */
__forceinline Vec2i computeMC(const PrimRef& ref) const
{
const Vec3fa lower = ref.lower;
const Vec3fa upper = ref.upper;
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
Vec3ia ilower(floor(glower));
Vec3ia iupper(floor(gupper));
/* this ignores dimensions that are empty */
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
/* compute a morton code for the lower and upper grid coordinates. */
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
return Vec2i(lower_code,upper_code);
}
Vec3fa base;  //!< world-space origin of the grid
float scale;  //!< world-to-grid coordinate scale (GRID_SIZE / extend)
float extend; //!< largest edge length of the input bounds
};
/*! pairs a primitive index with its presplit priority; priority (float) and
 *  data (its raw bits) alias the same storage via the union */
struct PresplitItem
{
union {
float priority;
unsigned int data;
};
unsigned int index;
/* returns the raw bits of the priority (integer view of the union) */
__forceinline operator unsigned() const {
return data;
}
/*! heuristic split priority: grows with the gap between AABB area and true
 *  primitive surface area, weighted by the octree level derived from the
 *  highest differing morton-code bit; 0 for degenerate primitives */
template<typename ProjectedPrimitiveAreaFunc>
__forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc)
{
const float area_aabb = area(ref.bounds());
const float area_prim = primitiveArea(ref);
if (area_prim == 0.0f) return 0.0f;
/* index of the highest bit in which the two morton codes differ */
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
//assert(area_prim <= area_aabb); // may trigger due to numerical issues
const float area_diff = max(0.0f, area_aabb - area_prim);
//const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
//const float priority = sqrtf(sqrtf( area_diff ) );
//const float priority = sqrtfarea_diff;
//const float priority = area_diff; // 104 fps !!!!!!!!!!
//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps
//const float priority = area_aabb * max(area_aabb/area_prim,32.0f);
//const float priority = area_prim;
assert(priority >= 0.0f && priority < FLT_LARGE);
return priority;
}
};
/*! prints a presplit item (primitive index and its split priority) */
inline std::ostream &operator<<(std::ostream &os, const PresplitItem& item)
{
  os << "index " << item.index;
  os << " priority " << item.priority;
  return os;
}
#if 1
/*! recursively splits a primitive into up to splitprims pieces along octree
 *  planes from the splitting grid, appending results to subPrims */
template<typename Splitter>
void splitPrimitive(const Splitter& splitter,
const PrimRef& prim,
const unsigned int splitprims,
const SplittingGrid& grid,
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
unsigned int& numSubPrims)
{
assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
if (splitprims == 1)
{
/* split budget exhausted: emit the primitive as-is */
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = prim;
}
else
{
unsigned int dim; float fsplit;
if (!grid.split_pos(prim, dim, fsplit))
{
/* primitive occupies a single grid cell: cannot split further */
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = prim;
return;
}
/* split primitive */
PrimRef left,right;
splitter(prim,dim,fsplit,left,right);
assert(!left.bounds().empty());
assert(!right.bounds().empty());
/* distribute the remaining split budget over both halves */
const unsigned int splitprims_left = splitprims/2;
const unsigned int splitprims_right = splitprims - splitprims_left;
splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims);
splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims);
}
}
#else
/*! iterative variant (currently disabled by the surrounding #if): repeatedly
 *  splits the sub-primitive with the largest bounding-box area, maintained in
 *  a max-heap, until targetSubPrims pieces exist */
template<typename Splitter>
void splitPrimitive(const Splitter& splitter,
const PrimRef& prim,
const unsigned int targetSubPrims,
const SplittingGrid& grid,
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
unsigned int& numSubPrims)
{
assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE);
/* order by bounding-box area so the largest piece sits at the heap top */
auto compare = [] ( const PrimRef& a, const PrimRef& b ) {
return area(a.bounds()) < area(b.bounds());
};
subPrims[numSubPrims++] = prim;
while (numSubPrims < targetSubPrims)
{
/* get top heap element */
std::pop_heap(subPrims+0,subPrims+numSubPrims, compare);
PrimRef top = subPrims[--numSubPrims];
unsigned int dim; float fsplit;
if (!grid.split_pos(top, dim, fsplit))
{
/* largest piece is unsplittable: put it back and stop */
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = top;
return;
}
/* split primitive */
PrimRef left,right;
splitter(top,dim,fsplit,left,right);
assert(!left.bounds().empty());
assert(!right.bounds().empty());
/* push both halves back onto the heap */
subPrims[numSubPrims++] = left;
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
subPrims[numSubPrims++] = right;
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
}
}
#endif
#if !defined(RTHWIF_STANDALONE)
/*! Fills 'prims' with one PrimRef per valid primitive of a single geometry.
 *  The first pass stores each PrimRef at its own primitive index; if some
 *  primitives were filtered out (fewer PrimRefs than expected) a second
 *  pass re-runs the prefix sum to store them compacted. */
template<typename Mesh, typename SplitterFactory>
PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
{
  ParallelPrefixSumState<PrimInfo> pstate;

  /* reducer shared by both passes */
  auto mergeInfo = [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); };

  /* first try: assume nothing gets filtered, write at the primitive's own index */
  progressMonitor(0);
  auto identityPass = [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
    return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
  };
  PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), identityPass, mergeInfo);

  /* if we need to filter out geometry, run again with compacted offsets */
  if (pinfo.size() != numPrimRefs)
  {
    progressMonitor(0);
    auto compactedPass = [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
      return geometry->createPrimRefArray(prims,r,base.size(),geomID);
    };
    pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), compactedPass, mergeInfo);
  }

  return pinfo;
}
#endif
/*! Generic spatial pre-split kernel. Takes the pinfo.size() PrimRefs at the
 *  front of 'prims' (whose extra capacity up to prims.size() is the split
 *  budget), assigns every primitive a split priority, distributes the budget
 *  over the highest-priority primitives, splits them via 'splitPrimitive',
 *  and returns a recomputed PrimInfo over the enlarged primitive array.
 *  'primitiveArea' supplies the projected area used by the priority metric. */
template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector>
PrimInfo createPrimRefArray_presplit(size_t numPrimRefs,
                                     PrimVector& prims,
                                     const PrimInfo& pinfo,
                                     const SplitPrimitiveFunc& splitPrimitive,
                                     const ProjectedPrimitiveAreaFunc& primitiveArea)
{
  static const size_t MIN_STEP_SIZE = 128;

  /* use correct number of primitives */
  size_t numPrimitives = pinfo.size();
  const size_t numPrimitivesExt = prims.size();
  const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives;

  /* allocate double buffer presplit items */
  avector<PresplitItem> preSplitItem0(numPrimitivesExt);
  avector<PresplitItem> preSplitItem1(numPrimitivesExt);

  /* compute grid */
  SplittingGrid grid(pinfo.geomBounds);

  /* init presplit items and get total sum */
  const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
      float sum = 0.0f;
      for (size_t i=r.begin(); i<r.end(); i++)
      {
        preSplitItem0[i].index = (unsigned int)i;
        const Vec2i mc = grid.computeMC(prims[i]);
        /* if all bits are equal then we cannot split */
        preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f;
        /* FIXME: sum undeterministic */
        sum += preSplitItem0[i].priority;
      }
      return sum;
    },[](const float& a, const float& b) -> float { return a+b; });

  /* compute number of splits per primitive */
  const float inv_psum = 1.0f / psum;
  parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
      for (size_t i=r.begin(); i<r.end(); i++)
      {
        if (preSplitItem0[i].priority <= 0.0f) {
          preSplitItem0[i].data = 1;
          continue;
        }

        /* budget share proportional to this primitive's priority */
        const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum;
        if (rel_p < 1) {
          preSplitItem0[i].data = 1;
          continue;
        }

        /* round the per-primitive split count to a power of two */
        //preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f);
        preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
        preSplitItem0[i].data = 1 << preSplitItem0[i].data;
        assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE);
      }
    });

  /* move all items that will not be split (data <= 1) to the left side */
  auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; };
  size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024);
  assert(center <= numPrimitives);

  /* anything to split ? */
  if (center >= numPrimitives)
    return pinfo;

  size_t numPrimitivesToSplit = numPrimitives - center;
  assert(preSplitItem0[center].data >= 1.0f);

  /* sort presplit items in ascending order */
  radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024);

  CHECK_PRESPLIT(
    parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
        for (size_t i=r.begin(); i<r.end(); i++)
          assert(preSplitItem0[i-1].data <= preSplitItem0[i].data);
      });
    );

  /* reuse the second presplit buffer for per-item sub-primitive counts (primOffset0)
     and, after the prefix sum below, their destination offsets (primOffset1) */
  unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data();
  unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit;

  /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
  const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
      size_t sum = 0;
      for (size_t i=t.begin(); i<t.end(); i++)
      {
        const unsigned int primrefID = preSplitItem0[i].index;
        const unsigned int splitprims = preSplitItem0[i].data;
        assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
        unsigned int numSubPrims = 0;
        PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
        splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
        assert(numSubPrims);
        numSubPrims--; // can reuse slot
        sum+=numSubPrims;
        /* pack the achieved extra count (high 16 bits) next to the requested
           split count (low 16 bits) for the second splitting pass */
        preSplitItem0[i].data = (numSubPrims << 16) | splitprims;
        primOffset0[i-center] = numSubPrims;
      }
      return sum;
    },[](const size_t& a, const size_t& b) -> size_t { return a+b; });

  /* if we are over budget, need to shrink the range */
  if (totalNumSubPrims > numSplitPrimitivesBudget)
  {
    /* walk from the highest-priority end (range is sorted ascending) and keep
       items until the budget is reached; everything below new_center is dropped */
    size_t new_center = numPrimitives-1;
    size_t sum = 0;
    for (;new_center>=center;new_center--)
    {
      const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16;
      if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
      sum += numSubPrims;
    }
    new_center++;

    primOffset0 += new_center - center;
    numPrimitivesToSplit -= new_center - center;
    center = new_center;
    assert(numPrimitivesToSplit == (numPrimitives - center));
  }

  /* parallel prefix sum to compute offsets for storing sub-primitives */
  const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
  assert(numPrimitives+offset <= numPrimitivesExt);

  /* iterate over range, and split primitives into sub primitives and append them to prims array */
  parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
      for (size_t j=rn.begin(); j<rn.end(); j++)
      {
        const unsigned int primrefID = preSplitItem0[j].index;
        const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF;
        assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
        unsigned int numSubPrims = 0;
        PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
        splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
        const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16;
        assert(numSubPrims-1 == numSubPrimsExpected);
        /* the first sub-primitive replaces the original in place,
           the remaining ones are appended past the original array end */
        const size_t newID = numPrimitives + primOffset1[j-center];
        assert(newID+numSubPrims-1 <= numPrimitivesExt);
        prims[primrefID] = subPrims[0];
        for (size_t i=1;i<numSubPrims;i++)
          prims[newID+i-1] = subPrims[i];
      }
    });

  numPrimitives += offset;

  /* recompute centroid bounding boxes */
  const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
      PrimInfo p(empty);
      for (size_t j=r.begin(); j<r.end(); j++)
        p.add_center2(prims[j]);
      return p;
    }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

  assert(pinfo1.size() == numPrimitives);

  return pinfo1;
}
#if !defined(RTHWIF_STANDALONE)
/*! Scene-level pre-split entry point: fills 'prims' for all geometries
 *  matching 'types'/'mblur', then runs the generic pre-split kernel with a
 *  mesh-specific splitter and a projected-area priority callback. */
template<typename Mesh, typename SplitterFactory>
PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
{
  ParallelForForPrefixSumState<PrimInfo> pstate;
  Scene::Iterator2 iter(scene,types,mblur);

  /* first try */
  progressMonitor(0);
  pstate.init(iter,size_t(1024));
  PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
      return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
    }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });

  /* if we need to filter out geometry, run again */
  if (pinfo.size() != numPrimRefs)
  {
    progressMonitor(0);
    pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
        return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
      }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
  }

  SplitterFactory Splitter(scene);

  /* creates a per-primitive splitter on demand and forwards to splitPrimitive */
  auto split_primitive = [&] (const PrimRef &prim,
                              const unsigned int splitprims,
                              const SplittingGrid& grid,
                              PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
                              unsigned int& numSubPrims)
  {
    const auto splitter = Splitter(prim);
    splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims);
  };

  /* priority metric input: projected area of the referenced primitive */
  auto primitiveArea = [&] (const PrimRef &ref) {
    const unsigned int geomID = ref.geomID();
    const unsigned int primID = ref.primID();
    return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
  };

  return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea);
}
#endif
}
}

View file

@ -0,0 +1,240 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#if !defined(RTHWIF_STANDALONE)
#include "../common/scene.h"
#endif
#include "../builders/primref.h"
namespace embree
{
namespace isa
{
/*! Clips an N-edge polygon (vertices v[0..N], with v[N] duplicating v[0])
 *  against the axis-aligned plane v[dim] == pos and returns the bounds of
 *  the two halves, each additionally clipped against 'bounds'. */
template<size_t N>
__forceinline void splitPolygon(const BBox3fa& bounds,
                                const size_t dim,
                                const float pos,
                                const Vec3fa (&v)[N+1],
                                BBox3fa& left_o,
                                BBox3fa& right_o)
{
  BBox3fa lbounds = empty, rbounds = empty;

  /* process every edge of the polygon */
  for (size_t e=0; e<N; e++)
  {
    const Vec3fa& a = v[e];
    const Vec3fa& b = v[e+1];
    const float ad = a[dim];
    const float bd = b[dim];

    /* the edge start vertex extends the side(s) it lies on */
    if (ad <= pos) lbounds.extend(a);
    if (ad >= pos) rbounds.extend(a);

    /* when the edge straddles the plane, its intersection point
       belongs to both halves */
    const bool crosses = (ad < pos && pos < bd) || (bd < pos && pos < ad);
    if (crosses)
    {
      assert((bd-ad) != 0.0f);
      const float inv_length = 1.0f/(bd-ad);
      const Vec3fa c = madd(Vec3fa((pos-ad)*inv_length),b-a,a);
      lbounds.extend(c);
      rbounds.extend(c);
    }
  }

  /* never grow beyond the bounds of the unsplit primitive */
  left_o  = intersect(lbounds,bounds);
  right_o = intersect(rbounds,bounds);
}
/*! Same polygon/plane clip as above, but uses precomputed componentwise
 *  reciprocal edge vectors 'inv_length' to avoid the per-crossing division. */
template<size_t N>
__forceinline void splitPolygon(const BBox3fa& bounds,
                                const size_t dim,
                                const float pos,
                                const Vec3fa (&v)[N+1],
                                const Vec3fa (&inv_length)[N],
                                BBox3fa& left_o,
                                BBox3fa& right_o)
{
  BBox3fa lbounds = empty, rbounds = empty;

  /* process every edge of the polygon */
  for (size_t e=0; e<N; e++)
  {
    const Vec3fa& a = v[e];
    const Vec3fa& b = v[e+1];
    const float ad = a[dim];
    const float bd = b[dim];

    /* the edge start vertex extends the side(s) it lies on */
    if (ad <= pos) lbounds.extend(a);
    if (ad >= pos) rbounds.extend(a);

    /* edge straddles the plane: add the intersection point to both halves;
       inv_length[e][dim] is only read here, where ad != bd is guaranteed */
    const bool crosses = (ad < pos && pos < bd) || (bd < pos && pos < ad);
    if (crosses)
    {
      assert((bd-ad) != 0.0f);
      const Vec3fa c = madd(Vec3fa((pos-ad)*inv_length[e][dim]),b-a,a);
      lbounds.extend(c);
      rbounds.extend(c);
    }
  }

  /* never grow beyond the bounds of the unsplit primitive */
  left_o  = intersect(lbounds,bounds);
  right_o = intersect(rbounds,bounds);
}
/*! PrimRef variant of the polygon/plane clip: clips against the primitive's
 *  own bounds and writes two PrimRefs that keep the original geomID/primID. */
template<size_t N>
__forceinline void splitPolygon(const PrimRef& prim,
                                const size_t dim,
                                const float pos,
                                const Vec3fa (&v)[N+1],
                                PrimRef& left_o,
                                PrimRef& right_o)
{
  BBox3fa lbounds = empty, rbounds = empty;

  /* process every edge of the polygon */
  for (size_t e=0; e<N; e++)
  {
    const Vec3fa& a = v[e];
    const Vec3fa& b = v[e+1];
    const float ad = a[dim];
    const float bd = b[dim];

    /* the edge start vertex extends the side(s) it lies on */
    if (ad <= pos) lbounds.extend(a);
    if (ad >= pos) rbounds.extend(a);

    /* edge straddles the plane: its intersection point belongs to both halves */
    const bool crosses = (ad < pos && pos < bd) || (bd < pos && pos < ad);
    if (crosses)
    {
      assert((bd-ad) != 0.0f);
      const float inv_length = 1.0f/(bd-ad);
      const Vec3fa c = madd(Vec3fa((pos-ad)*inv_length),b-a,a);
      lbounds.extend(c);
      rbounds.extend(c);
    }
  }

  /* clip against the unsplit primitive bounds and preserve its IDs */
  new (&left_o ) PrimRef(intersect(lbounds,prim.bounds()),prim.geomID(), prim.primID());
  new (&right_o) PrimRef(intersect(rbounds,prim.bounds()),prim.geomID(), prim.primID());
}
#if !defined(RTHWIF_STANDALONE)
/*! Splitter for a single triangle: caches the vertices (with a wrap-around
 *  copy of v0 so edge i is (v[i],v[i+1])) plus per-edge reciprocal edge
 *  vectors for fast plane clipping via splitPolygon<3>. */
struct TriangleSplitter
{
  __forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim)
  {
    /* mask off the upper geomID bits reserved for spatial-split bookkeeping
       (RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS) before the scene lookup */
    const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
    const TriangleMesh* mesh = (const TriangleMesh*) scene->get(prim.geomID() & mask );
    TriangleMesh::Triangle tri = mesh->triangle(prim.primID());
    v[0] = mesh->vertex(tri.v[0]);
    v[1] = mesh->vertex(tri.v[1]);
    v[2] = mesh->vertex(tri.v[2]);
    v[3] = mesh->vertex(tri.v[0]); // closes the polygon: edge 2 is (v2,v0)
    /* components may be inf for axis-aligned edges; splitPolygon only reads
       the component of an edge that actually crosses the split plane */
    inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]);
    inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
    inv_length[2] = Vec3fa(1.0f) / (v[0]-v[2]);
  }

  /*! splits a primitive reference at the plane dim/pos */
  __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
    splitPolygon<3>(prim,dim,pos,v,left_o,right_o);
  }

  /*! splits plain bounds at the plane dim/pos */
  __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
    splitPolygon<3>(prim,dim,pos,v,inv_length,left_o,right_o);
  }

private:
  Vec3fa v[4];          // triangle vertices, v[3] duplicates v[0]
  Vec3fa inv_length[3]; // componentwise reciprocal of each edge vector
};
/*! Creates TriangleSplitter instances on demand for a given scene. */
struct TriangleSplitterFactory
{
  __forceinline TriangleSplitterFactory(const Scene* scene_in)
    : scene_ptr(scene_in) {}

  /*! builds a splitter bound to the triangle referenced by 'prim' */
  __forceinline TriangleSplitter operator() (const PrimRef& prim) const {
    return TriangleSplitter(scene_ptr,prim);
  }

private:
  const Scene* scene_ptr;
};
/*! Splitter for a single quad, clipping it as a 5-edge polygon: the boundary
 *  starting at v1 (v1,v2,v3,v0) closed back to v1, plus the v1-v3 diagonal
 *  as the final edge — presumably so a non-planar quad clips like its two
 *  triangles; TODO confirm against the quad geometry definition. */
struct QuadSplitter
{
  __forceinline QuadSplitter(const Scene* scene, const PrimRef& prim)
  {
    /* mask off the upper geomID bits reserved for spatial-split bookkeeping */
    const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
    const QuadMesh* mesh = (const QuadMesh*) scene->get(prim.geomID() & mask );
    QuadMesh::Quad quad = mesh->quad(prim.primID());
    v[0] = mesh->vertex(quad.v[1]);
    v[1] = mesh->vertex(quad.v[2]);
    v[2] = mesh->vertex(quad.v[3]);
    v[3] = mesh->vertex(quad.v[0]);
    v[4] = mesh->vertex(quad.v[1]); // closes the boundary back to v1
    v[5] = mesh->vertex(quad.v[3]); // final edge is the v1-v3 diagonal
    /* components may be inf for axis-aligned edges; only read when the
       corresponding edge crosses the split plane (see splitPolygon) */
    inv_length[0] = Vec3fa(1.0f) / (v[1] - v[0]);
    inv_length[1] = Vec3fa(1.0f) / (v[2] - v[1]);
    inv_length[2] = Vec3fa(1.0f) / (v[3] - v[2]);
    inv_length[3] = Vec3fa(1.0f) / (v[4] - v[3]);
    inv_length[4] = Vec3fa(1.0f) / (v[5] - v[4]);
  }

  /*! splits a primitive reference at the plane dim/pos */
  __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
    splitPolygon<5>(prim,dim,pos,v,left_o,right_o);
  }

  /*! splits plain bounds at the plane dim/pos */
  __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
    splitPolygon<5>(prim,dim,pos,v,inv_length,left_o,right_o);
  }

private:
  Vec3fa v[6];          // quad boundary (starting at v1) plus diagonal endpoint
  Vec3fa inv_length[5]; // componentwise reciprocal of each edge vector
};
/*! Creates QuadSplitter instances on demand for a given scene. */
struct QuadSplitterFactory
{
  __forceinline QuadSplitterFactory(const Scene* scene_in)
    : scene_ptr(scene_in) {}

  /*! builds a splitter bound to the quad referenced by 'prim' */
  __forceinline QuadSplitter operator() (const PrimRef& prim) const {
    return QuadSplitter(scene_ptr,prim);
  }

private:
  const Scene* scene_ptr;
};
/*! No-op splitter used where spatial splits are not supported; both call
 *  operators leave the output primitives untouched. */
struct DummySplitter
{
  __forceinline DummySplitter(const Scene*, const PrimRef&) {}

  __forceinline void operator() (const PrimRef&, const size_t, const float, PrimRef&, PrimRef&) const {}

  __forceinline void operator() (const BBox3fa&, const size_t, const float, BBox3fa&, BBox3fa&) const {}
};
/*! Creates DummySplitter instances (which perform no work). */
struct DummySplitterFactory
{
  __forceinline DummySplitterFactory(const Scene* scene_in)
    : scene_ptr(scene_in) {}

  __forceinline DummySplitter operator() (const PrimRef& prim) const {
    return DummySplitter(scene_ptr,prim);
  }

private:
  const Scene* scene_ptr;
};
#endif
}
}

View file

@ -0,0 +1,190 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_statistics.h"
namespace embree
{
/*! Constructs an empty BVH of width N over the given scene: the root is the
 *  empty node and the allocator is configured for static vs. dynamic scenes. */
template<int N>
BVHN<N>::BVHN (const PrimitiveType& primTy, Scene* scene)
  : AccelData((N==4) ? AccelData::TY_BVH4 : (N==8) ? AccelData::TY_BVH8 : AccelData::TY_UNKNOWN),
    primTy(&primTy), device(scene->device), scene(scene),
    root(emptyNode), alloc(scene->device,scene->isStaticAccel()), numPrimitives(0), numVertices(0)
{
}
/*! Destructor: releases the sub-BVHs collected in 'objects'. */
template<int N>
BVHN<N>::~BVHN ()
{
  for (BVHN* object : objects)
    delete object;
}
/*! Resets the BVH to the empty state and releases all node/primitive memory. */
template<int N>
void BVHN<N>::clear()
{
  set(BVHN::emptyNode,empty,0);
  alloc.clear();
}
/*! Installs the build result: root node, linear bounds, and primitive count. */
template<int N>
void BVHN<N>::set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives)
{
  this->root = root;
  this->bounds = bounds;
  this->numPrimitives = numPrimitives;
}
/*! Recursively clears the barrier bits of the subtree rooted at 'node'. */
template<int N>
void BVHN<N>::clearBarrier(NodeRef& node)
{
  /* a barrier marks the boundary of an already-processed subtree */
  if (node.isBarrier()) {
    node.clearBarrier();
    return;
  }

  if (node.isLeaf())
    return;

  BaseNode* n = node.baseNode(); // FIXME: flags should be stored in BVH
  for (size_t c=0; c<N; c++)
    clearBarrier(n->child(c));
}
/*! Selects the 'num' largest-area inner nodes near the root via a max-heap,
 *  marks them with barrier bits, and relocates that top part of the tree
 *  into freshly allocated memory (depth-first) via layoutLargeNodesRecursion. */
template<int N>
void BVHN<N>::layoutLargeNodes(size_t num)
{
#if defined(__64BIT__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
  /* pairs a node reference with its surface area; leaves get -inf so they
     are never expanded by the heap selection below */
  struct NodeArea
  {
    __forceinline NodeArea() {}

    __forceinline NodeArea(NodeRef& node, const BBox3fa& bounds)
      : node(&node), A(node.isLeaf() ? float(neg_inf) : area(bounds)) {}

    __forceinline bool operator< (const NodeArea& other) const {
      return this->A < other.A;
    }

    NodeRef* node;
    float A;
  };

  /* greedily expand the largest-area node until 'num' nodes are collected
     or the largest remaining entry is not an AABB node */
  std::vector<NodeArea> lst;
  lst.reserve(num);
  lst.push_back(NodeArea(root,empty));

  while (lst.size() < num)
  {
    std::pop_heap(lst.begin(), lst.end());
    NodeArea n = lst.back(); lst.pop_back();
    if (!n.node->isAABBNode()) break;
    AABBNode* node = n.node->getAABBNode();
    for (size_t i=0; i<N; i++) {
      if (node->child(i) == BVHN::emptyNode) continue;
      lst.push_back(NodeArea(node->child(i),node->bounds(i)));
      std::push_heap(lst.begin(), lst.end());
    }
  }

  /* barrier bits tell the relocation pass where the selected top part ends */
  for (size_t i=0; i<lst.size(); i++)
    lst[i].node->setBarrier();

  root = layoutLargeNodesRecursion(root,alloc.getCachedAllocator());
#endif
}
/*! Copies the barrier-delimited top part of the tree into freshly allocated
 *  nodes; barriers are cleared as they are reached, everything below them
 *  (and all non-AABB nodes) stays in place. */
template<int N>
typename BVHN<N>::NodeRef BVHN<N>::layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator)
{
  /* a barrier marks the end of the subtree selected by layoutLargeNodes */
  if (node.isBarrier()) {
    node.clearBarrier();
    return node;
  }

  /* leaves and non-AABB nodes are not relocated */
  if (!node.isAABBNode())
    return node;

  /* copy the node into new memory and recurse into its children */
  AABBNode* const src = node.getAABBNode();
  AABBNode* const dst = (BVHN::AABBNode*) allocator.malloc0(sizeof(BVHN::AABBNode),byteNodeAlignment);
  *dst = *src;
  for (size_t c=0; c<N; c++)
    dst->child(c) = layoutLargeNodesRecursion(src->child(c),allocator);
  return encodeNode(dst);
}
/*! Called by all builders before a build starts; optionally logs the builder
 *  name and returns the start time (inf when reporting is disabled, so that
 *  postBuild becomes a no-op). */
template<int N>
double BVHN<N>::preBuild(const std::string& builderName)
{
  /* an empty builder name disables all reporting */
  if (builderName.empty())
    return inf;

  if (device->verbosity(2))
  {
    Lock<MutexSys> lock(g_printMutex);
    const bool isMBlur = builderName.find("MBlur") != std::string::npos;
    std::cout << "building BVH" << N << (isMBlur ? "MB" : "") << "<" << primTy->name() << "> using " << builderName << " ..." << std::endl << std::flush;
  }

  double t0 = 0.0;
  if (device->benchmark || device->verbosity(2))
    t0 = getSeconds();
  return t0;
}
/*! Called by all builders after the build; logs timing, throughput, and
 *  statistics. 't0' is the start time returned by preBuild (inf disables
 *  all reporting). */
template<int N>
void BVHN<N>::postBuild(double t0)
{
  if (t0 == double(inf))
    return;

  double dt = 0.0;
  if (device->benchmark || device->verbosity(2))
    dt = getSeconds()-t0;

  /* created lazily, shared by the verbose and benchmark paths below */
  std::unique_ptr<BVHNStatistics<N>> stat;

  /* print statistics */
  if (device->verbosity(2))
  {
    if (!stat) stat.reset(new BVHNStatistics<N>(this));
    const size_t usedBytes = alloc.getUsedBytes();
    Lock<MutexSys> lock(g_printMutex);
    std::cout << "finished BVH" << N << "<" << primTy->name() << "> : " << 1000.0f*dt << "ms, " << 1E-6*double(numPrimitives)/dt << " Mprim/s, " << 1E-9*double(usedBytes)/dt << " GB/s" << std::endl;
    if (device->verbosity(2))
      std::cout << stat->str();
    if (device->verbosity(2))
    {
      /* NOTE(review): this local 'stat' shadows the statistics pointer above;
         the shadowing looks intentional but is easy to misread */
      FastAllocator::AllStatistics stat(&alloc);
      for (size_t i=0; i<objects.size(); i++)
        if (objects[i])
          stat = stat + FastAllocator::AllStatistics(&objects[i]->alloc);
      stat.print(numPrimitives);
    }
    if (device->verbosity(3))
    {
      alloc.print_blocks();
      for (size_t i=0; i<objects.size(); i++)
        if (objects[i])
          objects[i]->alloc.print_blocks();
    }
    std::cout << std::flush;
  }

  /* benchmark mode */
  if (device->benchmark)
  {
    if (!stat) stat.reset(new BVHNStatistics<N>(this));
    Lock<MutexSys> lock(g_printMutex);
    std::cout << "BENCHMARK_BUILD " << dt << " " << double(numPrimitives)/dt << " " << stat->sah() << " " << stat->bytesUsed() << " BVH" << N << "<" << primTy->name() << ">" << std::endl << std::flush;
  }
}
/* explicit template instantiations for the BVH widths compiled into this
   translation unit */
#if defined(__AVX__)
template class BVHN<8>;
#endif
/* NOTE(review): '&&' binds tighter than '||', so this reads as
   !AVX || (!SSE2 && !SSE42) || aarch64 — looks intentional, but worth
   confirming the intended grouping */
#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) || defined(__aarch64__)
template class BVHN<4>;
#endif
}

View file

@ -0,0 +1,235 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
/* include all node types */
#include "bvh_node_aabb.h"
#include "bvh_node_aabb_mb.h"
#include "bvh_node_aabb_mb4d.h"
#include "bvh_node_obb.h"
#include "bvh_node_obb_mb.h"
#include "bvh_node_qaabb.h"
namespace embree
{
/*! flags used to enable specific node types in intersectors; the values are
 *  distinct bits so they can be combined with bitwise-or (the short names
 *  below predefine the common combinations) */
enum BVHNodeFlags
{
  BVH_FLAG_ALIGNED_NODE = 0x00001,
  BVH_FLAG_ALIGNED_NODE_MB = 0x00010,
  BVH_FLAG_UNALIGNED_NODE = 0x00100,
  BVH_FLAG_UNALIGNED_NODE_MB = 0x01000,
  BVH_FLAG_QUANTIZED_NODE = 0x100000,
  BVH_FLAG_ALIGNED_NODE_MB4D = 0x1000000,

  /* short versions */
  BVH_AN1 = BVH_FLAG_ALIGNED_NODE,
  BVH_AN2 = BVH_FLAG_ALIGNED_NODE_MB,
  BVH_AN2_AN4D = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
  BVH_UN1 = BVH_FLAG_UNALIGNED_NODE,
  BVH_UN2 = BVH_FLAG_UNALIGNED_NODE_MB,
  BVH_MB = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D,
  BVH_AN1_UN1 = BVH_FLAG_ALIGNED_NODE | BVH_FLAG_UNALIGNED_NODE,
  BVH_AN2_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_UNALIGNED_NODE_MB,
  BVH_AN2_AN4D_UN2 = BVH_FLAG_ALIGNED_NODE_MB | BVH_FLAG_ALIGNED_NODE_MB4D | BVH_FLAG_UNALIGNED_NODE_MB,
  BVH_QN1 = BVH_FLAG_QUANTIZED_NODE
};
/*! Multi BVH with N children. Each node stores the bounding box of
 *  its N children as well as N child references. */
template<int N>
class BVHN : public AccelData
{
  ALIGNED_CLASS_(16);
public:
  /*! forward declaration of node ref type */
  typedef NodeRefPtr<N> NodeRef;
  typedef BaseNode_t<NodeRef,N> BaseNode;
  typedef AABBNode_t<NodeRef,N> AABBNode;
  typedef AABBNodeMB_t<NodeRef,N> AABBNodeMB;
  typedef AABBNodeMB4D_t<NodeRef,N> AABBNodeMB4D;
  typedef OBBNode_t<NodeRef,N> OBBNode;
  typedef OBBNodeMB_t<NodeRef,N> OBBNodeMB;
  typedef QuantizedBaseNode_t<N> QuantizedBaseNode;
  typedef QuantizedBaseNodeMB_t<N> QuantizedBaseNodeMB;
  typedef QuantizedNode_t<NodeRef,N> QuantizedNode;

  /*! Number of bytes the nodes and primitives are minimally aligned to.*/
  static const size_t byteAlignment = 16;
  static const size_t byteNodeAlignment = 4*N;

  /*! Empty node */
  static const size_t emptyNode = NodeRef::emptyNode;

  /*! Invalid node, used as marker in traversal */
  static const size_t invalidNode = NodeRef::invalidNode;
  static const size_t popRay = NodeRef::popRay; // special marker forwarded from NodeRef

  /*! Maximum depth of the BVH. */
  static const size_t maxBuildDepth = 32;
  static const size_t maxBuildDepthLeaf = maxBuildDepth+8;
  static const size_t maxDepth = 2*maxBuildDepthLeaf; // 2x because of two level builder

  /*! Maximum number of primitive blocks in a leaf. */
  static const size_t maxLeafBlocks = NodeRef::maxLeafBlocks;

public:
  /*! Builder interface to create allocator */
  struct CreateAlloc : public FastAllocator::Create {
    __forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {}
  };

  typedef BVHNodeRecord<NodeRef> NodeRecord;
  typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
  typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;

public:
  /*! BVHN default constructor. */
  BVHN (const PrimitiveType& primTy, Scene* scene);

  /*! BVHN destruction */
  ~BVHN ();

  /*! clears the acceleration structure */
  void clear();

  /*! sets BVH members after build */
  void set (NodeRef root, const LBBox3fa& bounds, size_t numPrimitives);

  /*! Clears the barrier bits of a subtree. */
  void clearBarrier(NodeRef& node);

  /*! lays out num large nodes of the BVH */
  void layoutLargeNodes(size_t num);
  /*! recursion helper for layoutLargeNodes: relocates the barrier-marked top part */
  NodeRef layoutLargeNodesRecursion(NodeRef& node, const FastAllocator::CachedAllocator& allocator);

  /*! called by all builders before build starts */
  double preBuild(const std::string& builderName);

  /*! called by all builders after build ended */
  void postBuild(double t0);

  /*! allocator class */
  struct Allocator {
    BVHN* bvh;
    Allocator (BVHN* bvh) : bvh(bvh) {}
    __forceinline void* operator() (size_t bytes) const {
      return bvh->alloc._threadLocal()->malloc(&bvh->alloc,bytes);
    }
  };

  /*! post build cleanup */
  void cleanup() {
    alloc.cleanup();
  }

public:
  /*! Encodes a node */
  static __forceinline NodeRef encodeNode(AABBNode* node) { return NodeRef::encodeNode(node); }
  static __forceinline NodeRef encodeNode(AABBNodeMB* node) { return NodeRef::encodeNode(node); }
  static __forceinline NodeRef encodeNode(AABBNodeMB4D* node) { return NodeRef::encodeNode(node); }
  static __forceinline NodeRef encodeNode(OBBNode* node) { return NodeRef::encodeNode(node); }
  static __forceinline NodeRef encodeNode(OBBNodeMB* node) { return NodeRef::encodeNode(node); }
  static __forceinline NodeRef encodeLeaf(void* tri, size_t num) { return NodeRef::encodeLeaf(tri,num); }
  static __forceinline NodeRef encodeTypedLeaf(void* ptr, size_t ty) { return NodeRef::encodeTypedLeaf(ptr,ty); }

public:
  /*! Prefetches the node this reference points to; 'types' (BVHNodeFlags)
   *  selects how many cache lines are fetched for larger node layouts */
  __forceinline static void prefetch(const NodeRef ref, int types=0)
  {
#if defined(__AVX512PF__) // MIC
    if (types != BVH_FLAG_QUANTIZED_NODE) {
      prefetchL2(((char*)ref.ptr)+0*64);
      prefetchL2(((char*)ref.ptr)+1*64);
      if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
        prefetchL2(((char*)ref.ptr)+2*64);
        prefetchL2(((char*)ref.ptr)+3*64);
      }
      if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
        /* KNL still needs L2 prefetches for large nodes */
        prefetchL2(((char*)ref.ptr)+4*64);
        prefetchL2(((char*)ref.ptr)+5*64);
        prefetchL2(((char*)ref.ptr)+6*64);
        prefetchL2(((char*)ref.ptr)+7*64);
      }
    }
    else
    {
      /* todo: reduce if 32bit offsets are enabled */
      prefetchL2(((char*)ref.ptr)+0*64);
      prefetchL2(((char*)ref.ptr)+1*64);
      prefetchL2(((char*)ref.ptr)+2*64);
    }
#else
    if (types != BVH_FLAG_QUANTIZED_NODE) {
      prefetchL1(((char*)ref.ptr)+0*64);
      prefetchL1(((char*)ref.ptr)+1*64);
      if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
        prefetchL1(((char*)ref.ptr)+2*64);
        prefetchL1(((char*)ref.ptr)+3*64);
      }
      if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
        /* deactivate for large nodes on Xeon, as it introduces regressions */
        //prefetchL1(((char*)ref.ptr)+4*64);
        //prefetchL1(((char*)ref.ptr)+5*64);
        //prefetchL1(((char*)ref.ptr)+6*64);
        //prefetchL1(((char*)ref.ptr)+7*64);
      }
    }
    else
    {
      /* todo: reduce if 32bit offsets are enabled */
      prefetchL1(((char*)ref.ptr)+0*64);
      prefetchL1(((char*)ref.ptr)+1*64);
      prefetchL1(((char*)ref.ptr)+2*64);
    }
#endif
  }

  /*! Prefetches the node for writing (prefetchEX) */
  __forceinline static void prefetchW(const NodeRef ref, int types=0)
  {
    embree::prefetchEX(((char*)ref.ptr)+0*64);
    embree::prefetchEX(((char*)ref.ptr)+1*64);
    if ((N >= 8) || (types > BVH_FLAG_ALIGNED_NODE)) {
      embree::prefetchEX(((char*)ref.ptr)+2*64);
      embree::prefetchEX(((char*)ref.ptr)+3*64);
    }
    if ((N >= 8) && (types > BVH_FLAG_ALIGNED_NODE)) {
      embree::prefetchEX(((char*)ref.ptr)+4*64);
      embree::prefetchEX(((char*)ref.ptr)+5*64);
      embree::prefetchEX(((char*)ref.ptr)+6*64);
      embree::prefetchEX(((char*)ref.ptr)+7*64);
    }
  }

  /*! bvh type information */
public:
  const PrimitiveType* primTy; //!< primitive type stored in the BVH

  /*! bvh data */
public:
  Device* device; //!< device pointer
  Scene* scene; //!< scene pointer
  NodeRef root; //!< root node
  FastAllocator alloc; //!< allocator used to allocate nodes

  /*! statistics data */
public:
  size_t numPrimitives; //!< number of primitives the BVH is build over
  size_t numVertices; //!< number of vertices the BVH references

  /*! data arrays for special builders */
public:
  std::vector<BVHN*> objects; //!< sub-BVHs owned by this BVH (deleted in the destructor)
  vector_t<char,aligned_allocator<char,32>> subdiv_patches;
};

/*! convenience shorthands for the supported widths */
typedef BVHN<4> BVH4;
typedef BVHN<8> BVH8;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,318 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_factory.h"
namespace embree
{
/*! BVH4 instantiations */
class BVH4Factory : public BVHFactory
{
public:
BVH4Factory(int bfeatures, int ifeatures);
public:
Accel* BVH4OBBVirtualCurve4i(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve4v(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve8i(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve4iMB(Scene* scene, IntersectVariant ivariant);
Accel* BVH4OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4i);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8i);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4v);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector4iMB);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
Accel* BVH4Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::ROBUST);
Accel* BVH4Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4QuantizedTriangle4i(Scene* scene);
Accel* BVH4QuantizedQuad4i(Scene* scene);
Accel* BVH4SubdivPatch1(Scene* scene);
Accel* BVH4SubdivPatch1MB(Scene* scene);
Accel* BVH4UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4UserGeometryMB(Scene* scene);
Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);
Accel* BVH4InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4InstanceArrayMB(Scene* scene);
Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
private:
void selectBuilders(int features);
void selectIntersectors(int features);
private:
Accel::Intersectors BVH4OBBVirtualCurveIntersectors(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH4OBBVirtualCurveIntersectorsMB(BVH4* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4Intersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Triangle4vMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Quad4vIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Quad4iIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4Quad4iMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors QBVH4Quad4iIntersectors(BVH4* bvh);
Accel::Intersectors QBVH4Triangle4iIntersectors(BVH4* bvh);
Accel::Intersectors BVH4UserGeometryIntersectors(BVH4* bvh);
Accel::Intersectors BVH4UserGeometryMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceArrayIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceArrayMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH4GridMBIntersectors(BVH4* bvh, IntersectVariant ivariant);
private:
DEFINE_SYMBOL2(Accel::Collider,BVH4ColliderUserGeom);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersector1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4OBBVirtualCurveIntersectorRobust1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4vMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Triangle4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4Quad4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH4Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1Intersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4SubdivPatch1MBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4VirtualMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersector4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4OBBVirtualCurveIntersectorRobust4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4Intersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4vMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Triangle4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4Quad4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1Intersector4);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4SubdivPatch1MBIntersector4);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4VirtualMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
// ==============
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersector8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4OBBVirtualCurveIntersectorRobust8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4Intersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4vMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Triangle4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4Quad4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1Intersector8);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4SubdivPatch1MBIntersector8);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4VirtualMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
// ==============
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersector16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4OBBVirtualCurveIntersectorRobust16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4Intersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4vMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Triangle4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4Quad4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1Intersector16);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4SubdivPatch1MBIntersector16);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4VirtualMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
// SAH scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve4iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve8iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1BuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4SubdivPatch1MBBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
// spatial scene builder
private:
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Triangle4iSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
// twolevel scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
};
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,284 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_factory.h"
namespace embree
{
/*! BVH8 instantiations */
/*! Factory for all 8-wide BVH acceleration structures. Each public
 *  BVH8* method builds an Accel for one primitive type; the private
 *  DEFINE_SYMBOL2/DEFINE_ISA_FUNCTION tables hold the per-ISA
 *  intersector and builder entry points selected at construction time. */
class BVH8Factory : public BVHFactory
{
public:
/*! selects builders and intersectors for the given build/intersect CPU feature sets */
BVH8Factory(int bfeatures, int ifeatures);
public:
/* curve geometry accels (OBB-based hair/curve BVHs) */
Accel* BVH8OBBVirtualCurve8v(Scene* scene, IntersectVariant ivariant);
Accel* BVH8OBBVirtualCurve8iMB(Scene* scene, IntersectVariant ivariant);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8v);
DEFINE_SYMBOL2(VirtualCurveIntersector*,VirtualCurveIntersector8iMB);
/* triangle accels (v = vertex layout, i = indexed layout, MB = motion blur) */
Accel* BVH8Triangle4 (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4vMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Triangle4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
/* quad accels */
Accel* BVH8Quad4v (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Quad4i (Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8Quad4iMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
/* quantized-node variants (compressed node layout) */
Accel* BVH8QuantizedTriangle4i(Scene* scene);
Accel* BVH8QuantizedTriangle4(Scene* scene);
Accel* BVH8QuantizedQuad4i(Scene* scene);
/* user geometry, instancing, and grid accels */
Accel* BVH8UserGeometry(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8UserGeometryMB(Scene* scene);
Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);
Accel* BVH8InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8InstanceArrayMB(Scene* scene);
Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
private:
/* bind the builder/intersector symbols below for the given feature mask */
void selectBuilders(int features);
void selectIntersectors(int features);
private:
/* helpers assembling the Accel::Intersectors bundle (1/4/8/16-wide variants) for each accel type */
Accel::Intersectors BVH8OBBVirtualCurveIntersectors(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH8OBBVirtualCurveIntersectorsMB(BVH8* bvh, VirtualCurveIntersector* leafIntersector, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4Intersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Triangle4vMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Quad4vIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Quad4iIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8Quad4iMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors QBVH8Triangle4iIntersectors(BVH8* bvh);
Accel::Intersectors QBVH8Triangle4Intersectors(BVH8* bvh);
Accel::Intersectors QBVH8Quad4iIntersectors(BVH8* bvh);
Accel::Intersectors BVH8UserGeometryIntersectors(BVH8* bvh);
Accel::Intersectors BVH8UserGeometryMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceArrayIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceArrayMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
private:
/* per-ISA symbol tables: collider and single-ray (Intersector1) entry points */
DEFINE_SYMBOL2(Accel::Collider,BVH8ColliderUserGeom);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersector1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8OBBVirtualCurveIntersectorRobust1MB);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Triangle4vIntersector1Woop);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4vIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8Quad4iMBIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Triangle4Intersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,QBVH8Quad4iIntersector1Pluecker);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8VirtualMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
/* 4-wide ray packet (Intersector4) entry points */
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersector4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4Hybrid);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8OBBVirtualCurveIntersectorRobust4HybridMB);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4Intersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4vMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Triangle4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4vIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8Quad4iMBIntersector4HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8VirtualMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
/* 8-wide ray packet (Intersector8) entry points */
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersector8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8Hybrid);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8OBBVirtualCurveIntersectorRobust8HybridMB);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4Intersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4vMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Triangle4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4vIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8Quad4iMBIntersector8HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8VirtualMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
/* 16-wide ray packet (Intersector16) entry points */
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersector16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16Hybrid);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8OBBVirtualCurveIntersectorRobust16HybridMB);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4Intersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4vMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Triangle4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridMoellerNoFilter);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4vIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8Quad4iMBIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8VirtualMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
// SAH scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedTriangle4SceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4iMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8QuantizedQuad4iSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8VirtualMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
// SAH spatial scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
// twolevel scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
};
}

View file

@ -0,0 +1,60 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_builder.h"
namespace embree
{
namespace isa
{
template<int N>
typename BVHN<N>::NodeRef BVHNBuilderVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
{
  /* pin the branching factor to the BVH width and cap the build depth */
  settings.branchingFactor = N;
  settings.maxDepth = BVH::maxBuildDepthLeaf;

  /* leaf creation is delegated to the virtual createLeaf of the concrete builder */
  auto makeLeaf = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
    return createLeaf(prims,set,alloc);
  };

  /* run the binned SAH builder with standard AABB inner nodes */
  return BVHBuilderBinnedSAH::build<NodeRef>
    (FastAllocator::Create(allocator),typename BVH::AABBNode::Create2(),typename BVH::AABBNode::Set3(allocator,prims),makeLeaf,progressFunc,prims,pinfo,settings);
}
template<int N>
typename BVHN<N>::NodeRef BVHNBuilderQuantizedVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings)
{
  /* pin the branching factor to the BVH width and cap the build depth */
  settings.branchingFactor = N;
  settings.maxDepth = BVH::maxBuildDepthLeaf;

  /* leaf creation is delegated to the virtual createLeaf of the concrete builder */
  auto makeLeaf = [&] (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) -> NodeRef {
    return createLeaf(prims,set,alloc);
  };

  /* same binned SAH build, but with compressed (quantized) inner nodes */
  return BVHBuilderBinnedSAH::build<NodeRef>
    (FastAllocator::Create(allocator),typename BVH::QuantizedNode::Create2(),typename BVH::QuantizedNode::Set2(),makeLeaf,progressFunc,prims,pinfo,settings);
}
template<int N>
typename BVHN<N>::NodeRecordMB BVHNBuilderMblurVirtual<N>::BVHNBuilderV::build(FastAllocator* allocator, BuildProgressMonitor& progressFunc, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange)
{
  /* constrain the generic builder to this BVH's branching factor and leaf-phase depth */
  settings.maxDepth = BVH::maxBuildDepthLeaf;
  settings.branchingFactor = N;

  /* leaf creation is delegated to the concrete builder's virtual createLeaf() */
  auto emitLeaf = [&] (const PrimRef* leafPrims, const range<size_t>& leafRange, const Allocator& alloc) -> NodeRecordMB {
    return createLeaf(leafPrims,leafRange,alloc);
  };

  /* motion-blur nodes additionally store the time range they are valid for */
  return BVHBuilderBinnedSAH::build<NodeRecordMB>
    (FastAllocator::Create(allocator),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::SetTimeRange(timeRange),emitLeaf,progressFunc,prims,pinfo,settings);
}
/* explicit instantiations for the BVH widths supported by this translation unit;
 * 8-wide variants are only meaningful (and compiled) on AVX-capable targets */
template struct BVHNBuilderVirtual<4>;
template struct BVHNBuilderQuantizedVirtual<4>;
template struct BVHNBuilderMblurVirtual<4>;
#if defined(__AVX__)
template struct BVHNBuilderVirtual<8>;
template struct BVHNBuilderQuantizedVirtual<8>;
template struct BVHNBuilderMblurVirtual<8>;
#endif
}
}

View file

@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/bvh_builder_msmblur.h"
namespace embree
{
namespace isa
{
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! Type-erased front end to the binned-SAH builder for regular AABB BVHs.
 *  The build loop is compiled once (in the .cpp); only leaf creation is
 *  customized, via a virtual hook that a functor adapter implements. */
template<int N>
struct BVHNBuilderVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef FastAllocator::CachedAllocator Allocator;
/*! virtual base: build() drives the SAH builder and invokes createLeaf() for every leaf range */
struct BVHNBuilderV {
NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};
/*! adapter that exposes an arbitrary functor through the virtual createLeaf() hook */
template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}
NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}
private:
CreateLeafFunc createLeafFunc;
};
/*! convenience entry point: wraps the functor and runs the build; the adapter is a
 *  stack temporary destroyed through its own type, so no virtual destructor is needed */
template<typename CreateLeafFunc>
static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
}
};
/*! Type-erased front end to the binned-SAH builder for quantized-node BVHs.
 *  Same pattern as BVHNBuilderVirtual: shared build loop, pluggable leaf creation. */
template<int N>
struct BVHNBuilderQuantizedVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef FastAllocator::CachedAllocator Allocator;
/*! virtual base: build() drives the SAH builder and invokes createLeaf() for every leaf range */
struct BVHNBuilderV {
NodeRef build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings);
virtual NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};
/*! adapter that exposes an arbitrary functor through the virtual createLeaf() hook */
template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}
NodeRef createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}
private:
CreateLeafFunc createLeafFunc;
};
/*! convenience entry point: wraps the functor and runs the build */
template<typename CreateLeafFunc>
static NodeRef build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings);
}
};
/*! Type-erased front end to the binned-SAH builder for motion-blur BVHs.
 *  Leaves return a NodeRecordMB (reference plus linear bounds) and the build
 *  additionally takes the time range the nodes are valid for. */
template<int N>
struct BVHNBuilderMblurVirtual
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNodeMB AABBNodeMB;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;
typedef FastAllocator::CachedAllocator Allocator;
/*! virtual base: build() drives the SAH builder and invokes createLeaf() for every leaf range */
struct BVHNBuilderV {
NodeRecordMB build(FastAllocator* allocator, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange);
virtual NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) = 0;
};
/*! adapter that exposes an arbitrary functor through the virtual createLeaf() hook */
template<typename CreateLeafFunc>
struct BVHNBuilderT : public BVHNBuilderV
{
BVHNBuilderT (CreateLeafFunc createLeafFunc)
: createLeafFunc(createLeafFunc) {}
NodeRecordMB createLeaf (const PrimRef* prims, const range<size_t>& set, const Allocator& alloc) {
return createLeafFunc(prims,set,alloc);
}
private:
CreateLeafFunc createLeafFunc;
};
/*! convenience entry point: wraps the functor and runs the build */
template<typename CreateLeafFunc>
static NodeRecordMB build(FastAllocator* allocator, CreateLeafFunc createLeaf, BuildProgressMonitor& progress, PrimRef* prims, const PrimInfo& pinfo, GeneralBVHBuilder::Settings settings, const BBox1f& timeRange) {
return BVHNBuilderT<CreateLeafFunc>(createLeaf).build(allocator,progress,prims,pinfo,settings,timeRange);
}
};
}
}

View file

@ -0,0 +1,583 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_statistics.h"
#include "bvh_rotate.h"
#include "../common/profile.h"
#include "../../common/algorithms/parallel_prefix_sum.h"
#include "../builders/primrefgen.h"
#include "../builders/bvh_builder_morton.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#if defined(__64BIT__)
# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
#else
# define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues
#endif
namespace embree
{
namespace isa
{
/*! Morton-builder callback: writes the child references and bounds into an
 *  already-allocated AABB node and returns the merged bounds for the parent. */
template<int N>
struct SetBVHNBounds
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
typedef typename BVH::AABBNode AABBNode;
BVH* bvh;
__forceinline SetBVHNBounds (BVH* bvh) : bvh(bvh) {}
__forceinline NodeRecord operator() (NodeRef ref, const NodeRecord* children, size_t num)
{
AABBNode* node = ref.getAABBNode();
/* store each child and accumulate the union of all child bounds */
BBox3fa res = empty;
for (size_t i=0; i<num; i++) {
const BBox3fa b = children[i].bounds;
res.extend(b);
node->setRef(i,children[i].ref);
node->setBounds(i,b);
}
BBox3fx result = (BBox3fx&)res;
#if ROTATE_TREE
if (N == 4)
{
/* under ROTATE_TREE, bounds.lower.a carries the subtree primitive count
 * (set by the leaf creators below); small subtrees (<4096 prims) are
 * rotated here and marked with a barrier so the later whole-tree
 * rotation does not descend into them again */
size_t n = 0;
for (size_t i=0; i<num; i++)
n += children[i].bounds.lower.a;
if (n >= 4096) {
for (size_t i=0; i<num; i++) {
if (children[i].bounds.lower.a < 4096) {
for (int j=0; j<ROTATE_TREE; j++)
BVHNRotate<N>::rotate(node->child(i));
node->child(i).setBarrier();
}
}
}
result.lower.a = unsigned(n);
}
#endif
return NodeRecord(ref,result);
}
};
template<int N, typename Primitive>
struct CreateMortonLeaf;
/*! Creates a Triangle4 leaf from a morton-sorted primitive range (at most 4
 *  triangles), gathering vertex data into SIMD lanes and returning the leaf
 *  reference together with its bounds. */
template<int N>
struct CreateMortonLeaf<N,Triangle4>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Triangle4* accel = (Triangle4*) alloc.malloc1(sizeof(Triangle4),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
/* -1 marks unused lanes in the geomID/primID vectors */
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ const mesh = this->mesh;
/* gather triangle vertices into SoA lanes and grow the leaf bounds */
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}
/* non-temporal store: the freshly built leaf will not be read back soon */
Triangle4::store_nt(accel,Triangle4(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
/* stash the primitive count in lower.a for the tree-rotation pass */
if (N == 4)
box_o.lower.a = unsigned(current.size());
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Creates a Triangle4v (full-vertex) leaf from a morton-sorted range of at
 *  most 4 triangles; same gather scheme as the Triangle4 variant. */
template<int N>
struct CreateMortonLeaf<N,Triangle4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Triangle4v* accel = (Triangle4v*) alloc.malloc1(sizeof(Triangle4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
/* -1 marks unused lanes in the geomID/primID vectors */
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero;
const TriangleMesh* __restrict__ mesh = this->mesh;
/* gather triangle vertices into SoA lanes and grow the leaf bounds */
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
}
/* non-temporal store of the finished leaf block */
Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
/* stash the primitive count in lower.a for the tree-rotation pass */
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Creates a Triangle4i (indexed) leaf from a morton-sorted range of at most
 *  4 triangles; stores vertex-buffer offsets instead of vertex positions. */
template<int N>
struct CreateMortonLeaf<N,Triangle4i>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (TriangleMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Triangle4i* accel = (Triangle4i*) alloc.malloc1(sizeof(Triangle4i),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
vuint4 v0 = zero, v1 = zero, v2 = zero;
vuint4 vgeomID = -1, vprimID = -1;
const TriangleMesh* __restrict__ const mesh = this->mesh;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
const TriangleMesh::Triangle& tri = mesh->triangle(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2);
vgeomID[i] = geomID_;
vprimID[i] = primID;
/* store indices scaled to 4-byte units of the vertex buffer stride */
unsigned int int_stride = mesh->vertices0.getStride()/4;
v0[i] = tri.v[0] * int_stride;
v1[i] = tri.v[1] * int_stride;
v2[i] = tri.v[2] * int_stride;
}
/* pad unused lanes so the whole block is initialized; padded lanes reuse
 * geomID[0] and carry primID -1 plus zero vertex offsets */
for (size_t i=items; i<4; i++)
{
vgeomID[i] = vgeomID[0];
vprimID[i] = -1;
v0[i] = 0;
v1[i] = 0;
v2[i] = 0;
}
/* non-temporal store of the finished leaf block */
Triangle4i::store_nt(accel,Triangle4i(v0,v1,v2,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
/* stash the primitive count in lower.a for the tree-rotation pass */
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
TriangleMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Creates a Quad4v leaf from a morton-sorted range of at most 4 quads,
 *  gathering the four quad vertices into SIMD lanes. */
template<int N>
struct CreateMortonLeaf<N,Quad4v>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (QuadMesh* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items<=4);
/* allocate leaf node */
Quad4v* accel = (Quad4v*) alloc.malloc1(sizeof(Quad4v),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,1);
/* -1 marks unused lanes in the geomID/primID vectors */
vuint4 vgeomID = -1, vprimID = -1;
Vec3vf4 v0 = zero, v1 = zero, v2 = zero, v3 = zero;
const QuadMesh* __restrict__ mesh = this->mesh;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
/* 'tri' is a historic name — this references a quad */
const QuadMesh::Quad& tri = mesh->quad(primID);
const Vec3fa& p0 = mesh->vertex(tri.v[0]);
const Vec3fa& p1 = mesh->vertex(tri.v[1]);
const Vec3fa& p2 = mesh->vertex(tri.v[2]);
const Vec3fa& p3 = mesh->vertex(tri.v[3]);
lower = min(lower,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
upper = max(upper,(vfloat4)p0,(vfloat4)p1,(vfloat4)p2,(vfloat4)p3);
vgeomID [i] = geomID_;
vprimID [i] = primID;
v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
v3.x[i] = p3.x; v3.y[i] = p3.y; v3.z[i] = p3.z;
}
/* non-temporal store of the finished leaf block */
Quad4v::store_nt(accel,Quad4v(v0,v1,v2,v3,vgeomID,vprimID));
BBox3fx box_o = BBox3fx((Vec3fx)lower,(Vec3fx)upper);
#if ROTATE_TREE
/* stash the primitive count in lower.a for the tree-rotation pass */
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
QuadMesh* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Creates a user-geometry leaf: one Object record per primitive in the
 *  range (no 4-wide packing, so the range may hold more than 4 items). */
template<int N>
struct CreateMortonLeaf<N,Object>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (UserGeometry* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
/* allocate leaf node */
Object* accel = (Object*) alloc.malloc1(items*sizeof(Object),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const UserGeometry* mesh = this->mesh;
/* construct one Object per primitive and merge their bounds */
BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int index = morton[start+i].index;
const unsigned int primID = index;
bounds.extend(mesh->bounds(primID));
new (&accel[i]) Object(geomID_,primID);
}
BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
/* stash the primitive count in lower.a for the tree-rotation pass */
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
UserGeometry* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Creates an instance leaf; instances are never grouped, so the range holds
 *  at most one primitive. */
template<int N>
struct CreateMortonLeaf<N,InstancePrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (Instance* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);
/* allocate leaf node */
InstancePrimitive* accel = (InstancePrimitive*) alloc.malloc1(items*sizeof(InstancePrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const Instance* instance = this->mesh;
BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstancePrimitive(instance, geomID_);
}
BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
/* stash the primitive count in lower.a for the tree-rotation pass */
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
Instance* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! Creates an instance-array leaf; like single instances, at most one
 *  primitive per leaf. */
template<int N>
struct CreateMortonLeaf<N,InstanceArrayPrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (InstanceArray* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);
/* allocate leaf node */
InstanceArrayPrimitive* accel = (InstanceArrayPrimitive*) alloc.malloc1(items*sizeof(InstanceArrayPrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const InstanceArray* instance = this->mesh;
BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstanceArrayPrimitive(geomID_, primID);
}
BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
/* stash the primitive count in lower.a for the tree-rotation pass */
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
InstanceArray* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
/*! functor that looks up the world-space bounds of the mesh primitive a
 *  morton build-prim refers to */
template<typename Mesh>
struct CalculateMeshBounds
{
  __forceinline CalculateMeshBounds (Mesh* mesh)
    : mesh(mesh) {}

  __forceinline const BBox3fa operator() (const BVHBuilderMorton::BuildPrim& morton) {
    const unsigned int primID = morton.index;
    return mesh->bounds(primID);
  }

private:
  Mesh* mesh; //!< mesh the primitive indices refer into (non-owning)
};
/*! Morton-code BVH builder over a single mesh: computes morton codes for all
 *  primitives, builds the tree bottom-up, and optionally applies tree
 *  rotations as a post-pass (4-wide BVHs on 64-bit targets only). */
template<int N, typename Mesh, typename Primitive>
class BVHNMeshBuilderMorton : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
public:
BVHNMeshBuilderMorton (BVH* bvh, Mesh* mesh, unsigned int geomID, const size_t minLeafSize, const size_t maxLeafSize, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD)
: bvh(bvh), mesh(mesh), morton(bvh->device,0), settings(N,BVH::maxBuildDepth,minLeafSize,min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks),singleThreadThreshold), geomID_(geomID) {}
/* build function */
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
morton.clear();
}
size_t numPrimitives = mesh->size();
numPreviousPrimitives = numPrimitives;
/* skip build for empty scene */
if (numPrimitives == 0) {
bvh->set(BVH::emptyNode,empty,0);
return;
}
/* preallocate arrays */
morton.resize(numPrimitives);
/* estimate allocator size: nodes plus 20%-padded leaf storage */
size_t bytesEstimated = numPrimitives*sizeof(AABBNode)/(4*N) + size_t(1.2f*Primitive::blocks(numPrimitives)*sizeof(Primitive));
size_t bytesMortonCodes = numPrimitives*sizeof(BVHBuilderMorton::BuildPrim);
bytesEstimated = max(bytesEstimated,bytesMortonCodes); // the first allocation block is reused to sort the morton codes
bvh->alloc.init(bytesMortonCodes,bytesMortonCodes,bytesEstimated);
/* create morton code array */
BVHBuilderMorton::BuildPrim* dest = (BVHBuilderMorton::BuildPrim*) bvh->alloc.specialAlloc(bytesMortonCodes);
/* numPrimitivesGen may be smaller than numPrimitives if invalid primitives were skipped */
size_t numPrimitivesGen = createMortonCodeArray<Mesh>(mesh,morton,bvh->scene->progressInterface);
/* create BVH */
SetBVHNBounds<N> setBounds(bvh);
CreateMortonLeaf<N,Primitive> createLeaf(mesh,geomID_,morton.data());
CalculateMeshBounds<Mesh> calculateBounds(mesh);
auto root = BVHBuilderMorton::build<NodeRecord>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create(),
setBounds,createLeaf,calculateBounds,bvh->scene->progressInterface,
morton.data(),dest,numPrimitivesGen,settings);
bvh->set(root.ref,LBBox3fa(root.bounds),numPrimitives);
#if ROTATE_TREE
/* post-pass: rotate the finished tree and clear the barrier bits set
 * during the build (see SetBVHNBounds) */
if (N == 4)
{
for (int i=0; i<ROTATE_TREE; i++)
BVHNRotate<N>::rotate(bvh->root);
bvh->clearBarrier(bvh->root);
}
#endif
/* clear temporary data for static geometry */
if (bvh->scene->isStaticAccel()) {
morton.clear();
}
bvh->cleanup();
}
void clear() {
morton.clear();
}
private:
BVH* bvh;
Mesh* mesh;
mvector<BVHBuilderMorton::BuildPrim> morton;
BVHBuilderMorton::Settings settings;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
};
/* factory functions instantiating the Morton mesh builders for each enabled
 * geometry type; BVH8 variants are only compiled on AVX-capable ISAs */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4> ((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4v>((BVH4*)bvh,mesh,geomID,4,4); }
Builder* BVH4Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,TriangleMesh,Triangle4i>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4> ((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4vMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4v>((BVH8*)bvh,mesh,geomID,4,4); }
Builder* BVH8Triangle4iMeshBuilderMortonGeneral (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,TriangleMesh,Triangle4i>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,QuadMesh,Quad4v>((BVH4*)bvh,mesh,geomID,4,4); }
#if defined(__AVX__)
Builder* BVH8Quad4vMeshBuilderMortonGeneral (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,QuadMesh,Quad4v>((BVH8*)bvh,mesh,geomID,4,4); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,UserGeometry,Object>((BVH4*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8VirtualMeshBuilderMortonGeneral (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,UserGeometry,Object>((BVH8*)bvh,mesh,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
/* NOTE(review): the instance factories below pass 'gtype' before 'geomID',
 * while the visible BVHNMeshBuilderMorton constructor takes
 * (bvh, mesh, geomID, minLeafSize, maxLeafSize, singleThreadThreshold) —
 * so gtype would land in the geomID parameter and geomID in minLeafSize.
 * Confirm against upstream Embree whether an overload taking gtype exists
 * or whether this argument order is a latent bug. */
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,Instance,InstancePrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
}
}

View file

@ -0,0 +1,565 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/primrefgen.h"
#include "../builders/splitter.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
#define PROFILE 0
#define PROFILE_RUNS 20
namespace embree
{
namespace isa
{
/*! leaf-creation functor for the SAH builder: packs a primref range into
 *  Primitive blocks and encodes a leaf node reference */
template<int N, typename Primitive>
struct CreateLeaf
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;

  __forceinline CreateLeaf (BVH* bvh) : bvh(bvh) {}

  __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
  {
    /* number of fixed-size primitive blocks needed for this range */
    const size_t numBlocks = Primitive::blocks(set.size());
    Primitive* const accel = (Primitive*) alloc.malloc1(numBlocks*sizeof(Primitive),BVH::byteAlignment);
    const typename BVH::NodeRef leaf = BVH::encodeLeaf((char*)accel,numBlocks);
    /* each fill() consumes primrefs from the cursor position onward */
    size_t cursor = set.begin();
    for (size_t b=0; b<numBlocks; b++)
      accel[b].fill(prims,cursor,set.end(),bvh->scene);
    return leaf;
  }

  BVH* bvh;
};
/*! leaf-creation functor for the quantized SAH builder; identical packing
 *  scheme to CreateLeaf */
template<int N, typename Primitive>
struct CreateLeafQuantized
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;

  __forceinline CreateLeafQuantized (BVH* bvh) : bvh(bvh) {}

  __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
  {
    /* round the range size up to whole primitive blocks */
    const size_t blockCount = Primitive::blocks(set.size());
    Primitive* const accel = (Primitive*) alloc.malloc1(blockCount*sizeof(Primitive),BVH::byteAlignment);
    const typename BVH::NodeRef leaf = BVH::encodeLeaf((char*)accel,blockCount);
    /* fill() advances the position as primrefs are consumed */
    size_t pos = set.begin();
    for (size_t blk=0; blk<blockCount; blk++)
      accel[blk].fill(prims,pos,set.end(),bvh->scene);
    return leaf;
  }

  BVH* bvh;
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! Binned-SAH BVH builder over a whole scene or a single mesh. Optionally
 *  shares its primref array with the BVH allocator to reduce peak memory
 *  (primrefarrayalloc). */
template<int N, typename Primitive>
struct BVHNBuilderSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
BVH* bvh;
Scene* scene;
Geometry* mesh;
mvector<PrimRef> prims;
GeneralBVHBuilder::Settings settings;
Geometry::GTypeMask gtype_;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max ();
bool primrefarrayalloc;
unsigned int numPreviousPrimitives = 0;
/*! scene-mode constructor (mesh == nullptr) */
BVHNBuilderSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize,
const Geometry::GTypeMask gtype, bool primrefarrayalloc = false)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0),
settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), primrefarrayalloc(primrefarrayalloc) {}
/*! single-mesh constructor (scene == nullptr), used by the two-level builder */
BVHNBuilderSAH (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID), primrefarrayalloc(false) {}
// FIXME: shrink bvh->alloc in destructor here and in other builders too
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}
/* if we use the primrefarray for allocations we have to take it back from the BVH */
/* (settings.primrefarrayalloc still holds the value from the previous build) */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.unshare(prims);
/* skip build for empty scene */
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
numPreviousPrimitives = numPrimitives;
if (numPrimitives == 0) {
bvh->clear();
prims.clear();
return;
}
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");
#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif
/* create primref array */
/* enable primref sharing only for large scenes (>= 1M prims, given the /1000) */
if (primrefarrayalloc) {
settings.primrefarrayalloc = numPrimitives/1000;
if (settings.primrefarrayalloc < 1000)
settings.primrefarrayalloc = inf;
}
/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);
/* initialize allocator */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
prims.resize(numPrimitives);
PrimInfo pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);
/* pinfo might has zero size due to invalid geometry */
if (unlikely(pinfo.size() == 0))
{
bvh->clear();
prims.clear();
return;
}
/* call BVH builder */
NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeaf<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
#if PROFILE
});
#endif
/* if we allocated using the primrefarray we have to keep it alive */
if (settings.primrefarrayalloc != size_t(inf))
bvh->alloc.share(prims);
/* for static geometries we can do some cleanups */
else if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}
void clear() {
prims.clear();
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! Binned-SAH builder variant producing quantized-node BVHs; same build flow
 *  as BVHNBuilderSAH but without primref-array sharing or large-node layout. */
template<int N, typename Primitive>
struct BVHNBuilderSAHQuantized : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
BVH* bvh;
Scene* scene;
Geometry* mesh;
mvector<PrimRef> prims;
GeneralBVHBuilder::Settings settings;
Geometry::GTypeMask gtype_;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
unsigned int numPreviousPrimitives = 0;
/*! scene-mode constructor (mesh == nullptr) */
BVHNBuilderSAHQuantized (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype) {}
/*! single-mesh constructor (scene == nullptr), used by the two-level builder */
BVHNBuilderSAHQuantized (BVH* bvh, Geometry* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), gtype_(gtype), geomID_(geomID) {}
// FIXME: shrink bvh->alloc in destructor here and in other builders too
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}
/* skip build for empty scene */
const size_t numPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(gtype_,false);
numPreviousPrimitives = numPrimitives;
if (numPrimitives == 0) {
prims.clear();
bvh->clear();
return;
}
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::QBVH" + toString(N) + "BuilderSAH");
#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif
/* create primref array */
prims.resize(numPrimitives);
PrimInfo pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) :
createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface);
/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);
/* call BVH builder */
const size_t node_bytes = numPrimitives*sizeof(typename BVH::QuantizedNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(numPrimitives)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);
NodeRef root = BVHNBuilderQuantizedVirtual<N>::build(&bvh->alloc,CreateLeafQuantized<N,Primitive>(bvh),bvh->scene->progressInterface,prims.data(),pinfo,settings);
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
//bvh->layoutLargeNodes(pinfo.size()*0.005f); // FIXME: COPY LAYOUT FOR LARGE NODES !!!
#if PROFILE
});
#endif
/* clear temporary data for static geometry */
if (scene && scene->isStaticAccel()) {
prims.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}
void clear() {
prims.clear();
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! Leaf-creation functor for grid BVH builds. A leaf stores one SubGridQBVHN
 *  per distinct geomID referenced by the primitives of the set; the sgrids
 *  side array maps a PrimRef's primID back to its subgrid location. */
template<int N, typename Primitive>
struct CreateLeafGrid
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;

  __forceinline CreateLeafGrid (BVH* bvh, const SubGridBuildData * const sgrids) : bvh(bvh),sgrids(sgrids) {}

  __forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
  {
    const size_t numPrims = set.size();
    const size_t first = set.begin();

    /* gather the distinct geomIDs referenced by this leaf */
    assert(numPrims <= N);
    unsigned int uniqueIDs[N];
    unsigned int numUnique = 0;
    for (size_t i=0; i<numPrims; i++)
    {
      const unsigned int id = prims[first+i].geomID();
      bool known = false;
      for (size_t j=0; j<numUnique; j++) {
        if (uniqueIDs[j] == id) { known = true; break; }
      }
      if (!known)
        uniqueIDs[numUnique++] = id;
    }

    /* one contiguous allocation holding one SubGridQBVHN per geomID */
    SubGridQBVHN<N>* accel = (SubGridQBVHN<N>*) alloc.malloc1(numUnique*sizeof(SubGridQBVHN<N>),BVH::byteAlignment);
    typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,numUnique);

    /* pack, per geomID, all matching subgrids of the set into one structure */
    for (size_t g=0; g<numUnique; g++)
    {
      unsigned int sx[N];
      unsigned int sy[N];
      unsigned int subPrimID[N];
      BBox3fa subBounds[N];
      unsigned int count = 0;
      for (size_t i=0; i<numPrims; i++)
      {
        const PrimRef& ref = prims[first+i];
        if (unlikely(ref.geomID() != uniqueIDs[g])) continue;
        const SubGridBuildData& sgrid_bd = sgrids[ref.primID()];
        sx[count] = sgrid_bd.sx;
        sy[count] = sgrid_bd.sy;
        subPrimID[count] = sgrid_bd.primID;
        subBounds[count] = ref.bounds();
        count++;
      }
      assert(count <= N);
      new (&accel[g]) SubGridQBVHN<N>(sx,sy,subPrimID,subBounds,uniqueIDs[g],count);
    }
    return node;
  }

  BVH* bvh;                             //!< BVH being built
  const SubGridBuildData * const sgrids; //!< primID -> subgrid location mapping
};
/*! SAH builder for grid geometry. Subgrid references are kept in a
 *  SubGridBuildData side array (sgrids) that maps the primID stored in each
 *  PrimRef back to the subgrid location inside its grid mesh. */
template<int N>
struct BVHNBuilderSAHGrid : public Builder
{
  typedef BVHN<N> BVH;
  typedef typename BVHN<N>::NodeRef NodeRef;

  BVH* bvh;                           //!< BVH to build into
  Scene* scene;                       //!< scene to build for (null in mesh mode)
  GridMesh* mesh;                     //!< grid mesh to build for (null in scene mode)
  mvector<PrimRef> prims;             //!< temporary primitive references
  mvector<SubGridBuildData> sgrids;   //!< subgrid location of each primitive reference
  GeneralBVHBuilder::Settings settings;
  const unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); //!< geomID in mesh mode
  unsigned int numPreviousPrimitives = 0; //!< primitive count of the previous build (mesh mode)

  /*! scene-mode constructor */
  BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
    : bvh(bvh), scene(scene), mesh(nullptr), prims(scene->device,0), sgrids(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD) {}

  /*! mesh-mode constructor (two-level build) */
  BVHNBuilderSAHGrid (BVH* bvh, GridMesh* mesh, unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
    : bvh(bvh), scene(nullptr), mesh(mesh), prims(bvh->device,0), sgrids(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD), geomID_(geomID) {}
    // bugfix: 'sgrids' previously used 'scene->device', but members are
    // initialized in declaration order, so 'scene' had just been set to
    // nullptr and was dereferenced here (null-pointer UB). Use bvh->device,
    // matching 'prims' above.

  void build()
  {
    /* we reset the allocator when the mesh size changed */
    if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
      bvh->alloc.clear();
    }

    /* if we use the primrefarray for allocations we have to take it back from the BVH */
    if (settings.primrefarrayalloc != size_t(inf))
      bvh->alloc.unshare(prims);

    const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false);
    numPreviousPrimitives = numGridPrimitives;

    /* create primref array and fill the subgrid side array */
    PrimInfo pinfo = mesh ? createPrimRefArrayGrids(mesh,prims,sgrids) : createPrimRefArrayGrids(scene,prims,sgrids);
    const size_t numPrimitives = pinfo.size();

    /* no primitives -> clear BVH and temporaries */
    if (numPrimitives == 0) {
      bvh->clear();
      prims.clear();
      sgrids.clear();
      return;
    }

    double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + "BuilderSAH");

    /* for large scenes let the builder allocate nodes inside the primref array */
    settings.primrefarrayalloc = numPrimitives/1000;
    if (settings.primrefarrayalloc < 1000)
      settings.primrefarrayalloc = inf;

    /* enable os_malloc for two level build */
    if (mesh)
      bvh->alloc.setOSallocation(true);

    /* initialize allocator with a memory estimate for nodes and leaves */
    const size_t node_bytes = numPrimitives*sizeof(typename BVH::AABBNodeMB)/(4*N);
    const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
    bvh->alloc.init_estimate(node_bytes+leaf_bytes);
    settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,numPrimitives,node_bytes+leaf_bytes);

    /* pinfo might have zero size due to invalid geometry */
    if (unlikely(pinfo.size() == 0))
    {
      bvh->clear();
      sgrids.clear();
      prims.clear();
      return;
    }

    /* call BVH builder */
    NodeRef root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafGrid<N,SubGridQBVHN<N>>(bvh,sgrids.data()),bvh->scene->progressInterface,prims.data(),pinfo,settings);
    bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
    bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));

    /* clear temporary array */
    sgrids.clear();

    /* if we allocated using the primrefarray we have to keep it alive */
    if (settings.primrefarrayalloc != size_t(inf))
      bvh->alloc.share(prims);

    /* for static geometries we can do some cleanups */
    else if (scene && scene->isStaticAccel()) {
      prims.clear();
    }
    bvh->cleanup();
    bvh->postBuild(t0);
  }

  /*! frees the temporary primitive-reference array */
  void clear() {
    prims.clear();
  }
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/* factory functions for triangle BVH builders (BVH4; BVH8 with AVX) */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4v>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); }
Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); }
#endif
#endif
/* factory functions for quad BVH builders (BVH4; BVH8 with AVX) */
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4iMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
Builder* BVH4QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4v>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH4QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
#if defined(__AVX__)
Builder* BVH8Quad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8Quad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type,true); }
Builder* BVH8QuantizedQuad4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4v>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8QuantizedQuad4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,QuadMesh::geom_type); }
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Quad4v>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,QuadMesh::geom_type); }
#endif
#endif
/* factory functions for user-geometry (virtual object) BVH builders;
 * scene-mode leaf sizes come from device configuration */
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_min_leaf_size;
int maxLeafSize = scene->device->object_accel_max_leaf_size;
return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
}
Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,Object>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,UserGeometry::geom_type);
}
#if defined(__AVX__)
Builder* BVH8VirtualSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_min_leaf_size;
int maxLeafSize = scene->device->object_accel_max_leaf_size;
return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,UserGeometry::geom_type);
}
Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,Object>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,UserGeometry::geom_type);
}
#endif
#endif
/* factory functions for instance BVH builders (one instance per leaf in scene mode) */
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
/* NOTE(review): maxLeafSize is 1 here but inf in the BVH4 mesh variant above --
 * looks intentional per upstream, confirm before changing */
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif
/* factory functions for instance-array BVH builders */
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif
/* factory functions for grid BVH builders */
#if defined(EMBREE_GEOMETRY_GRID)
Builder* BVH4GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,4,mode); }
Builder* BVH4GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4,mode); } // FIXME: check whether cost factors are correct
#if defined(__AVX__)
Builder* BVH8GridMeshBuilderSAH (void* bvh, GridMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,mesh,geomID,8,1.0f,8,8,mode); }
Builder* BVH8GridSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8,mode); } // FIXME: check whether cost factors are correct
#endif
#endif
}
}

View file

@ -0,0 +1,713 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/bvh_builder_msmblur.h"
#include "../builders/primrefgen.h"
#include "../builders/splitter.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
// FIXME: remove after removing BVHNBuilderMBlurRootTimeSplitsSAH
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
namespace embree
{
namespace isa
{
#if 0
/* Dead code (compiled out by the surrounding '#if 0'): single-time-segment
 * motion-blur leaf creation.
 * NOTE(review): the assert line below references 'end' and 'current', which
 * are not declared in this operator() -- this block would not compile if
 * re-enabled without fixing that line first. */
template<int N, typename Primitive>
struct CreateMBlurLeaf
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;
__forceinline CreateMBlurLeaf (BVH* bvh, PrimRef* prims, size_t time) : bvh(bvh), prims(prims), time(time) {}
__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
size_t items = Primitive::blocks(set.size());
size_t start = set.begin();
for (size_t i=start; i<end; i++) assert((*current.prims.prims)[start].geomID() == (*current.prims.prims)[i].geomID()); // assert that all geomIDs are identical
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
NodeRef node = bvh->encodeLeaf((char*)accel,items);
LBBox3fa allBounds = empty;
for (size_t i=0; i<items; i++)
allBounds.extend(accel[i].fillMB(prims, start, set.end(), bvh->scene, time));
return NodeRecordMB(node,allBounds);
}
BVH* bvh;
PrimRef* prims;
size_t time;
};
#endif
/*! Leaf-creation functor for multi-segment motion-blur builds. Packs the
 *  primitives of a build record into Primitive blocks and returns the leaf
 *  reference together with its linear bounds over the record's time range. */
template<int N, typename Mesh, typename Primitive>
struct CreateMSMBlurLeaf
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;

  __forceinline CreateMSMBlurLeaf (BVH* bvh) : bvh(bvh) {}

  __forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
  {
    const size_t numBlocks = Primitive::blocks(current.prims.size());
    const size_t first = current.prims.begin();
    const size_t last  = current.prims.end();

    /* a leaf may only reference a single geometry */
    for (size_t i=first; i<last; i++)
      assert((*current.prims.prims)[first].geomID() == (*current.prims.prims)[i].geomID());

    Primitive* accel = (Primitive*) alloc.malloc1(numBlocks*sizeof(Primitive),BVH::byteNodeAlignment);
    const NodeRef leaf = bvh->encodeLeaf((char*)accel,numBlocks);

    /* fill the blocks and accumulate their linear bounds */
    LBBox3fa bounds = empty;
    for (size_t i=0; i<numBlocks; i++)
      bounds.extend(accel[i].fillMB(current.prims.prims->data(), first, last, bvh->scene, current.prims.time_range));

    return NodeRecordMB4D(leaf,bounds,current.prims.time_range);
  }

  BVH* bvh; //!< BVH being built
};
/* Motion blur BVH with 4D nodes and internal time splits. Builds a BVH whose
 * inner nodes carry per-child time ranges (MB4D), so geometries with
 * differing time ranges can share one hierarchy. */
template<int N, typename Mesh, typename Primitive>
struct BVHNBuilderMBlurSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
BVH* bvh;                  //!< BVH to build into
Scene* scene;              //!< scene to build for
const size_t sahBlockSize; //!< blocksize for SAH heuristic
const float intCost;       //!< estimated intersection cost
const size_t minLeafSize;  //!< minimal size of a leaf
const size_t maxLeafSize;  //!< maximal size of a leaf
const Geometry::GTypeMask gtype_; //!< geometry types included in the build
BVHNBuilderMBlurSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const Geometry::GTypeMask gtype)
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks)), gtype_(gtype) {}
/*! builds the motion-blur BVH over all matching scene primitives */
void build()
{
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(gtype_,true);
if (numPrimitives == 0) { bvh->clear(); return; }
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAH");
#if PROFILE
profile(2,PROFILE_RUNS,numPrimitives,[&] (ProfileTimer& timer) {
#endif
//const size_t numTimeSteps = scene->getNumTimeSteps<typename Mesh::type_t,true>();
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
/*if (numTimeSegments == 1)
buildSingleSegment(numPrimitives);
else*/
buildMultiSegment(numPrimitives);
#if PROFILE
});
#endif
/* clear temporary data for static geometry */
bvh->cleanup();
bvh->postBuild(t0);
}
#if 0 // No longer compatible when time_ranges are present for geometries. Would have to create temporal nodes sometimes, and put only a single geometry into leaf.
/*! dead code: single-time-segment build path (kept for reference) */
void buildSingleSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRef> prims(scene->device,numPrimitives);
const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface,0);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
(typename BVH::CreateAlloc(bvh),typename BVH::AABBNodeMB::Create(),typename BVH::AABBNodeMB::Set(),
CreateMBlurLeaf<N,Primitive>(bvh,prims.data(),0),bvh->scene->progressInterface,
prims.data(),pinfo,settings);
bvh->set(root.ref,root.lbounds,pinfo.size());
}
#endif
/*! builds the BVH with internal time splits over multiple time segments */
void buildMultiSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRefMB> prims(scene->device,numPrimitives);
PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
BVHBuilderMSMBlur::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxDepth;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleLeafTimeSegment = Primitive::singleTimeSegment;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root =
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
RecalculatePrimRef<Mesh>(scene),
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB4D::Create(),
typename BVH::AABBNodeMB4D::Set(),
CreateMSMBlurLeaf<N,Mesh,Primitive>(bvh),
bvh->scene->progressInterface,
settings);
bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
}
/*! nothing to clear; temporaries are local to build() */
void clear() {
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/*! Recomputes motion-blur primitive references for grid subgrids when the
 *  builder clips a primitive to a narrower time range. The sgrids side array
 *  maps a PrimRefMB's primID (buildID) to the subgrid location. */
struct GridRecalculatePrimRef
{
  Scene* scene;                          //!< scene the grids live in
  const SubGridBuildData * const sgrids; //!< buildID -> subgrid location mapping

  __forceinline GridRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids)
    : scene(scene), sgrids(sgrids) {}

  /*! rebuilds the primitive reference for the given (clipped) time range */
  __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
  {
    const unsigned int geomID = prim.geomID();
    const unsigned int buildID = prim.primID();
    const GridMesh* mesh = scene->get<GridMesh>(geomID);
    const LBBox3fa lbounds = linearBounds(prim,time_range);
    const unsigned num_time_segments = mesh->numTimeSegments();
    const range<int> tbounds = mesh->timeSegmentRange(time_range);
    return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, num_time_segments, geomID, buildID);
  }

  /*! linear bounds of the referenced subgrid over the given time range */
  __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const
  {
    const GridMesh* mesh = scene->get<GridMesh>(prim.geomID());
    const SubGridBuildData& subgrid = sgrids[prim.primID()];
    return mesh->linearBounds(mesh->grid(subgrid.primID),subgrid.x(),subgrid.y(),time_range);
  }
};
/*! Leaf-creation functor for motion-blur grid builds: stores one
 *  SubGridMBQBVHN per distinct geomID referenced by the build record and
 *  returns the leaf with its linear bounds over the record's time range. */
template<int N>
struct CreateMSMBlurLeafGrid
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB4D NodeRecordMB4D;
__forceinline CreateMSMBlurLeafGrid (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids) : scene(scene), bvh(bvh), sgrids(sgrids) {}
__forceinline const NodeRecordMB4D operator() (const BVHBuilderMSMBlur::BuildRecord& current, const FastAllocator::CachedAllocator& alloc) const
{
const size_t items = current.prims.size();
const size_t start = current.prims.begin();
const PrimRefMB* prims = current.prims.prims->data();
/* collect all subsets with unique geomIDs */
assert(items <= N);
unsigned int geomIDs[N];
unsigned int num_geomIDs = 1;
geomIDs[0] = prims[start].geomID();
for (size_t i=1;i<items;i++)
{
bool found = false;
const unsigned int new_geomID = prims[start+i].geomID();
for (size_t j=0;j<num_geomIDs;j++)
if (new_geomID == geomIDs[j])
{ found = true; break; }
if (!found)
geomIDs[num_geomIDs++] = new_geomID;
}
/* allocate all leaf memory in one single block */
SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
LBBox3fa allBounds = empty;
/* pack, per geomID, all matching subgrids and their bounds at both time-range endpoints */
for (size_t g=0;g<num_geomIDs;g++)
{
const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
unsigned int x[N];
unsigned int y[N];
unsigned int primID[N];
BBox3fa bounds0[N];
BBox3fa bounds1[N];
unsigned int pos = 0;
for (size_t i=0;i<items;i++)
{
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
x[pos] = sgrid_bd.sx;
y[pos] = sgrid_bd.sy;
primID[pos] = sgrid_bd.primID;
/* NOTE: these locals shadow the x/y arrays above; the array slots for
 * this 'pos' were already written, so this is safe (but fragile) */
const size_t x = sgrid_bd.x();
const size_t y = sgrid_bd.y();
LBBox3fa newBounds = mesh->linearBounds(mesh->grid(sgrid_bd.primID),x,y,current.prims.time_range);
allBounds.extend(newBounds);
bounds0[pos] = newBounds.bounds0;
bounds1[pos] = newBounds.bounds1;
pos++;
}
assert(pos <= N);
new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],current.prims.time_range.lower,1.0f/current.prims.time_range.size(),pos);
}
return NodeRecordMB4D(node,allBounds,current.prims.time_range);
}
Scene *scene;
BVH* bvh;
const SubGridBuildData * const sgrids;
};
#if 0
/* Dead code (compiled out by the surrounding '#if 0'): two-time-step grid
 * leaf creation without internal time splits (bounds fixed at t=0 and t=1). */
template<int N>
struct CreateLeafGridMB
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecordMB NodeRecordMB;
__forceinline CreateLeafGridMB (Scene* scene, BVH* bvh, const SubGridBuildData * const sgrids)
: scene(scene), bvh(bvh), sgrids(sgrids) {}
__forceinline NodeRecordMB operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
const size_t items = set.size();
const size_t start = set.begin();
/* collect all subsets with unique geomIDs */
assert(items <= N);
unsigned int geomIDs[N];
unsigned int num_geomIDs = 1;
geomIDs[0] = prims[start].geomID();
for (size_t i=1;i<items;i++)
{
bool found = false;
const unsigned int new_geomID = prims[start+i].geomID();
for (size_t j=0;j<num_geomIDs;j++)
if (new_geomID == geomIDs[j])
{ found = true; break; }
if (!found)
geomIDs[num_geomIDs++] = new_geomID;
}
/* allocate all leaf memory in one single block */
SubGridMBQBVHN<N>* accel = (SubGridMBQBVHN<N>*) alloc.malloc1(num_geomIDs*sizeof(SubGridMBQBVHN<N>),BVH::byteAlignment);
typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,num_geomIDs);
LBBox3fa allBounds = empty;
for (size_t g=0;g<num_geomIDs;g++)
{
const GridMesh* __restrict__ const mesh = scene->get<GridMesh>(geomIDs[g]);
unsigned int x[N];
unsigned int y[N];
unsigned int primID[N];
BBox3fa bounds0[N];
BBox3fa bounds1[N];
unsigned int pos = 0;
for (size_t i=0;i<items;i++)
{
if (unlikely(prims[start+i].geomID() != geomIDs[g])) continue;
const SubGridBuildData &sgrid_bd = sgrids[prims[start+i].primID()];
x[pos] = sgrid_bd.sx;
y[pos] = sgrid_bd.sy;
primID[pos] = sgrid_bd.primID;
/* NOTE: these locals shadow the x/y arrays above (already written for this 'pos') */
const size_t x = sgrid_bd.x();
const size_t y = sgrid_bd.y();
bool MAYBE_UNUSED valid0 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,0,bounds0[pos]);
bool MAYBE_UNUSED valid1 = mesh->buildBounds(mesh->grid(sgrid_bd.primID),x,y,1,bounds1[pos]);
assert(valid0);
assert(valid1);
allBounds.extend(LBBox3fa(bounds0[pos],bounds1[pos]));
pos++;
}
new (&accel[g]) SubGridMBQBVHN<N>(x,y,primID,bounds0,bounds1,geomIDs[g],0.0f,1.0f,pos);
}
return NodeRecordMB(node,allBounds);
}
Scene *scene;
BVH* bvh;
const SubGridBuildData * const sgrids;
};
#endif
/* Motion blur BVH with 4D nodes and internal time splits */
template<int N>
struct BVHNBuilderMBlurSAHGrid : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVHN<N>::NodeRef NodeRef;
typedef typename BVHN<N>::NodeRecordMB NodeRecordMB;
typedef typename BVHN<N>::AABBNodeMB AABBNodeMB;
BVH* bvh;
Scene* scene;
const size_t sahBlockSize;
const float intCost;
const size_t minLeafSize;
const size_t maxLeafSize;
mvector<SubGridBuildData> sgrids;
/*! constructor; clamps maxLeafSize to the BVH's leaf-block capacity */
BVHNBuilderMBlurSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize)
: bvh(bvh), scene(scene), sahBlockSize(sahBlockSize), intCost(intCost), minLeafSize(minLeafSize), maxLeafSize(min(maxLeafSize,BVH::maxLeafBlocks)), sgrids(scene->device,0) {}
/*! Creates the primref and SubGridBuildData arrays for all grid meshes of the
 *  scene at a fixed time step 'itime', using a two-pass parallel prefix sum:
 *  pass one counts subgrids per grid, pass two fills the arrays at the
 *  offsets established by pass one.
 *  NOTE(review): 'progressMonitor' is not used in this body -- confirm
 *  whether progress reporting was intended here. */
PrimInfo createPrimRefArrayMBlurGrid(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
{
/* first run to get #primitives */
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator<GridMesh,true> iter(scene);
pstate.init(iter,size_t(1024));
/* iterate over all meshes in the scene */
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j,range<size_t>(0,1))) continue;
/* counting pass only: bounds stay empty, the prim is just a counter carrier */
BBox3fa bounds = empty;
const PrimRef prim(bounds,unsigned(geomID),unsigned(j));
pinfo.add_center2(prim,mesh->getNumSubGrids(j));
}
return pinfo;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
size_t numPrimitives = pinfo.size();
if (numPrimitives == 0) return pinfo;
/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);
/* second run to fill primrefs and SubGridBuildData arrays */
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
/* 'base' carries the prefix sum; start writing at its running offset */
k = base.size();
size_t p_index = k;
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
const GridMesh::Grid &g = mesh->grid(j);
if (!mesh->valid(j,range<size_t>(0,1))) continue;
/* subgrids cover 3x3 vertex patches, hence the stride of 2 quads */
for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
BBox3fa bounds = empty;
if (!mesh->buildBounds(g,x,y,itime,bounds)) continue; // get bounds of subgrid
const PrimRef prim(bounds,unsigned(geomID),unsigned(p_index));
pinfo.add_center2(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfo;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
assert(pinfo.size() == numPrimitives);
return pinfo;
}
/*! Fills 'prims' (and the 'sgrids' side array) with one multi-segment PrimRefMB
 *  per 3x3 sub-grid of every grid mesh, restricted to time range 't0t1'.
 *  Returns the merged PrimInfoMB with its time_range set to 't0t1'.
 *  Two passes: a counting pass, then a prefix-sum pass that writes entries at
 *  globally stable indices.
 *  Change vs. original: removed an unused local (LBBox3fa bounds(empty)) that
 *  was declared but never read in the counting pass. */
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f))
{
/* first run to get #primitives */
ParallelForForPrefixSumState<PrimInfoMB> pstate;
Scene::Iterator<GridMesh,true> iter(scene);
pstate.init(iter,size_t(1024));
/* iterate over all meshes in the scene; this pass only counts sub-grids */
PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
PrimInfoMB pinfoMB(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
pinfoMB.merge(gridMB);
}
return pinfoMB;
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
size_t numPrimitives = pinfoMB.size();
if (numPrimitives == 0) return pinfoMB;
/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);
/* second run to fill primrefs and SubGridBuildData arrays */
pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
/* rebase k to the prefix sum so writes land at stable global indices */
k = base.size();
size_t p_index = k;
PrimInfoMB pinfoMB(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
const GridMesh::Grid &g = mesh->grid(j);
/* one sub-grid per 2x2 quad block of the grid */
for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
const PrimRefMB prim(mesh->linearBounds(g,x,y,t0t1),mesh->numTimeSegments(),mesh->time_range,mesh->numTimeSegments(),unsigned(geomID),unsigned(p_index));
pinfoMB.add_primref(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfoMB;
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
assert(pinfoMB.size() == numPrimitives);
pinfoMB.time_range = t0t1;
return pinfoMB;
}
/*! Builder entry point: always takes the multi-segment path; the
 *  single-segment specialization is disabled (see the #if 0 block below). */
void build()
{
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(GridMesh::geom_type,true);
if (numPrimitives == 0) { bvh->clear(); return; }
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderMBlurSAHGrid");
//const size_t numTimeSteps = scene->getNumTimeSteps<GridMesh,true>();
//const size_t numTimeSegments = numTimeSteps-1; assert(numTimeSteps > 1);
//if (numTimeSegments == 1)
// buildSingleSegment(numPrimitives);
//else
buildMultiSegment(numPrimitives);
/* clear temporary data for static geometry */
bvh->cleanup();
bvh->postBuild(t0);
}
/* NOTE(review): dead code — compiled out, kept for reference; the call site in
 * build() is commented out as well. Would build a non-4D motion-blur BVH for
 * the single-time-segment case. */
#if 0
void buildSingleSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRef> prims(scene->device,numPrimitives);
const PrimInfo pinfo = createPrimRefArrayMBlurGrid(scene,prims,bvh->scene->progressInterface,0);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.size()*sizeof(AABBNodeMB)/(4*N);
//TODO: check leaf_bytes
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root = BVHBuilderBinnedSAH::build<NodeRecordMB>
(typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB::Create(),
typename BVH::AABBNodeMB::Set(),
CreateLeafGridMB<N>(scene,bvh,sgrids.data()),
bvh->scene->progressInterface,
prims.data(),pinfo,settings);
bvh->set(root.ref,root.lbounds,pinfo.size());
}
#endif
/*! Builds a 4D (multi-time-segment) motion-blur BVH over all grid sub-grids.
 *  Creates the PrimRefMB array, estimates allocation sizes, and runs the
 *  MSMBlur builder with AABBNodeMB4D nodes. */
void buildMultiSegment(size_t numPrimitives)
{
/* create primref array */
mvector<PrimRefMB> prims(scene->device,numPrimitives);
PrimInfoMB pinfo = createPrimRefArrayMSMBlurGrid(scene,prims,bvh->scene->progressInterface);
/* early out if no valid primitives */
if (pinfo.size() == 0) { bvh->clear(); return; }
/* recomputes sub-grid bounds when the builder re-splits time ranges */
GridRecalculatePrimRef recalculatePrimRef(scene,sgrids.data());
/* estimate acceleration structure size */
const size_t node_bytes = pinfo.num_time_segments*sizeof(AABBNodeMB)/(4*N);
//FIXME: check leaf_bytes
//const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.num_time_segments)*sizeof(SubGridQBVHN<N>));
const size_t leaf_bytes = size_t(1.2*(float)numPrimitives/N * sizeof(SubGridQBVHN<N>));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
/* settings for BVH build */
BVHBuilderMSMBlur::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxDepth;
settings.logBlockSize = bsr(sahBlockSize);
settings.minLeafSize = min(minLeafSize,maxLeafSize);
settings.maxLeafSize = maxLeafSize;
settings.travCost = travCost;
settings.intCost = intCost;
settings.singleLeafTimeSegment = false;
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
/* build hierarchy */
auto root =
BVHBuilderMSMBlur::build<NodeRef>(prims,pinfo,scene->device,
recalculatePrimRef,
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNodeMB4D::Create(),
typename BVH::AABBNodeMB4D::Set(),
CreateMSMBlurLeafGrid<N>(scene,bvh,sgrids.data()),
bvh->scene->progressInterface,
settings);
bvh->set(root.ref,root.lbounds,pinfo.num_time_segments);
}
/*! Nothing to release: the primref array is local to buildMultiSegment(). */
void clear() {
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/* Factory functions registered with the builder tables: each instantiates the
 * motion-blur SAH builder for a concrete (BVH width, mesh type, primitive)
 * combination. BVH8 variants are only compiled for AVX targets. */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
Builder* BVH4Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,TriangleMesh,Triangle4vMB>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
#if defined(__AVX__)
Builder* BVH8Triangle4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
Builder* BVH8Triangle4vMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,TriangleMesh,Triangle4vMB>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_TRIANGLE_MESH); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<4,QuadMesh,Quad4i>((BVH4*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
#if defined(__AVX__)
Builder* BVH8Quad4iMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAH<8,QuadMesh,Quad4i>((BVH8*)bvh,scene,4,1.0f,4,inf,Geometry::MTY_QUAD_MESH); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)
/* user geometry leaf sizes are configurable via the device */
Builder* BVH4VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
return new BVHNBuilderMBlurSAH<4,UserGeometry,Object>((BVH4*)bvh,scene,4,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
}
#if defined(__AVX__)
Builder* BVH8VirtualMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) {
int minLeafSize = scene->device->object_accel_mb_min_leaf_size;
int maxLeafSize = scene->device->object_accel_mb_max_leaf_size;
return new BVHNBuilderMBlurSAH<8,UserGeometry,Object>((BVH8*)bvh,scene,8,1.0f,minLeafSize,maxLeafSize,Geometry::MTY_USER_GEOMETRY);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
#if defined(__AVX__)
Builder* BVH8InstanceMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_GRID)
Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4); }
#if defined(__AVX__)
Builder* BVH8GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<8>((BVH8*)bvh,scene,8,1.0f,8,8); }
#endif
#endif
}
}

View file

@ -0,0 +1,201 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh.h"
#include "bvh_builder.h"
#include "../builders/primrefgen.h"
#include "../builders/primrefgen_presplit.h"
#include "../builders/splitter.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglev_mb.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
namespace embree
{
namespace isa
{
/*! Leaf-creation functor for the spatial-split builders: allocates the
 *  required number of primitive blocks from the per-thread allocator and
 *  fills them from the given primref range. */
template<int N, typename Primitive>
struct CreateLeafSpatial
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
__forceinline CreateLeafSpatial (BVH* bvh) : bvh(bvh) {}
__forceinline NodeRef operator() (const PrimRef* prims, const range<size_t>& set, const FastAllocator::CachedAllocator& alloc) const
{
size_t n = set.size();
/* one Primitive block holds several primitives, so allocate blocks(n) blocks */
size_t items = Primitive::blocks(n);
size_t start = set.begin();
Primitive* accel = (Primitive*) alloc.malloc1(items*sizeof(Primitive),BVH::byteAlignment);
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,items);
/* fill() advances 'start' internally; each call consumes one block worth of prims */
for (size_t i=0; i<items; i++) {
accel[i].fill(prims,start,set.end(),bvh->scene);
}
return node;
}
BVH* bvh; //!< BVH being built (provides the scene for leaf filling)
};
/*! SAH BVH builder with spatial splits (primitive clipping and replication).
 *  Depending on device settings and the geomID range it either runs the
 *  pre-split path or the standard fast-spatial-split path.
 *  Fix vs. original: the single-mesh mode initializes scene to nullptr, yet
 *  the original read device settings through 'scene->device' both in the
 *  mesh constructor and in build(), dereferencing a null pointer. Device
 *  settings are now read through 'bvh->device' (already used for prims0 in
 *  the same constructor), which is the same device in scene mode. */
template<int N, typename Mesh, typename Primitive, typename Splitter>
struct BVHNBuilderFastSpatialSAH : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
BVH* bvh; //!< BVH to build
Scene* scene; //!< scene to build for (nullptr in single-mesh mode)
Mesh* mesh; //!< mesh to build for (nullptr in scene mode)
mvector<PrimRef> prims0; //!< primref array, sized to hold spatial-split duplicates
GeneralBVHBuilder::Settings settings; //!< SAH builder settings
const float splitFactor; //!< maximal primitive replication factor
unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); //!< geomID in single-mesh mode
unsigned int numPreviousPrimitives = 0; //!< primitive count of previous build (detects size changes)
/*! scene mode constructor */
BVHNBuilderFastSpatialSAH (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(scene), mesh(nullptr), prims0(scene->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
splitFactor(scene->device->max_spatial_split_replications) {}
/*! single-mesh mode constructor; 'scene' is nullptr here, so read the split
 *  replication setting through the BVH's device (original read scene->device) */
BVHNBuilderFastSpatialSAH (BVH* bvh, Mesh* mesh, const unsigned int geomID, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode)
: bvh(bvh), scene(nullptr), mesh(mesh), prims0(bvh->device,0), settings(sahBlockSize, minLeafSize, min(maxLeafSize,Primitive::max_size()*BVH::maxLeafBlocks), travCost, intCost, DEFAULT_SINGLE_THREAD_THRESHOLD),
splitFactor(bvh->device->max_spatial_split_replications), geomID_(geomID) {}
// FIXME: shrink bvh->alloc in destructor here and in other builders too
/*! builder entry point */
void build()
{
/* we reset the allocator when the mesh size changed */
if (mesh && mesh->numPrimitives != numPreviousPrimitives) {
bvh->alloc.clear();
}
/* skip build for empty scene */
const size_t numOriginalPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(Mesh::geom_type,false);
numPreviousPrimitives = numOriginalPrimitives;
if (numOriginalPrimitives == 0) {
prims0.clear();
bvh->clear();
return;
}
const unsigned int maxGeomID = mesh ? geomID_ : scene->getMaxGeomID<Mesh,false>();
/* pre-splits are forced when geomIDs would not fit into the bits reserved for
   split encoding; read the device flag via bvh so the mesh mode (scene == nullptr) is safe */
const bool usePreSplits = bvh->device->useSpatialPreSplits || (maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)));
double t0 = bvh->preBuild(mesh ? "" : TOSTRING(isa) "::BVH" + toString(N) + (usePreSplits ? "BuilderFastSpatialPresplitSAH" : "BuilderFastSpatialSAH"));
/* create primref array, oversized to make room for split duplicates */
const size_t numSplitPrimitives = max(numOriginalPrimitives,size_t(splitFactor*numOriginalPrimitives));
prims0.resize(numSplitPrimitives);
/* enable os_malloc for two level build */
if (mesh)
bvh->alloc.setOSallocation(true);
NodeRef root(0);
PrimInfo pinfo;
if (likely(usePreSplits))
{
/* spatial presplit SAH BVH builder */
pinfo = mesh ?
createPrimRefArray_presplit<Mesh,Splitter>(mesh,maxGeomID,numOriginalPrimitives,prims0,bvh->scene->progressInterface) :
createPrimRefArray_presplit<Mesh,Splitter>(scene,Mesh::geom_type,false,numOriginalPrimitives,prims0,bvh->scene->progressInterface);
const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
/* call BVH builder */
root = BVHNBuilderVirtual<N>::build(&bvh->alloc,CreateLeafSpatial<N,Primitive>(bvh),bvh->scene->progressInterface,prims0.data(),pinfo,settings);
}
else
{
/* standard spatial split SAH BVH builder */
pinfo = mesh ?
createPrimRefArray(mesh,geomID_,numSplitPrimitives,prims0,bvh->scene->progressInterface) :
createPrimRefArray(scene,Mesh::geom_type,false,numSplitPrimitives,prims0,bvh->scene->progressInterface);
/* NOTE(review): in mesh mode 'scene' is nullptr here; confirm Splitter
   tolerates a null scene before enabling this path for single meshes */
Splitter splitter(scene);
const size_t node_bytes = pinfo.size()*sizeof(typename BVH::AABBNode)/(4*N);
const size_t leaf_bytes = size_t(1.2*Primitive::blocks(pinfo.size())*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
settings.singleThreadThreshold = bvh->alloc.fixSingleThreadThreshold(N,DEFAULT_SINGLE_THREAD_THRESHOLD,pinfo.size(),node_bytes+leaf_bytes);
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
/* call BVH builder */
root = BVHBuilderBinnedFastSpatialSAH::build<NodeRef>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create2(),
typename BVH::AABBNode::Set2(),
CreateLeafSpatial<N,Primitive>(bvh),
splitter,
bvh->scene->progressInterface,
prims0.data(),
numSplitPrimitives,
pinfo,settings);
/* ==================== */
}
bvh->set(root,LBBox3fa(pinfo.geomBounds),pinfo.size());
bvh->layoutLargeNodes(size_t(pinfo.size()*0.005f));
/* clear temporary data for static geometry */
if (scene && scene->isStaticAccel()) {
prims0.clear();
}
bvh->cleanup();
bvh->postBuild(t0);
}
/*! releases the temporary primref array */
void clear() {
prims0.clear();
}
};
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/
/* Factory functions for the fast-spatial-split SAH builders, one per
 * (BVH width, mesh type, primitive, splitter) combination. BVH8 variants
 * are only compiled for AVX targets. */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
#if defined(__AVX__)
Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH8Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,QuadMesh,Quad4v,QuadSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
#if defined(__AVX__)
Builder* BVH8Quad4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,QuadMesh,Quad4v,QuadSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }
#endif
#endif
}
}

View file

@ -0,0 +1,385 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "bvh_builder_twolevel.h"
#include "bvh_statistics.h"
#include "../builders/bvh_builder_sah.h"
#include "../common/scene_line_segments.h"
#include "../common/scene_triangle_mesh.h"
#include "../common/scene_quad_mesh.h"
#define PROFILE 0
namespace embree
{
namespace isa
{
/*! Constructor: stores build parameters; 'refs' and 'prims' start empty and
 *  are (re)sized in build(). */
template<int N, typename Mesh, typename Primitive>
BVHNBuilderTwoLevel<N,Mesh,Primitive>::BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder, const size_t singleThreadThreshold)
: bvh(bvh), scene(scene), refs(scene->device,0), prims(scene->device,0), singleThreadThreshold(singleThreadThreshold), gtype(gtype), useMortonBuilder_(useMortonBuilder) {}
/*! Destructor: members clean themselves up. */
template<int N, typename Mesh, typename Primitive>
BVHNBuilderTwoLevel<N,Mesh,Primitive>::~BVHNBuilderTwoLevel () {
}
// ===========================================================================
// ===========================================================================
// ===========================================================================
/*! Two-level build: (1) drop per-object BVHs for deleted geometries,
 *  (2) build/refit one sub-BVH (or inline leaves for small meshes) per object
 *  in parallel, collecting BuildRefs, (3) build a top-level BVH over the
 *  collected refs, optionally opening large sub-BVH nodes while merging. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::build()
{
/* delete some objects */
size_t num = scene->size();
if (num < bvh->objects.size()) {
parallel_for(num, bvh->objects.size(), [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
builders[i].reset();
delete bvh->objects[i]; bvh->objects[i] = nullptr;
}
});
}
#if PROFILE
while(1)
#endif
{
/* reset memory allocator */
bvh->alloc.reset();
/* skip build for empty scene */
const size_t numPrimitives = scene->getNumPrimitives(gtype,false);
if (numPrimitives == 0) {
prims.resize(0);
bvh->set(BVH::emptyNode,empty,0);
return;
}
/* calculate the size of the entire BVH */
const size_t numLeafBlocks = Primitive::blocks(numPrimitives);
const size_t node_bytes = 2*numLeafBlocks*sizeof(typename BVH::AABBNode)/N;
const size_t leaf_bytes = size_t(1.2*numLeafBlocks*sizeof(Primitive));
bvh->alloc.init_estimate(node_bytes+leaf_bytes);
double t0 = bvh->preBuild(TOSTRING(isa) "::BVH" + toString(N) + "BuilderTwoLevel");
/* resize object array if scene got larger */
if (bvh->objects.size() < num) bvh->objects.resize(num);
if (builders.size() < num) builders.resize(num);
resizeRefsList ();
nextRef.store(0);
/* create acceleration structures: pick small vs. large strategy per object */
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
{
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
{
Mesh* mesh = scene->getSafe<Mesh>(objectID);
/* ignore meshes we do not support */
if (mesh == nullptr || mesh->numTimeSteps != 1)
continue;
if (isSmallGeometry(mesh)) {
setupSmallBuildRefBuilder (objectID, mesh);
} else {
setupLargeBuildRefBuilder (objectID, mesh);
}
}
});
/* parallel build of acceleration structures */
parallel_for(size_t(0), num, [&] (const range<size_t>& r)
{
for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
{
/* ignore if no triangle mesh or not enabled */
Mesh* mesh = scene->getSafe<Mesh>(objectID);
if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1)
continue;
builders[objectID]->attachBuildRefs (this);
}
});
#if PROFILE
double d0 = getSeconds();
#endif
/* fast path for single geometry scenes */
if (nextRef == 1) {
bvh->set(refs[0].node,LBBox3fa(refs[0].bounds()),numPrimitives);
}
else
{
/* open all large nodes */
refs.resize(nextRef);
/* this probably needs some more tuning */
const size_t extSize = max(max((size_t)SPLIT_MIN_EXT_SPACE,refs.size()*SPLIT_MEMORY_RESERVE_SCALE),size_t((float)numPrimitives / SPLIT_MEMORY_RESERVE_FACTOR));
#if !ENABLE_DIRECT_SAH_MERGE_BUILDER
#if ENABLE_OPEN_SEQUENTIAL
open_sequential(extSize);
#endif
/* compute PrimRefs */
prims.resize(refs.size());
#endif
{
/* reduce the refs into a PrimInfo (and, in the non-merge path, mirror them into prims) */
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
PrimInfo pinfo(empty);
for (size_t i=r.begin(); i<r.end(); i++) {
pinfo.add_center2(refs[i]);
}
return pinfo;
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
#else
const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
PrimInfo pinfo(empty);
for (size_t i=r.begin(); i<r.end(); i++) {
pinfo.add_center2(refs[i]);
prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
}
return pinfo;
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
#endif
/* skip if all objects where empty */
if (pinfo.size() == 0)
bvh->set(BVH::emptyNode,empty,0);
/* otherwise build toplevel hierarchy */
else
{
/* settings for BVH build; leaf size 1: each leaf is one sub-BVH root */
GeneralBVHBuilder::Settings settings;
settings.branchingFactor = N;
settings.maxDepth = BVH::maxBuildDepthLeaf;
settings.logBlockSize = bsr(N);
settings.minLeafSize = 1;
settings.maxLeafSize = 1;
settings.travCost = 1.0f;
settings.intCost = 1.0f;
settings.singleThreadThreshold = singleThreadThreshold;
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
refs.resize(extSize);
NodeRef root = BVHBuilderBinnedOpenMergeSAH::build<NodeRef,BuildRef>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create2(),
typename BVH::AABBNode::Set2(),
[&] (const BuildRef* refs, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
assert(range.size() == 1);
return (NodeRef) refs[range.begin()].node;
},
[&] (BuildRef &bref, BuildRef *refs) -> size_t {
return openBuildRef(bref,refs);
},
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
refs.data(),extSize,pinfo,settings);
#else
NodeRef root = BVHBuilderBinnedSAH::build<NodeRef>(
typename BVH::CreateAlloc(bvh),
typename BVH::AABBNode::Create2(),
typename BVH::AABBNode::Set2(),
[&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> NodeRef {
assert(range.size() == 1);
return (NodeRef) prims[range.begin()].ID();
},
[&] (size_t dn) { bvh->scene->progressMonitor(0); },
prims.data(),pinfo,settings);
#endif
bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives);
}
}
}
bvh->alloc.cleanup();
bvh->postBuild(t0);
#if PROFILE
double d1 = getSeconds();
std::cout << "TOP_LEVEL OPENING/REBUILD TIME " << 1000.0*(d1-d0) << " ms" << std::endl;
#endif
}
}
/*! Drops the per-object BVH and ref builder of a deleted geometry.
 *  NOTE(review): the bounds check uses bvh->objects.size() but also indexes
 *  'builders' — build() resizes both to the same size, presumably keeping
 *  them in sync; verify no caller runs before the first build(). */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::deleteGeometry(size_t geomID)
{
if (geomID >= bvh->objects.size()) return;
if (builders[geomID]) builders[geomID].reset();
delete bvh->objects [geomID]; bvh->objects [geomID] = nullptr;
}
/*! Clears all per-object sub-BVHs, drops the per-object ref builders, and
 *  releases the top-level build-ref list. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::clear()
{
/* clear every sub-BVH that is still allocated */
for (auto* object : bvh->objects) {
if (object)
object->clear();
}
/* release every per-object ref builder */
for (auto& refBuilder : builders) {
if (refBuilder)
refBuilder.reset();
}
refs.clear();
}
/*! Sequentially opens the largest inner sub-BVH nodes (by bounds area, via a
 *  max-heap over 'refs') and replaces them with their children, until adding
 *  another N-1 refs would exceed 'extSize'. Only compiled into build() when
 *  the direct SAH merge builder is disabled. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::open_sequential(const size_t extSize)
{
if (refs.size() == 0)
return;
refs.reserve(extSize);
#if 1
/* warm the cache for the nodes we are about to inspect */
for (size_t i=0;i<refs.size();i++)
{
NodeRef ref = refs[i].node;
if (ref.isAABBNode())
BVH::prefetch(ref);
}
#endif
/* heap ordered by BuildRef::operator< (bounds_area), so the largest node is on top */
std::make_heap(refs.begin(),refs.end());
while (refs.size()+N-1 <= extSize)
{
std::pop_heap (refs.begin(),refs.end());
NodeRef ref = refs.back().node;
/* leaves have bounds_area == 0 and sort last; a leaf on top means nothing left to open */
if (ref.isLeaf()) break;
refs.pop_back();
AABBNode* node = ref.getAABBNode();
for (size_t i=0; i<N; i++) {
if (node->child(i) == BVH::emptyNode) continue;
refs.push_back(BuildRef(node->bounds(i),node->child(i)));
#if 1
NodeRef ref_pre = node->child(i);
if (ref_pre.isAABBNode())
ref_pre.prefetch();
#endif
std::push_heap (refs.begin(),refs.end());
}
}
}
/*! Installs a RefBuilderSmall for the given object, reusing an existing one
 *  when the object already uses the small strategy. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupSmallBuildRefBuilder (size_t objectID, Mesh const * const /*mesh*/)
{
if (builders[objectID] == nullptr || // new mesh
dynamic_cast<RefBuilderSmall*>(builders[objectID].get()) == nullptr) // size change resulted in large->small change
{
builders[objectID].reset (new RefBuilderSmall(objectID));
}
}
/*! (Re)creates the per-object sub-BVH builder when the object is new, its
 *  build quality changed, or it switched from the small to the large strategy.
 *  NOTE(review): when bvh->objects[objectID] != nullptr the condition calls
 *  builders[objectID]->meshQualityChanged() before the null/dynamic_cast
 *  check — presumably a non-null object implies a non-null builder; confirm. */
template<int N, typename Mesh, typename Primitive>
void BVHNBuilderTwoLevel<N,Mesh,Primitive>::setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh)
{
if (bvh->objects[objectID] == nullptr || // new mesh
builders[objectID]->meshQualityChanged (mesh->quality) || // changed build quality
dynamic_cast<RefBuilderLarge*>(builders[objectID].get()) == nullptr) // size change resulted in small->large change
{
Builder* builder = nullptr;
delete bvh->objects[objectID];
createMeshAccel(objectID, builder);
builders[objectID].reset (new RefBuilderLarge(objectID, builder, mesh->quality));
}
}
/* Factory functions for the two-level builders, one per (BVH width, mesh type,
 * primitive) combination; BVH8 variants are only compiled for AVX targets. */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH4BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4v>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH4BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,TriangleMesh,Triangle4i>((BVH4*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,QuadMesh,Quad4v>((BVH4*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,UserGeometry,Object>((BVH4*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,Instance,InstancePrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
}
#endif
#if defined(__AVX__)
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH8BuilderTwoLevelTriangle4vMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4v>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
}
Builder* BVH8BuilderTwoLevelTriangle4iMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,TriangleMesh,Triangle4i>((BVH8*)bvh,scene,TriangleMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH8BuilderTwoLevelQuadMeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,QuadMesh,Quad4v>((BVH8*)bvh,scene,QuadMesh::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH8BuilderTwoLevelVirtualSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,UserGeometry,Object>((BVH8*)bvh,scene,UserGeometry::geom_type,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH8BuilderTwoLevelInstanceSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,Instance,InstancePrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH8BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
}
#endif
#endif
}
}

View file

@ -0,0 +1,262 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <type_traits>
#include "bvh_builder_twolevel_internal.h"
#include "bvh.h"
#include "../builders/priminfo.h"
#include "../builders/primrefgen.h"
/* new open/merge builder */
#define ENABLE_DIRECT_SAH_MERGE_BUILDER 1
#define ENABLE_OPEN_SEQUENTIAL 0
#define SPLIT_MEMORY_RESERVE_FACTOR 1000
#define SPLIT_MEMORY_RESERVE_SCALE 2
#define SPLIT_MIN_EXT_SPACE 1000
namespace embree
{
namespace isa
{
template<int N, typename Mesh, typename Primitive>
class BVHNBuilderTwoLevel : public Builder
{
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
/*! A mesh with at most 4 primitives is "small": it gets no sub-BVH of its own
 *  and its primitives are attached directly as top-level leaves instead. */
__forceinline static bool isSmallGeometry(Mesh* mesh) {
return mesh->size() <= 4;
}
public:
typedef void (*createMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
/*! Top-level build primitive: a PrimRef that additionally carries the
 *  referenced sub-BVH node and its surface area. Leaves get area 0 so that
 *  the area-based ordering (operator<) ranks them below all inner nodes. */
struct BuildRef : public PrimRef
{
public:
__forceinline BuildRef () {}
/*! simple variant: stores the node handle in the PrimRef payload */
__forceinline BuildRef (const BBox3fa& bounds, NodeRef node)
: PrimRef(bounds,(size_t)node), node(node)
{
if (node.isLeaf())
bounds_area = 0.0f;
else
bounds_area = area(this->bounds());
}
/* used by the open/merge bvh builder */
__forceinline BuildRef (const BBox3fa& bounds, NodeRef node, const unsigned int geomID, const unsigned int numPrimitives)
: PrimRef(bounds,geomID,numPrimitives), node(node)
{
/* important for relative buildref ordering */
if (node.isLeaf())
bounds_area = 0.0f;
else
bounds_area = area(this->bounds());
}
/* the open/merge ctor stores the primitive count in the PrimRef primID slot,
 * so size() and numPrimitives() both read it back via primID() */
__forceinline size_t size() const {
return primID();
}
friend bool operator< (const BuildRef& a, const BuildRef& b) {
return a.bounds_area < b.bounds_area;
}
friend __forceinline embree_ostream operator<<(embree_ostream cout, const BuildRef& ref) {
return cout << "{ lower = " << ref.lower << ", upper = " << ref.upper << ", center2 = " << ref.center2() << ", geomID = " << ref.geomID() << ", numPrimitives = " << ref.numPrimitives() << ", bounds_area = " << ref.bounds_area << " }";
}
__forceinline unsigned int numPrimitives() const { return primID(); }
public:
NodeRef node; //!< referenced sub-BVH node
float bounds_area; //!< cached surface area (0 for leaves)
};
__forceinline size_t openBuildRef(BuildRef &bref, BuildRef *const refs) {
if (bref.node.isLeaf())
{
refs[0] = bref;
return 1;
}
NodeRef ref = bref.node;
unsigned int geomID = bref.geomID();
unsigned int numPrims = max((unsigned int)bref.numPrimitives() / N,(unsigned int)1);
AABBNode* node = ref.getAABBNode();
size_t n = 0;
for (size_t i=0; i<N; i++) {
if (node->child(i) == BVH::emptyNode) continue;
refs[i] = BuildRef(node->bounds(i),node->child(i),geomID,numPrims);
n++;
}
assert(n > 1);
return n;
}
/*! Constructor. */
BVHNBuilderTwoLevel (BVH* bvh, Scene* scene, Geometry::GTypeMask gtype = Mesh::geom_type, bool useMortonBuilder = false, const size_t singleThreadThreshold = DEFAULT_SINGLE_THREAD_THRESHOLD);
/*! Destructor */
~BVHNBuilderTwoLevel ();
/*! builder entry point */
void build();
void deleteGeometry(size_t geomID);
void clear();
void open_sequential(const size_t extSize);
private:
class RefBuilderBase {
public:
virtual ~RefBuilderBase () {}
virtual void attachBuildRefs (BVHNBuilderTwoLevel* builder) = 0;
virtual bool meshQualityChanged (RTCBuildQuality currQuality) = 0;
};
class RefBuilderSmall : public RefBuilderBase {
public:
RefBuilderSmall (size_t objectID)
: objectID_ (objectID) {}
void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder) {
Mesh* mesh = topBuilder->scene->template getSafe<Mesh>(objectID_);
size_t meshSize = mesh->size();
assert(isSmallGeometry(mesh));
mvector<PrimRef> prefs(topBuilder->scene->device, meshSize);
auto pinfo = createPrimRefArray(mesh,objectID_,meshSize,prefs,topBuilder->bvh->scene->progressInterface);
size_t begin=0;
while (begin < pinfo.size())
{
Primitive* accel = (Primitive*) topBuilder->bvh->alloc.getCachedAllocator().malloc1(sizeof(Primitive),BVH::byteAlignment);
typename BVH::NodeRef node = BVH::encodeLeaf((char*)accel,1);
accel->fill(prefs.data(),begin,pinfo.size(),topBuilder->bvh->scene);
/* create build primitive */
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node,(unsigned int)objectID_,1);
#else
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(pinfo.geomBounds,node);
#endif
}
assert(begin == pinfo.size());
}
bool meshQualityChanged (RTCBuildQuality /*currQuality*/) {
return false;
}
size_t objectID_;
};
class RefBuilderLarge : public RefBuilderBase {
public:
RefBuilderLarge (size_t objectID, const Ref<Builder>& builder, RTCBuildQuality quality)
: objectID_ (objectID), builder_ (builder), quality_ (quality) {}
void attachBuildRefs (BVHNBuilderTwoLevel* topBuilder)
{
BVH* object = topBuilder->getBVH(objectID_); assert(object);
/* build object if it got modified */
if (topBuilder->isGeometryModified(objectID_))
builder_->build();
/* create build primitive */
if (!object->getBounds().empty())
{
#if ENABLE_DIRECT_SAH_MERGE_BUILDER
Mesh* mesh = topBuilder->getMesh(objectID_);
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root,(unsigned int)objectID_,(unsigned int)mesh->size());
#else
topBuilder->refs[topBuilder->nextRef++] = BVHNBuilderTwoLevel::BuildRef(object->getBounds(),object->root);
#endif
}
}
bool meshQualityChanged (RTCBuildQuality currQuality) {
return currQuality != quality_;
}
private:
size_t objectID_;
Ref<Builder> builder_;
RTCBuildQuality quality_;
};
void setupLargeBuildRefBuilder (size_t objectID, Mesh const * const mesh);
void setupSmallBuildRefBuilder (size_t objectID, Mesh const * const mesh);
BVH* getBVH (size_t objectID) {
return this->bvh->objects[objectID];
}
Mesh* getMesh (size_t objectID) {
return this->scene->template getSafe<Mesh>(objectID);
}
bool isGeometryModified (size_t objectID) {
return this->scene->isGeometryModified(objectID);
}
void resizeRefsList ()
{
size_t num = parallel_reduce (size_t(0), scene->size(), size_t(0),
[this](const range<size_t>& r)->size_t {
size_t c = 0;
for (auto i=r.begin(); i<r.end(); ++i) {
Mesh* mesh = scene->getSafe<Mesh>(i);
if (mesh == nullptr || mesh->numTimeSteps != 1)
continue;
size_t meshSize = mesh->size();
c += isSmallGeometry(mesh) ? Primitive::blocks(meshSize) : 1;
}
return c;
},
std::plus<size_t>()
);
if (refs.size() < num) {
refs.resize(num);
}
}
void createMeshAccel (size_t geomID, Builder*& builder)
{
bvh->objects[geomID] = new BVH(Primitive::type,scene);
BVH* accel = bvh->objects[geomID];
auto mesh = scene->getSafe<Mesh>(geomID);
if (nullptr == mesh) {
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"geomID does not return correct type");
return;
}
__internal_two_level_builder__::MeshBuilder<N,Mesh,Primitive>()(accel, mesh, geomID, this->gtype, this->useMortonBuilder_, builder);
}
using BuilderList = std::vector<std::unique_ptr<RefBuilderBase>>;
BuilderList builders;
BVH* bvh;
Scene* scene;
mvector<BuildRef> refs;
mvector<PrimRef> prims;
std::atomic<int> nextRef;
const size_t singleThreadThreshold;
Geometry::GTypeMask gtype;
bool useMortonBuilder_ = false;
};
}
}

View file

@ -0,0 +1,304 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
namespace embree
{
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4MeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderMortonGeneral,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshBuilderSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4iMeshRefitSAH,void* COMMA TriangleMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderMortonGeneral,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshBuilderSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vMeshRefitSAH,void* COMMA QuadMesh* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderMortonGeneral,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshBuilderSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8VirtualMeshRefitSAH,void* COMMA UserGeometry* COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderMortonGeneral,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshBuilderSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMeshRefitSAH,void* COMMA Instance* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderMortonGeneral,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshBuilderSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMeshRefitSAH,void* COMMA InstanceArray* COMMA Geometry::GTypeMask COMMA unsigned int COMMA size_t)
namespace isa
{
namespace __internal_two_level_builder__ {
/* Maps (branching factor N, mesh type, primitive type) to the matching
   Morton-code bottom-level mesh builder. The primary template is empty;
   only the explicit specializations below are usable. */
template<int N, typename Mesh, typename Primitive>
struct MortonBuilder {};
/* BVH4 specializations */
template<>
struct MortonBuilder<4,TriangleMesh,Triangle4> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<4,TriangleMesh,Triangle4v> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<4,TriangleMesh,Triangle4i> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<4,QuadMesh,Quad4v> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<4,UserGeometry,Object> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
/* instance variants forward the geometry-type mask to the builder */
template<>
struct MortonBuilder<4,Instance,InstancePrimitive> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
};
template<>
struct MortonBuilder<4,InstanceArray,InstanceArrayPrimitive> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
};
/* BVH8 specializations */
template<>
struct MortonBuilder<8,TriangleMesh,Triangle4> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<8,TriangleMesh,Triangle4v> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<8,TriangleMesh,Triangle4i> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<8,QuadMesh,Quad4v> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<8,UserGeometry,Object> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderMortonGeneral(bvh,mesh,geomID,0);}
};
template<>
struct MortonBuilder<8,Instance,InstancePrimitive> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
};
template<>
struct MortonBuilder<8,InstanceArray,InstanceArrayPrimitive> {
  MortonBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshBuilderMortonGeneral(bvh,mesh,gtype,geomID,0);}
};
/* Maps (branching factor N, mesh type, primitive type) to the matching
   SAH bottom-level mesh builder. The primary template is empty; only the
   explicit specializations below are usable. */
template<int N, typename Mesh, typename Primitive>
struct SAHBuilder {};
/* BVH4 specializations */
template<>
struct SAHBuilder<4,TriangleMesh,Triangle4> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,TriangleMesh,Triangle4v> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,TriangleMesh,Triangle4i> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,QuadMesh,Quad4v> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<4,UserGeometry,Object> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
};
/* instance variants forward the geometry-type mask to the builder */
template<>
struct SAHBuilder<4,Instance,InstancePrimitive> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
};
template<>
struct SAHBuilder<4,InstanceArray,InstanceArrayPrimitive> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
};
/* BVH8 specializations */
template<>
struct SAHBuilder<8,TriangleMesh,Triangle4> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<8,TriangleMesh,Triangle4v> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<8,TriangleMesh,Triangle4i> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<8,QuadMesh,Quad4v> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<8,UserGeometry,Object> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshBuilderSAH(bvh,mesh,geomID,0);}
};
template<>
struct SAHBuilder<8,Instance,InstancePrimitive> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
};
template<>
struct SAHBuilder<8,InstanceArray,InstanceArrayPrimitive> {
  SAHBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshBuilderSAH(bvh,mesh,gtype,geomID,0);}
};
/* Maps (branching factor N, mesh type, primitive type) to the matching
   refit builder (SAH build once, then bounds-only refits). The primary
   template is empty; only the explicit specializations below are usable. */
template<int N, typename Mesh, typename Primitive>
struct RefitBuilder {};
/* BVH4 specializations */
template<>
struct RefitBuilder<4,TriangleMesh,Triangle4> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<4,TriangleMesh,Triangle4v> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<4,TriangleMesh,Triangle4i> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<4,QuadMesh,Quad4v> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<4,UserGeometry,Object> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH4VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
};
/* instance variants forward the geometry-type mask to the builder */
template<>
struct RefitBuilder<4,Instance,InstancePrimitive> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
};
template<>
struct RefitBuilder<4,InstanceArray,InstanceArrayPrimitive> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH4InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
};
/* BVH8 specializations */
template<>
struct RefitBuilder<8,TriangleMesh,Triangle4> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4MeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<8,TriangleMesh,Triangle4v> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4vMeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<8,TriangleMesh,Triangle4i> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, TriangleMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Triangle4iMeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<8,QuadMesh,Quad4v> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, QuadMesh* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8Quad4vMeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<8,UserGeometry,Object> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, UserGeometry* mesh, size_t geomID, Geometry::GTypeMask /*gtype*/) { return BVH8VirtualMeshRefitSAH(bvh,mesh,geomID,0);}
};
template<>
struct RefitBuilder<8,Instance,InstancePrimitive> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, Instance* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
};
template<>
struct RefitBuilder<8,InstanceArray,InstanceArrayPrimitive> {
  RefitBuilder () {}
  Builder* operator () (void* bvh, InstanceArray* mesh, size_t geomID, Geometry::GTypeMask gtype) { return BVH8InstanceArrayMeshRefitSAH(bvh,mesh,gtype,geomID,0);}
};
/* Instantiates the bottom-level builder for one mesh: an explicit request
   for the Morton builder wins, otherwise the mesh's build-quality setting
   selects between the Morton, SAH, and refit dispatch tables above. */
template<int N, typename Mesh, typename Primitive>
struct MeshBuilder {
  MeshBuilder () {}
  void operator () (void* bvh, Mesh* mesh, size_t geomID, Geometry::GTypeMask gtype, bool useMortonBuilder, Builder*& builder) {
    if (useMortonBuilder) {
      /* caller forces the Morton builder regardless of mesh quality */
      builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
    }
    else if (mesh->quality == RTC_BUILD_QUALITY_LOW) {
      builder = MortonBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
    }
    else if (mesh->quality == RTC_BUILD_QUALITY_MEDIUM || mesh->quality == RTC_BUILD_QUALITY_HIGH) {
      builder = SAHBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
    }
    else if (mesh->quality == RTC_BUILD_QUALITY_REFIT) {
      builder = RefitBuilder<N,Mesh,Primitive>()(bvh,mesh,geomID,gtype);
    }
    else {
      throw_RTCError(RTC_ERROR_UNKNOWN,"invalid build quality");
    }
  }
};
}
}
}

View file

@ -0,0 +1,377 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_collider.h"
#include "../geometry/triangle_triangle_intersector.h"
#include "../../common/algorithms/parallel_for.h"
namespace embree
{
namespace isa
{
/* collision statistics are compiled out by default; define CSTAT(x) as x
   to enable the atomic counters below */
#define CSTAT(x)
/* depth threshold for the (currently disabled, see '#if 0' blocks below)
   parallel child recursion */
size_t parallel_depth_threshold = 3;
CSTAT(std::atomic<size_t> bvh_collide_traversal_steps(0));
CSTAT(std::atomic<size_t> bvh_collide_leaf_pairs(0));
CSTAT(std::atomic<size_t> bvh_collide_leaf_iterations(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections1(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections2(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections3(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections4(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections5(0));
CSTAT(std::atomic<size_t> bvh_collide_prim_intersections(0));
/* one colliding primitive pair reported to the user callback; arrays of
   this struct are cast to RTCCollision* when invoking the callback, so the
   field order must stay in sync with RTCCollision */
struct Collision
{
  __forceinline Collision() {}
  __forceinline Collision (unsigned geomID0, unsigned primID0, unsigned geomID1, unsigned primID1)
    : geomID0(geomID0), primID0(primID0), geomID1(geomID1), primID1(primID1) {}
  unsigned geomID0; // geometry of the first primitive
  unsigned primID0; // first primitive
  unsigned geomID1; // geometry of the second primitive
  unsigned primID1; // second primitive
};
/* tests the box against all N child boxes of an AABB node at once and
   returns a bit mask of the overlapping children */
template<int N>
__forceinline size_t overlap(const BBox3fa& box0, const typename BVHN<N>::AABBNode& node1)
{
  /* clip the box against each child box, one axis at a time; a child
     overlaps iff the clipped extent is non-empty on every axis */
  const vfloat<N> clip_lo_x = max(vfloat<N>(box0.lower.x),node1.lower_x);
  const vfloat<N> clip_hi_x = min(vfloat<N>(box0.upper.x),node1.upper_x);
  const vfloat<N> clip_lo_y = max(vfloat<N>(box0.lower.y),node1.lower_y);
  const vfloat<N> clip_hi_y = min(vfloat<N>(box0.upper.y),node1.upper_y);
  const vfloat<N> clip_lo_z = max(vfloat<N>(box0.lower.z),node1.lower_z);
  const vfloat<N> clip_hi_z = min(vfloat<N>(box0.upper.z),node1.upper_z);
  return movemask((clip_lo_x <= clip_hi_x) & (clip_lo_y <= clip_hi_y) & (clip_lo_z <= clip_hi_z));
}
/* tests the scalar box against N packed boxes and returns a bit mask of
   the overlapping lanes */
template<int N>
__forceinline size_t overlap(const BBox3fa& box0, const BBox<Vec3<vfloat<N>>>& box1)
{
  /* clip per axis; overlap requires a non-empty clipped extent on all axes */
  const vfloat<N> clip_lo_x = max(vfloat<N>(box0.lower.x),box1.lower.x);
  const vfloat<N> clip_hi_x = min(vfloat<N>(box0.upper.x),box1.upper.x);
  const vfloat<N> clip_lo_y = max(vfloat<N>(box0.lower.y),box1.lower.y);
  const vfloat<N> clip_hi_y = min(vfloat<N>(box0.upper.y),box1.upper.y);
  const vfloat<N> clip_lo_z = max(vfloat<N>(box0.lower.z),box1.lower.z);
  const vfloat<N> clip_hi_z = min(vfloat<N>(box0.upper.z),box1.upper.z);
  return movemask((clip_lo_x <= clip_hi_x) & (clip_lo_y <= clip_hi_y) & (clip_lo_z <= clip_hi_z));
}
/* tests lane i of the first packed box against all N lanes of the second
   and returns a bit mask of the overlapping lanes */
template<int N>
__forceinline size_t overlap(const BBox<Vec3<vfloat<N>>>& box0, size_t i, const BBox<Vec3<vfloat<N>>>& box1)
{
  /* broadcast lane i of box0, clip per axis, then test for non-empty extent */
  const vfloat<N> clip_lo_x = max(vfloat<N>(box0.lower.x[i]),box1.lower.x);
  const vfloat<N> clip_hi_x = min(vfloat<N>(box0.upper.x[i]),box1.upper.x);
  const vfloat<N> clip_lo_y = max(vfloat<N>(box0.lower.y[i]),box1.lower.y);
  const vfloat<N> clip_hi_y = min(vfloat<N>(box0.upper.y[i]),box1.upper.y);
  const vfloat<N> clip_lo_z = max(vfloat<N>(box0.lower.z[i]),box1.lower.z);
  const vfloat<N> clip_hi_z = min(vfloat<N>(box0.upper.z[i]),box1.upper.z);
  return movemask((clip_lo_x <= clip_hi_x) & (clip_lo_y <= clip_hi_y) & (clip_lo_z <= clip_hi_z));
}
/* exact triangle/triangle intersection test for a pair of primitives;
   returns false for self pairs and topological neighbors (shared vertex)
   when a scene is intersected with itself.
   Fix: the same-mesh condition was evaluated twice and both triangle
   records were fetched before the cheap self-pair rejection; the condition
   is now computed once and the fetches happen after the early-out. */
bool intersect_triangle_triangle (Scene* scene0, unsigned geomID0, unsigned primID0, Scene* scene1, unsigned geomID1, unsigned primID1)
{
  CSTAT(bvh_collide_prim_intersections1++);
  const TriangleMesh* mesh0 = scene0->get<TriangleMesh>(geomID0);
  const TriangleMesh* mesh1 = scene1->get<TriangleMesh>(geomID1);

  /* special culling for scene intersection with itself */
  const bool sameMesh = (scene0 == scene1 && geomID0 == geomID1);

  /* ignore self intersections */
  if (sameMesh && primID0 == primID1)
    return false;

  CSTAT(bvh_collide_prim_intersections2++);
  const TriangleMesh::Triangle& tri0 = mesh0->triangle(primID0);
  const TriangleMesh::Triangle& tri1 = mesh1->triangle(primID1);

  if (sameMesh)
  {
    /* ignore intersection with topological neighbors: reject if tri1 shares
       any vertex index with tri0 (lane 3 duplicates v[2] to fill the vint4) */
    const vint4 t0(tri0.v[0],tri0.v[1],tri0.v[2],tri0.v[2]);
    if (any(vint4(tri1.v[0]) == t0)) return false;
    if (any(vint4(tri1.v[1]) == t0)) return false;
    if (any(vint4(tri1.v[2]) == t0)) return false;
  }
  CSTAT(bvh_collide_prim_intersections3++);

  const Vec3fa a0 = mesh0->vertex(tri0.v[0]);
  const Vec3fa a1 = mesh0->vertex(tri0.v[1]);
  const Vec3fa a2 = mesh0->vertex(tri0.v[2]);
  const Vec3fa b0 = mesh1->vertex(tri1.v[0]);
  const Vec3fa b1 = mesh1->vertex(tri1.v[1]);
  const Vec3fa b2 = mesh1->vertex(tri1.v[2]);

  return TriangleTriangleIntersector::intersect_triangle_triangle(a0,a1,a2,b0,b1,b2);
}
/* reports all primitive pairs of two user-geometry leaves to the user
   callback, batching collisions in a small stack buffer.
   Fix: the buffer size and the flush threshold were the same magic number
   written twice; they are now tied to a single named constant. */
template<int N>
__forceinline void BVHNColliderUserGeom<N>::processLeaf(NodeRef node0, NodeRef node1)
{
  constexpr size_t MAX_COLLISIONS = 16;   // callback batch size
  Collision collisions[MAX_COLLISIONS];
  size_t num_collisions = 0;
  size_t N0; Object* leaf0 = (Object*) node0.leaf(N0);
  size_t N1; Object* leaf1 = (Object*) node1.leaf(N1);

  /* all pairs between the two leaves */
  for (size_t i=0; i<N0; i++) {
    for (size_t j=0; j<N1; j++) {
      const unsigned geomID0 = leaf0[i].geomID();
      const unsigned primID0 = leaf0[i].primID();
      const unsigned geomID1 = leaf1[j].geomID();
      const unsigned primID1 = leaf1[j].primID();
      /* skip self pairs when a scene is collided with itself */
      if (this->scene0 == this->scene1 && geomID0 == geomID1 && primID0 == primID1) continue;
      collisions[num_collisions++] = Collision(geomID0,primID0,geomID1,primID1);
      if (num_collisions == MAX_COLLISIONS) {
        /* flush a full batch (Collision layout matches RTCCollision) */
        this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
        num_collisions = 0;
      }
    }
  }
  /* flush the remaining partial batch */
  if (num_collisions)
    this->callback(this->userPtr,(RTCCollision*)&collisions,num_collisions);
}
/* simultaneous depth-first traversal of two BVHs: when both refs are inner
   nodes, the one with the larger surface area is descended first; child
   pairs whose boxes do not overlap are culled via the SIMD overlap mask */
template<int N>
void BVHNCollider<N>::collide_recurse(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1, size_t depth0, size_t depth1)
{
  CSTAT(bvh_collide_traversal_steps++);
  if (unlikely(ref0.isLeaf())) {
    if (unlikely(ref1.isLeaf())) {
      /* both sides are leaves -> test their primitive pairs */
      CSTAT(bvh_collide_leaf_pairs++);
      processLeaf(ref0,ref1);
      return;
    } else goto recurse_node1;
  } else {
    if (unlikely(ref1.isLeaf())) {
      goto recurse_node0;
    } else {
      /* descend the subtree with the larger surface area first */
      if (area(bounds0) > area(bounds1)) {
        goto recurse_node0;
      }
      else {
        goto recurse_node1;
      }
    }
  }

  {
recurse_node0:
    /* recurse into the children of node0 that overlap bounds1 */
    AABBNode* node0 = ref0.getAABBNode();
    size_t mask = overlap<N>(bounds1,*node0);
    //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
    //for (size_t i=0; i<N; i++) {
#if 0
    /* disabled: parallel recursion over the overlapping children */
    if (depth0 < parallel_depth_threshold)
    {
      parallel_for(size_t(N), [&] ( size_t i ) {
          if (mask & ( 1 << i)) {
            BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
            collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
          }
        });
    }
    else
#endif
    {
      /* iterate over the set bits of the overlap mask */
      for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
        BVHN<N>::prefetch(node0->child(i),BVH_FLAG_ALIGNED_NODE);
        collide_recurse(node0->child(i),node0->bounds(i),ref1,bounds1,depth0+1,depth1);
      }
    }
    return;
  }

  {
recurse_node1:
    /* recurse into the children of node1 that overlap bounds0 */
    AABBNode* node1 = ref1.getAABBNode();
    size_t mask = overlap<N>(bounds0,*node1);
    //for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
    //for (size_t i=0; i<N; i++) {
#if 0
    /* disabled: parallel recursion over the overlapping children */
    if (depth1 < parallel_depth_threshold)
    {
      parallel_for(size_t(N), [&] ( size_t i ) {
          if (mask & ( 1 << i)) {
            BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
            collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
          }
        });
    }
    else
#endif
    {
      for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
        BVHN<N>::prefetch(node1->child(i),BVH_FLAG_ALIGNED_NODE);
        collide_recurse(ref0,bounds0,node1->child(i),node1->bounds(i),depth0,depth1+1);
      }
    }
    return;
  }
}
/* one splitting step of the job-based traversal: expands a traversal job
   into child jobs (same descend policy as collide_recurse); leaf/leaf jobs
   cannot be split and are passed through unchanged */
template<int N>
void BVHNCollider<N>::split(const CollideJob& job, jobvector& jobs)
{
  if (unlikely(job.ref0.isLeaf())) {
    if (unlikely(job.ref1.isLeaf())) {
      /* leaf/leaf pairs are terminal jobs */
      jobs.push_back(job);
      return;
    } else goto recurse_node1;
  } else {
    if (unlikely(job.ref1.isLeaf())) {
      goto recurse_node0;
    } else {
      /* split the subtree with the larger surface area first */
      if (area(job.bounds0) > area(job.bounds1)) {
        goto recurse_node0;
      }
      else {
        goto recurse_node1;
      }
    }
  }

  {
recurse_node0:
    /* one child job per child of node0 that overlaps bounds1 */
    const AABBNode* node0 = job.ref0.getAABBNode();
    size_t mask = overlap<N>(job.bounds1,*node0);
    for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
      jobs.push_back(CollideJob(node0->child(i),node0->bounds(i),job.depth0+1,job.ref1,job.bounds1,job.depth1));
    }
    return;
  }

  {
recurse_node1:
    /* one child job per child of node1 that overlaps bounds0 */
    const AABBNode* node1 = job.ref1.getAABBNode();
    size_t mask = overlap<N>(job.bounds0,*node1);
    for (size_t m=mask, i=bsf(m); m!=0; m=btc(m,i), i=bsf(m)) {
      jobs.push_back(CollideJob(job.ref0,job.bounds0,job.depth0,node1->child(i),node1->bounds(i),job.depth1+1));
    }
    return;
  }
}
/* traversal entry point: repeatedly splits the root job into up to M jobs
   (ping-ponging between two job lists), then processes the resulting jobs
   in parallel with the recursive traversal */
template<int N>
void BVHNCollider<N>::collide_recurse_entry(NodeRef ref0, const BBox3fa& bounds0, NodeRef ref1, const BBox3fa& bounds1)
{
  CSTAT(bvh_collide_traversal_steps = 0);
  CSTAT(bvh_collide_leaf_pairs = 0);
  CSTAT(bvh_collide_leaf_iterations = 0);
  CSTAT(bvh_collide_prim_intersections1 = 0);
  CSTAT(bvh_collide_prim_intersections2 = 0);
  CSTAT(bvh_collide_prim_intersections3 = 0);
  CSTAT(bvh_collide_prim_intersections4 = 0);
  CSTAT(bvh_collide_prim_intersections5 = 0);
  CSTAT(bvh_collide_prim_intersections = 0);
#if 0
  /* disabled: purely sequential traversal */
  collide_recurse(ref0,bounds0,ref1,bounds1,0,0);
#else
  const int M = 2048;      // maximum number of parallel jobs
  jobvector jobs[2];       // source/target lists swapped each round
  jobs[0].reserve(M);
  jobs[1].reserve(M);
  jobs[0].push_back(CollideJob(ref0,bounds0,0,ref1,bounds1,0));
  int source = 0;
  int target = 1;

  /* try to split job until job list is full */
  while (jobs[source].size()+8 <= M)
  {
    for (size_t i=0; i<jobs[source].size(); i++)
    {
      const CollideJob& job = jobs[source][i];
      size_t remaining = jobs[source].size()-i;
      /* keep the job unsplit if splitting could overflow the target list
         (a split produces at most 8 child jobs) */
      if (jobs[target].size()+remaining+8 > M) {
        jobs[target].push_back(job);
      } else {
        split(job,jobs[target]);
      }
    }

    /* stop splitting jobs if we reached only leaves and cannot make progress anymore */
    if (jobs[target].size() == jobs[source].size())
      break;

    jobs[source].resize(0);
    std::swap(source,target);
  }

  /* parallel processing of all jobs */
  parallel_for(size_t(jobs[source].size()), [&] ( size_t i ) {
      CollideJob& j = jobs[source][i];
      collide_recurse(j.ref0,j.bounds0,j.ref1,j.bounds1,j.depth0,j.depth1);
    });
#endif
  CSTAT(PRINT(bvh_collide_traversal_steps));
  CSTAT(PRINT(bvh_collide_leaf_pairs));
  CSTAT(PRINT(bvh_collide_leaf_iterations));
  CSTAT(PRINT(bvh_collide_prim_intersections1));
  CSTAT(PRINT(bvh_collide_prim_intersections2));
  CSTAT(PRINT(bvh_collide_prim_intersections3));
  CSTAT(PRINT(bvh_collide_prim_intersections4));
  CSTAT(PRINT(bvh_collide_prim_intersections5));
  CSTAT(PRINT(bvh_collide_prim_intersections));
}
/* public entry point: collides two user-geometry BVHs and reports all
   overlapping primitive pairs through the given callback */
template<int N>
void BVHNColliderUserGeom<N>::collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr)
{
  BVHNColliderUserGeom<N> collider(bvh0->scene,bvh1->scene,callback,userPtr);
  collider.collide_recurse_entry(bvh0->root,bvh0->bounds.bounds(),bvh1->root,bvh1->bounds.bounds());
}
#if defined (EMBREE_LOWEST_ISA)
/* Regression test for the triangle/triangle intersection routine used by
   collision detection. Registers itself with the regression-test framework
   at static-initialization time via the global instance below. */
struct collision_regression_test : public RegressionTest
{
  collision_regression_test(const char* name) : RegressionTest(name) {
    registerRegressionTest(this);
  }

  /* Each case asserts the expected boolean result of
     intersect_triangle_triangle for hand-picked triangle pairs;
     returns true only if all cases match. */
  bool run ()
  {
    bool passed = true;
    /* coplanar but disjoint triangles must not intersect */
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(-0.008815f, 0.041848f, -2.49875e-06f), Vec3fa(-0.008276f, 0.053318f, -2.49875e-06f), Vec3fa(0.003023f, 0.048969f, -2.49875e-06f),
                                                                        Vec3fa(0.00245f, 0.037612f, -2.49875e-06f), Vec3fa(0.01434f, 0.042634f, -2.49875e-06f), Vec3fa(0.013499f, 0.031309f, -2.49875e-06f)) == false;
    /* identical triangles intersect; parallel offset triangles do not */
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,1),Vec3fa(0,1,1)) == false;
    /* triangles sharing an edge or a vertex, and crossing triangles */
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,1),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,-0.1f),Vec3fa(1,0,1),Vec3fa(0,1,1)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0)) == true;
    /* coplanar overlapping / contained triangles intersect */
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0,0,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0,0),Vec3fa(0,0.5f,0)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(0.1f,-0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0), Vec3fa(-0.1f,0.1f,0),Vec3fa(0.5f,0.1f,0),Vec3fa(0.1f,0.5f,0)) == true;
    /* small triangles translated fully outside the reference triangle must not intersect */
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
                                                                        Vec3fa(-1,1,0) + Vec3fa(0,0,0),Vec3fa(-1,1,0) + Vec3fa(0.1f,0,0),Vec3fa(-1,1,0) + Vec3fa(0,0.1f,0)) == false;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
                                                                        Vec3fa( 2,0.5f,0) + Vec3fa(0,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0.1f,0,0),Vec3fa( 2,0.5f,0) + Vec3fa(0,0.1f,0)) == false;
    passed &= TriangleTriangleIntersector::intersect_triangle_triangle (Vec3fa(0,0,0),Vec3fa(1,0,0),Vec3fa(0,1,0),
                                                                        Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0.1f,0,0),Vec3fa(0.5f,-2.0f,0) + Vec3fa(0,0.1f,0)) == false;
    return passed;
  }
};

/* global instance: registers the test at startup */
collision_regression_test collision_regression("collision_regression_test");
#endif
////////////////////////////////////////////////////////////////////////////////
/// Collider Definitions
////////////////////////////////////////////////////////////////////////////////

/* the 4-wide collider is always available; the 8-wide variant only when
   this translation unit is compiled for AVX targets */
DEFINE_COLLIDER(BVH4ColliderUserGeom,BVHNColliderUserGeom<4>);

#if defined(__AVX__)
DEFINE_COLLIDER(BVH8ColliderUserGeom,BVHNColliderUserGeom<8>);
#endif
}
}

View file

@ -0,0 +1,72 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../geometry/trianglev.h"
#include "../geometry/object.h"
namespace embree
{
namespace isa
{
/*! Base class for BVH/BVH collision queries. Traverses two BVHs
    simultaneously and invokes processLeaf() for every pair of
    potentially overlapping leaves. */
template<int N>
class BVHNCollider
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::AABBNode AABBNode;

  /*! a pair of subtrees (with their bounds and traversal depths) that
      still has to be tested against each other */
  struct CollideJob
  {
    CollideJob () {}

    CollideJob (NodeRef ref0, const BBox3fa& bounds0, size_t depth0,
                NodeRef ref1, const BBox3fa& bounds1, size_t depth1)
    : ref0(ref0), bounds0(bounds0), depth0(depth0), ref1(ref1), bounds1(bounds1), depth1(depth1) {}

    NodeRef ref0;    //!< node of the first BVH
    BBox3fa bounds0; //!< bounds of that node
    size_t depth0;   //!< traversal depth inside the first BVH
    NodeRef ref1;    //!< node of the second BVH
    BBox3fa bounds1; //!< bounds of that node
    size_t depth1;   //!< traversal depth inside the second BVH
  };
  typedef vector_t<CollideJob, aligned_allocator<CollideJob,16>> jobvector;

  /*! splits one job into smaller jobs (used to seed parallel traversal) */
  void split(const CollideJob& job, jobvector& jobs);

public:
  __forceinline BVHNCollider (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
    : scene0(scene0), scene1(scene1), callback(callback), userPtr(userPtr) {}

public:
  /*! invoked for each pair of potentially colliding leaves */
  virtual void processLeaf(NodeRef leaf0, NodeRef leaf1) = 0;
  void collide_recurse(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1, size_t depth0, size_t depth1);
  void collide_recurse_entry(NodeRef node0, const BBox3fa& bounds0, NodeRef node1, const BBox3fa& bounds1);

protected:
  Scene* scene0;           //!< scene of the first BVH
  Scene* scene1;           //!< scene of the second BVH
  RTCCollideFunc callback; //!< user callback invoked for colliding primitive pairs
  void* userPtr;           //!< opaque user pointer forwarded to the callback
};
/*! Collider for user geometries: reports pairs of potentially colliding
    user-geometry primitives through the RTCCollideFunc callback. */
template<int N>
class BVHNColliderUserGeom : public BVHNCollider<N>
{
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::AABBNode AABBNode;

  /* private: instances are only created by the static collide() entry point */
  __forceinline BVHNColliderUserGeom (Scene* scene0, Scene* scene1, RTCCollideFunc callback, void* userPtr)
    : BVHNCollider<N>(scene0,scene1,callback,userPtr) {}

  /*! reports all primitive pairs of two overlapping leaves to the callback */
  virtual void processLeaf(NodeRef leaf0, NodeRef leaf1);

public:
  /*! collides two user-geometry BVHs */
  static void collide(BVH* __restrict__ bvh0, BVH* __restrict__ bvh1, RTCCollideFunc callback, void* userPtr);
};
}
}

View file

@ -0,0 +1,21 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
#include "../common/isa.h"
#include "../common/accel.h"
#include "../common/scene.h"
#include "../geometry/curve_intersector_virtual.h"
namespace embree
{
/*! Base class of the BVH instantiation factories; only defines the
    build/traversal variant enums shared by all concrete factories. */
class BVHFactory
{
public:
  enum class BuildVariant     { STATIC, DYNAMIC, HIGH_QUALITY }; //!< how the BVH is (re)built
  enum class IntersectVariant { FAST, ROBUST };                  //!< fast vs. robust (watertight) traversal
};
}

View file

@ -0,0 +1,322 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector1.h"
#include "node_intersector1.h"
#include "bvh_traverser1.h"
#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/curveNv_intersector.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/curveNi_mb_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/instance_array_intersector.h"
#include "../geometry/subgrid_intersector.h"
#include "../geometry/subgrid_mb_intersector.h"
#include "../geometry/curve_intersector_virtual.h"
namespace embree
{
namespace isa
{
/*! Intersects a single ray with the BVH and updates the ray hit with the
    closest intersection found. Uses a local, distance-sorted node stack so
    near subtrees are visited first; popped stack entries whose entry
    distance already lies behind the current hit are culled.
    \param This    intersector collection; This->ptr holds the BVH
    \param ray     ray to intersect; updated in place with the closest hit
    \param context ray query context forwarded to the primitive intersector

    Fix: removed a duplicate 'bvh->root == BVH::emptyNode' early-out that
    followed the stack initialization — the identical check at the top of
    the function already returned in that case, so it was dead code. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::intersect(const Accel::Intersectors* __restrict__ This,
                                                                          RayHit& __restrict__ ray,
                                                                          RayQueryContext* __restrict__ context)
{
  const BVH* __restrict__ bvh = (const BVH*)This->ptr;

  /* we may traverse an empty BVH in case all geometry was invalid */
  if (bvh->root == BVH::emptyNode)
    return;

  /* perform per ray precalculations required by the primitive intersector */
  Precalculations pre(ray, bvh);

  /* stack state */
  StackItemT<NodeRef> stack[stackSize];    // stack of nodes
  StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
  StackItemT<NodeRef>* stackEnd = stack+stackSize;
  stack[0].ptr = bvh->root;
  stack[0].dist = neg_inf;

  /* filter out invalid rays */
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  if (!ray.valid()) return;
#endif

  /* verify correct input */
  assert(ray.valid());
  assert(ray.tnear() >= 0.0f);
  assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));

  /* load the ray into SIMD registers */
  TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));

  /* initialize the node traverser */
  BVHNNodeTraverser1Hit<N, types> nodeTraverser;

  /* pop loop */
  while (true) pop:
  {
    /* pop next node */
    if (unlikely(stackPtr == stack)) break;
    stackPtr--;
    NodeRef cur = NodeRef(stackPtr->ptr);

    /* if popped node is too far, pop next one */
    if (unlikely(*(float*)&stackPtr->dist > ray.tfar))
      continue;

    /* downtraversal loop */
    while (true)
    {
      /* intersect node */
      size_t mask; vfloat<N> tNear;
      STAT3(normal.trav_nodes,1,1,1);
      bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
      if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }

      /* if no child is hit, pop next node */
      if (unlikely(mask == 0))
        goto pop;

      /* select next child and push other children */
      nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
    }

    /* this is a leaf node */
    assert(cur != BVH::emptyNode);
    STAT3(normal.trav_leaves,1,1,1);
    size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
    size_t lazy_node = 0;
    PrimitiveIntersector1::intersect(This, pre, ray, context, prim, num, tray, lazy_node);

    /* shrink the traversal far distance to the new closest hit */
    tray.tfar = ray.tfar;

    /* push lazy node onto stack */
    if (unlikely(lazy_node)) {
      stackPtr->ptr = lazy_node;
      stackPtr->dist = neg_inf;
      stackPtr++;
    }
  }
}
/*! Tests whether a single ray is occluded by any geometry in the BVH
    (any-hit traversal). Unlike intersect(), traversal terminates as soon
    as one occluding primitive is found, and the node stack is unsorted
    because hit order does not matter. On occlusion, ray.tfar is set to
    neg_inf to mark the ray as occluded. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
void BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::occluded(const Accel::Intersectors* __restrict__ This,
                                                                         Ray& __restrict__ ray,
                                                                         RayQueryContext* __restrict__ context)
{
  const BVH* __restrict__ bvh = (const BVH*)This->ptr;

  /* we may traverse an empty BVH in case all geometry was invalid */
  if (bvh->root == BVH::emptyNode)
    return;

  /* early out for already occluded rays */
  if (unlikely(ray.tfar < 0.0f))
    return;

  /* perform per ray precalculations required by the primitive intersector */
  Precalculations pre(ray, bvh);

  /* stack state (plain NodeRef stack — no distances needed for any-hit) */
  NodeRef stack[stackSize];  // stack of nodes that still need to get traversed
  NodeRef* stackPtr = stack+1; // current stack pointer
  NodeRef* stackEnd = stack+stackSize;
  stack[0] = bvh->root;

  /* filter out invalid rays */
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  if (!ray.valid()) return;
#endif

  /* verify correct input */
  assert(ray.valid());
  assert(ray.tnear() >= 0.0f);
  assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f));

  /* load the ray into SIMD registers */
  TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f));

  /* initialize the node traverser */
  BVHNNodeTraverser1Hit<N, types> nodeTraverser;

  /* pop loop */
  while (true) pop:
  {
    /* pop next node */
    if (unlikely(stackPtr == stack)) break;
    stackPtr--;
    NodeRef cur = (NodeRef)*stackPtr;

    /* downtraversal loop */
    while (true)
    {
      /* intersect node */
      size_t mask; vfloat<N> tNear;
      STAT3(shadow.trav_nodes,1,1,1);
      bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask);
      if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }

      /* if no child is hit, pop next node */
      if (unlikely(mask == 0))
        goto pop;

      /* select next child and push other children */
      nodeTraverser.traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
    }

    /* this is a leaf node */
    assert(cur != BVH::emptyNode);
    STAT3(shadow.trav_leaves,1,1,1);
    size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
    size_t lazy_node = 0;

    /* any occluding primitive terminates the query */
    if (PrimitiveIntersector1::occluded(This, pre, ray, context, prim, num, tray, lazy_node)) {
      ray.tfar = neg_inf; // mark ray as occluded
      break;
    }

    /* push lazy node onto stack */
    if (unlikely(lazy_node)) {
      *stackPtr = (NodeRef)lazy_node;
      stackPtr++;
    }
  }
}
/*! Dispatch helper implementing BVH point queries (closest-point style
    traversal). Specialized below to disable queries for geometry types
    that do not support them. Returns true if the query was updated by
    any leaf; the traversal cull radius shrinks whenever a leaf shrinks
    the query radius. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
struct PointQueryDispatch
{
  typedef typename PrimitiveIntersector1::Precalculations Precalculations;
  typedef typename PrimitiveIntersector1::Primitive Primitive;
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::AABBNode AABBNode;
  typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;

  static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store

  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
  {
    const BVH* __restrict__ bvh = (const BVH*)This->ptr;

    /* we may traverse an empty BVH in case all geometry was invalid */
    if (bvh->root == BVH::emptyNode)
      return false;

    /* stack state */
    StackItemT<NodeRef> stack[stackSize];    // stack of nodes
    StackItemT<NodeRef>* stackPtr = stack+1; // current stack pointer
    StackItemT<NodeRef>* stackEnd = stack+stackSize;
    stack[0].ptr = bvh->root;
    stack[0].dist = neg_inf;

    /* verify correct input */
    assert(!(types & BVH_MB) || (query->time >= 0.0f && query->time <= 1.0f));

    /* load the point query into SIMD registers */
    TravPointQuery<N> tquery(query->p, context->query_radius);

    /* initialize the node traverser */
    BVHNNodeTraverser1Hit<N,types> nodeTraverser;

    bool changed = false;

    /* squared radius for sphere queries, squared AABB half-extent otherwise */
    float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
                      ? query->radius * query->radius
                      : dot(context->query_radius, context->query_radius);

    /* pop loop */
    while (true) pop:
    {
      /* pop next node */
      if (unlikely(stackPtr == stack)) break;
      stackPtr--;
      NodeRef cur = NodeRef(stackPtr->ptr);

      /* if popped node is too far, pop next one */
      if (unlikely(*(float*)&stackPtr->dist > cull_radius))
        continue;

      /* downtraversal loop */
      while (true)
      {
        /* intersect node */
        size_t mask; vfloat<N> tNear;
        STAT3(point_query.trav_nodes,1,1,1);
        bool nodeIntersected;
        if (likely(context->query_type == POINT_QUERY_TYPE_SPHERE)) {
          nodeIntersected = BVHNNodePointQuerySphere1<N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
        } else {
          nodeIntersected = BVHNNodePointQueryAABB1 <N, types>::pointQuery(cur, tquery, query->time, tNear, mask);
        }
        if (unlikely(!nodeIntersected)) { STAT3(point_query.trav_nodes,-1,-1,-1); break; }

        /* if no child is hit, pop next node */
        if (unlikely(mask == 0))
          goto pop;

        /* select next child and push other children */
        nodeTraverser.traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
      }

      /* this is a leaf node */
      assert(cur != BVH::emptyNode);
      STAT3(point_query.trav_leaves,1,1,1);
      size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
      size_t lazy_node = 0;

      if (PrimitiveIntersector1::pointQuery(This, query, context, prim, num, tquery, lazy_node))
      {
        /* a leaf updated the query: shrink the traversal radius accordingly */
        changed = true;
        tquery.rad = context->query_radius;
        cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE
                    ? query->radius * query->radius
                    : dot(context->query_radius, context->query_radius);
      }

      /* push lazy node onto stack */
      if (unlikely(lazy_node)) {
        stackPtr->ptr = lazy_node;
        stackPtr->dist = neg_inf;
        stackPtr++;
      }
    }
    return changed;
  }
};
/* disable point queries for not yet supported geometry types */
/* point queries are not yet supported for curve geometry */
template<int N, int types, bool robust>
struct PointQueryDispatch<N, types, robust, VirtualCurveIntersector1> {
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
};
/* point queries are not yet supported for subdivision patches */
template<int N, int types, bool robust>
struct PointQueryDispatch<N, types, robust, SubdivPatch1Intersector1> {
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
};
/* point queries are not yet supported for motion-blurred subdivision patches */
template<int N, int types, bool robust>
struct PointQueryDispatch<N, types, robust, SubdivPatch1MBIntersector1> {
  static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { return false; }
};
/*! Forwards the point query to the dispatch helper, which disables the
    query for geometry types that do not support it. */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
bool BVHNIntersector1<N, types, robust, PrimitiveIntersector1>::pointQuery(
  const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context)
{
  typedef PointQueryDispatch<N, types, robust, PrimitiveIntersector1> Dispatch;
  return Dispatch::pointQuery(This, query, context);
}
}
}

View file

@ -0,0 +1,34 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../common/ray.h"
#include "../common/point_query.h"
namespace embree
{
namespace isa
{
/*! BVH single ray intersector. Implements closest-hit (intersect),
    any-hit (occluded), and point-query traversal over a BVHN<N>.
    \tparam N      branching factor of the BVH
    \tparam types  supported node types (BVH_AN1, BVH_MB, ...)
    \tparam robust use robust (watertight) node intersection
    \tparam PrimitiveIntersector1 leaf primitive intersector */
template<int N, int types, bool robust, typename PrimitiveIntersector1>
class BVHNIntersector1
{
  /* shortcuts for frequently used types */
  typedef typename PrimitiveIntersector1::Precalculations Precalculations;
  typedef typename PrimitiveIntersector1::Primitive Primitive;
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::AABBNode AABBNode;
  typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;

  static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store

public:
  static void intersect (const Accel::Intersectors* This, RayHit& ray, RayQueryContext* context);
  static void occluded  (const Accel::Intersectors* This, Ray& ray, RayQueryContext* context);
  static bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);
};
}
}

View file

@ -0,0 +1,64 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector1.cpp"
namespace embree
{
namespace isa
{
/*! returns the instruction set this translation unit was compiled for
    (used to verify correct multi-target linking) */
int getISA() {
  return VerifyMultiTargetLinking::getISA();
}
////////////////////////////////////////////////////////////////////////////////
/// BVH4Intersector1 Definitions
////////////////////////////////////////////////////////////////////////////////

/* curve/point geometry (aligned and unaligned/oriented bounds, with motion-blur variants) */
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersector1 >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersector1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersector1 >));

IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >));

/* triangles (Moeller-Trumbore = fast, Pluecker = robust; MB = motion blur) */
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller,  BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller  <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));

IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true  COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true  COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<4 COMMA true> > >));

/* quads */
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersector1<QuadMvIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));

IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<QuadMiMBIntersector1Moeller <4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true  COMMA ArrayIntersector1<QuadMiMBIntersector1Pluecker<4 COMMA true> > >));

/* subdivision surfaces */
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1Intersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector1>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR1(BVH4SubdivPatch1MBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubdivPatch1MBIntersector1>));

/* user-defined geometry */
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<ObjectIntersector1<false>> >));
IF_ENABLED_USER(DEFINE_INTERSECTOR1(BVH4VirtualMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<ObjectIntersector1<true>> >));

/* instances and instance arrays */
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >));

IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1> >));
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR1(BVH4InstanceArrayMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceArrayIntersector1MB> >));

/* quantized-node BVHs */
IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >));

/* grid geometry */
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >));
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersector1Pluecker<4 COMMA true> >));
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA SubGridMBIntersector1Pluecker<4 COMMA true> >));
}
}

View file

@ -0,0 +1,918 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_hybrid.h"
#include "bvh_traverser1.h"
#include "node_intersector1.h"
#include "node_intersector_packet.h"
#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/curveNv_intersector.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/curveNi_mb_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/instance_array_intersector.h"
#include "../geometry/subgrid_intersector.h"
#include "../geometry/subgrid_mb_intersector.h"
#include "../geometry/curve_intersector_virtual.h"
#define SWITCH_DURING_DOWN_TRAVERSAL 1
#define FORCE_SINGLE_MODE 0
#define ENABLE_FAST_COHERENT_CODEPATHS 1
namespace embree
{
namespace isa
{
/*! Single-ray fallback used by the hybrid packet intersector: traverses
    the BVH for ray k of the packet only. Invoked when too few rays of
    the packet remain active for chunk traversal to pay off.
    \param root BVH node to start traversal from (not necessarily bvh->root)
    \param k    index of the ray inside the K-wide packet */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
                                                                                            const BVH* bvh,
                                                                                            NodeRef root,
                                                                                            size_t k,
                                                                                            Precalculations& pre,
                                                                                            RayHitK<K>& ray,
                                                                                            const TravRayK<K, robust>& tray,
                                                                                            RayQueryContext* context)
{
  /* stack state */
  StackItemT<NodeRef> stack[stackSizeSingle];    // stack of nodes
  StackItemT<NodeRef>* stackPtr = stack + 1;     // current stack pointer
  StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
  stack[0].ptr = root;
  stack[0].dist = neg_inf;

  /* load the ray into SIMD registers (extract lane k from the packet) */
  TravRay<N,robust> tray1;
  tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);

  /* pop loop */
  while (true) pop:
  {
    /* pop next node */
    if (unlikely(stackPtr == stack)) break;
    stackPtr--;
    NodeRef cur = NodeRef(stackPtr->ptr);

    /* if popped node is too far, pop next one */
    if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
      continue;

    /* downtraversal loop */
    while (true)
    {
      /* intersect node */
      size_t mask; vfloat<N> tNear;
      STAT3(normal.trav_nodes, 1, 1, 1);
      bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
      if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }

      /* if no child is hit, pop next node */
      if (unlikely(mask == 0))
        goto pop;

      /* select next child and push other children */
      BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
    }

    /* this is a leaf node */
    assert(cur != BVH::emptyNode);
    STAT3(normal.trav_leaves, 1, 1, 1);
    size_t num; Primitive* prim = (Primitive*)cur.leaf(num);

    size_t lazy_node = 0;
    PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);

    /* shrink the traversal far distance to the new closest hit of lane k */
    tray1.tfar = ray.tfar[k];

    /* push lazy node onto stack */
    if (unlikely(lazy_node)) {
      stackPtr->ptr = lazy_node;
      stackPtr->dist = neg_inf;
      stackPtr++;
    }
  }
}
/*! Hybrid packet intersector: traverses the BVH with a K-wide ray packet
    (chunk traversal) and dynamically switches to single-ray traversal
    (intersect1) whenever the number of active rays drops below a
    threshold. Rays are additionally grouped by direction-sign octant to
    improve traversal coherence when the packet contains opposing rays. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
                                                                                           Accel::Intersectors* __restrict__ This,
                                                                                           RayHitK<K>& __restrict__ ray,
                                                                                           RayQueryContext* __restrict__ context)
{
  BVH* __restrict__ bvh = (BVH*)This->ptr;

  /* we may traverse an empty BVH in case all geometry was invalid */
  if (bvh->root == BVH::emptyNode)
    return;

#if ENABLE_FAST_COHERENT_CODEPATHS == 1
  /* coherent ray packets use a dedicated, faster traversal */
  assert(context);
  if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
  {
    intersectCoherent(valid_i, This, ray, context);
    return;
  }
#endif

  /* filter out invalid rays */
  vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  valid &= ray.valid();
#endif

  /* return if there are no valid rays */
  size_t valid_bits = movemask(valid);

#if defined(__AVX__)
  STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
#endif

  if (unlikely(valid_bits == 0)) return;

  /* verify correct input */
  assert(all(valid, ray.valid()));
  assert(all(valid, ray.tnear() >= 0.0f));
  assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  Precalculations pre(valid, ray);

  /* load ray */
  TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);

  /* pure single-ray mode: trace each active lane individually */
  if (single)
  {
    tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
    tray.tfar  = select(valid, org_ray_tfar , vfloat<K>(neg_inf));

    for (; valid_bits!=0; ) {
      const size_t i = bscf(valid_bits);
      intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
    }
    return;
  }

  /* determine switch threshold based on flags */
  const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;

  vint<K> octant = ray.octant();
  octant = select(valid, octant, vint<K>(0xffffffff));

  /* test whether we have ray with opposing direction signs in the packet */
  bool split = false;
  {
    size_t bits = valid_bits;
    vbool<K> vsplit( false );
    do
    {
      const size_t valid_index = bsf(bits);
      vbool<K> octant_valid = octant[valid_index] == octant;
      bits &= ~(size_t)movemask(octant_valid);
      vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
    } while (bits);
    if (any(vsplit)) split = true;
  }

  /* process the packet octant-group by octant-group */
  do
  {
    const size_t valid_index = bsf(valid_bits);
    const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
    const vint<K> count_diff_octant = \
      ((diff_octant >> 2) & 1) +
      ((diff_octant >> 1) & 1) +
      ((diff_octant >> 0) & 1);

    /* group rays differing in at most one direction-sign bit */
    vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
    if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down

    octant = select(octant_valid,vint<K>(0xffffffff),octant);
    valid_bits &= ~(size_t)movemask(octant_valid);

    tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
    tray.tfar  = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));

    /* allocate stack and push root node */
    vfloat<K> stack_near[stackSizeChunk];
    NodeRef stack_node[stackSizeChunk];
    stack_node[0] = BVH::invalidNode; // sentinel marking the stack bottom
    stack_near[0] = inf;
    stack_node[1] = bvh->root;
    stack_near[1] = tray.tnear;
    NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
    NodeRef* __restrict__ sptr_node = stack_node + 2;
    vfloat<K>* __restrict__ sptr_near = stack_near + 2;

    while (1) pop:
    {
      /* pop next node from stack */
      assert(sptr_node > stack_node);
      sptr_node--;
      sptr_near--;
      NodeRef cur = *sptr_node;
      if (unlikely(cur == BVH::invalidNode)) {
        assert(sptr_node == stack_node);
        break;
      }

      /* cull node if behind closest hit point */
      vfloat<K> curDist = *sptr_near;
      const vbool<K> active = curDist < tray.tfar;
      if (unlikely(none(active)))
        continue;

      /* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
#if FORCE_SINGLE_MODE == 0
      if (single)
#endif
      {
        size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
        if (unlikely(popcnt(bits) <= switchThreshold))
#endif
        {
          for (; bits!=0; ) {
            const size_t i = bscf(bits);
            intersect1(This, bvh, cur, i, pre, ray, tray, context);
          }
          tray.tfar = min(tray.tfar, ray.tfar);
          continue;
        }
      }
#endif
      while (likely(!cur.isLeaf()))
      {
        /* process nodes */
        const vbool<K> valid_node = tray.tfar > curDist;
        STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
        const NodeRef nodeRef = cur;
        const BaseNode* __restrict__ const node = nodeRef.baseNode();

        /* set cur to invalid */
        cur = BVH::emptyNode;
        curDist = pos_inf;

        size_t num_child_hits = 0;

        for (unsigned i = 0; i < N; i++)
        {
          const NodeRef child = node->children[i];
          if (unlikely(child == BVH::emptyNode)) break;
          vfloat<K> lnearP;
          vbool<K> lhit = valid_node;
          BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);

          /* if we hit the child we choose to continue with that child if it
             is closer than the current next child, or we push it onto the stack */
          if (likely(any(lhit)))
          {
            assert(sptr_node < stackEnd);
            assert(child != BVH::emptyNode);
            const vfloat<K> childDist = select(lhit, lnearP, inf);

            /* push cur node onto stack and continue with hit child */
            if (any(childDist < curDist))
            {
              if (likely(cur != BVH::emptyNode)) {
                num_child_hits++;
                *sptr_node = cur; sptr_node++;
                *sptr_near = curDist; sptr_near++;
              }
              curDist = childDist;
              cur = child;
            }

            /* push hit child onto stack */
            else {
              num_child_hits++;
              *sptr_node = child; sptr_node++;
              *sptr_near = childDist; sptr_near++;
            }
          }
        }
#if defined(__AVX__)
        //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
#endif
        if (unlikely(cur == BVH::emptyNode))
          goto pop;

        /* improved distance sorting for 3 or more hits */
        if (unlikely(num_child_hits >= 2))
        {
          if (any(sptr_near[-2] < sptr_near[-1]))
          {
            std::swap(sptr_near[-2],sptr_near[-1]);
            std::swap(sptr_node[-2],sptr_node[-1]);
          }
          if (unlikely(num_child_hits >= 3))
          {
            if (any(sptr_near[-3] < sptr_near[-1]))
            {
              std::swap(sptr_near[-3],sptr_near[-1]);
              std::swap(sptr_node[-3],sptr_node[-1]);
            }
            if (any(sptr_near[-3] < sptr_near[-2]))
            {
              std::swap(sptr_near[-3],sptr_near[-2]);
              std::swap(sptr_node[-3],sptr_node[-2]);
            }
          }
        }

#if SWITCH_DURING_DOWN_TRAVERSAL == 1
        if (single)
        {
          // seems to be the best place for testing utilization
          if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
          {
            *sptr_node++ = cur;
            *sptr_near++ = curDist;
            goto pop;
          }
        }
#endif
      }

      /* return if stack is empty */
      if (unlikely(cur == BVH::invalidNode)) {
        assert(sptr_node == stack_node);
        break;
      }

      /* intersect leaf */
      assert(cur != BVH::emptyNode);
      const vbool<K> valid_leaf = tray.tfar > curDist;
      STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
      if (unlikely(none(valid_leaf))) continue;

      size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);

      size_t lazy_node = 0;
      PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);

      /* shrink the traversal far distances to the new closest hits */
      tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);

      if (unlikely(lazy_node)) {
        *sptr_node = lazy_node; sptr_node++;
        *sptr_near = neg_inf; sptr_near++;
      }
    }
  } while(valid_bits);
}
/*! Coherent packet intersection: rays are partitioned into groups sharing the
 *  same direction octant, and each group is traversed together using a shared
 *  Frustum to cull whole nodes before the per-ray box tests. Closest-hit
 *  semantics: ray.tfar shrinks as hits are found and the frustum max distance
 *  is updated accordingly. Motion blur is not supported on this path (the
 *  per-child lhit mask is initialized to false and ray.time() is passed only
 *  to satisfy the intersector interface). */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
                                                                                                   Accel::Intersectors* __restrict__ This,
                                                                                                   RayHitK<K>& __restrict__ ray,
                                                                                                   RayQueryContext* context)
{
  BVH* __restrict__ bvh = (BVH*)This->ptr;

  /* filter out invalid rays */
  vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  valid &= ray.valid();
#endif

  /* return if there are no valid rays */
  size_t valid_bits = movemask(valid);
  if (unlikely(valid_bits == 0)) return;

  /* verify correct input */
  assert(all(valid, ray.valid()));
  assert(all(valid, ray.tnear() >= 0.0f));
  assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  Precalculations pre(valid, ray);

  /* load ray */
  TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);

  /* invalid rays get octant 0xffffffff so they never join any valid group */
  vint<K> octant = ray.octant();
  octant = select(valid, octant, vint<K>(0xffffffff));

  /* each iteration traverses one octant group of rays */
  do
  {
    const size_t valid_index = bsf(valid_bits);
    const vbool<K> octant_valid = octant[valid_index] == octant;
    valid_bits &= ~(size_t)movemask(octant_valid);

    /* lanes outside the group are disabled by an inverted [tnear,tfar] interval */
    tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
    tray.tfar  = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));

    Frustum<robust> frustum;
    frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);

    StackItemT<NodeRef> stack[stackSizeSingle];  // stack of nodes
    StackItemT<NodeRef>* stackPtr = stack + 1;   // current stack pointer
    stack[0].ptr  = bvh->root;
    stack[0].dist = neg_inf;

    while (1) pop:
    {
      /* pop next node from stack */
      if (unlikely(stackPtr == stack)) break;

      stackPtr--;
      NodeRef cur = NodeRef(stackPtr->ptr);

      /* cull node if behind closest hit point */
      /* reinterpret the stored scalar dist bits as float (broadcast into vfloat<K>) */
      vfloat<K> curDist = *(float*)&stackPtr->dist;
      const vbool<K> active = curDist < tray.tfar;
      if (unlikely(none(active))) continue;

      while (likely(!cur.isLeaf()))
      {
        /* process nodes */
        //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
        const NodeRef nodeRef = cur;
        const AABBNode* __restrict__ const node = nodeRef.getAABBNode();

        /* cheap shared frustum test first: mask of children the group might hit */
        vfloat<N> fmin;
        size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
        if (unlikely(!m_frustum_node)) goto pop;

        cur = BVH::emptyNode;
        curDist = pos_inf;

#if defined(__AVX__)
        //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
#endif
        size_t num_child_hits = 0;
        do {
          const size_t i = bscf(m_frustum_node);

          /* full per-ray box test only for frustum-surviving children */
          vfloat<K> lnearP;
          vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
          STAT3(normal.trav_nodes, 1, 1, 1);
          BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);

          if (likely(any(lhit)))
          {
            /* fmin[i] is the frustum entry distance, used only for ordering */
            const vfloat<K> childDist = fmin[i];
            const NodeRef child = node->child(i);
            BVHN<N>::prefetch(child);

            /* push cur node onto stack and continue with hit child */
            if (any(childDist < curDist))
            {
              if (likely(cur != BVH::emptyNode)) {
                num_child_hits++;
                stackPtr->ptr = cur;
                *(float*)&stackPtr->dist = toScalar(curDist);
                stackPtr++;
              }
              curDist = childDist;
              cur = child;
            }
            /* push hit child onto stack */
            else {
              num_child_hits++;
              stackPtr->ptr = child;
              *(float*)&stackPtr->dist = toScalar(childDist);
              stackPtr++;
            }
          }
        } while(m_frustum_node);

        if (unlikely(cur == BVH::emptyNode)) goto pop;

        /* improved distance sorting for 3 or more hits */
        if (unlikely(num_child_hits >= 2))
        {
          if (stackPtr[-2].dist < stackPtr[-1].dist)
            std::swap(stackPtr[-2],stackPtr[-1]);
          if (unlikely(num_child_hits >= 3))
          {
            if (stackPtr[-3].dist < stackPtr[-1].dist)
              std::swap(stackPtr[-3],stackPtr[-1]);
            if (stackPtr[-3].dist < stackPtr[-2].dist)
              std::swap(stackPtr[-3],stackPtr[-2]);
          }
        }
      }

      /* intersect leaf */
      assert(cur != BVH::invalidNode);
      assert(cur != BVH::emptyNode);
      const vbool<K> valid_leaf = tray.tfar > curDist;
      STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
      if (unlikely(none(valid_leaf))) continue;

      size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);

      size_t lazy_node = 0;
      PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);

      /* reduce max distance interval on successful intersection */
      if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
      {
        tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
        frustum.template updateMaxDist<K>(tray.tfar);
      }

      /* push lazily created node (if any) for later traversal */
      if (unlikely(lazy_node)) {
        stackPtr->ptr  = lazy_node;
        stackPtr->dist = neg_inf;
        stackPtr++;
      }
    }
  } while(valid_bits);
}
// ===================================================================================================================================================================
// ===================================================================================================================================================================
// ===================================================================================================================================================================
/*! Single-ray occlusion traversal for lane k of a K-wide packet (any-hit
 *  semantics). Returns true as soon as any primitive occludes the ray,
 *  after marking the lane terminated via ray.tfar[k] = -inf; returns false
 *  when the whole tree has been traversed without finding an occluder. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
                                                                                           const BVH* bvh,
                                                                                           NodeRef root,
                                                                                           size_t k,
                                                                                           Precalculations& pre,
                                                                                           RayK<K>& ray,
                                                                                           const TravRayK<K, robust>& tray,
                                                                                           RayQueryContext* context)
{
  /* stack state */
  NodeRef stack[stackSizeSingle];  // stack of nodes that still need to get traversed
  NodeRef* stackPtr = stack+1;     // current stack pointer
  NodeRef* stackEnd = stack+stackSizeSingle;
  stack[0] = root;

  /* load the ray into SIMD registers */
  TravRay<N,robust> tray1;
  tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);

  /* pop loop */
  while (true) pop:
  {
    /* pop next node */
    if (unlikely(stackPtr == stack)) break;
    stackPtr--;
    NodeRef cur = (NodeRef)*stackPtr;

    /* downtraversal loop */
    while (true)
    {
      /* intersect node */
      size_t mask; vfloat<N> tNear;
      STAT3(shadow.trav_nodes, 1, 1, 1);
      bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
      /* false means cur is not an inner node: undo the node stat and handle it as a leaf below */
      if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }

      /* if no child is hit, pop next node */
      if (unlikely(mask == 0))
        goto pop;

      /* select next child and push other children */
      BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
    }

    /* this is a leaf node */
    assert(cur != BVH::emptyNode);
    STAT3(shadow.trav_leaves, 1, 1, 1);
    size_t num; Primitive* prim = (Primitive*)cur.leaf(num);

    size_t lazy_node = 0;
    if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
      ray.tfar[k] = neg_inf;  // mark lane k as occluded/terminated
      return true;
    }

    /* push lazily created node (if any) for later traversal */
    if (unlikely(lazy_node)) {
      *stackPtr = lazy_node;
      stackPtr++;
    }
  }
  return false;
}
/*! Packet occlusion query (any-hit). Traverses the BVH in chunk mode with a
 *  dual stack of node references and per-lane near distances, and switches
 *  to single-ray traversal (occluded1) when the number of active lanes drops
 *  to the switch threshold. Occluded lanes get ray.tfar = -inf written back. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
                                                                                          Accel::Intersectors* __restrict__ This,
                                                                                          RayK<K>& __restrict__ ray,
                                                                                          RayQueryContext* context)
{
  BVH* __restrict__ bvh = (BVH*)This->ptr;

  /* we may traverse an empty BVH in case all geometry was invalid */
  if (bvh->root == BVH::emptyNode)
    return;

#if ENABLE_FAST_COHERENT_CODEPATHS == 1
  assert(context);
  /* coherent packets over a plain (non-MB) BVH take the frustum-culled fast path */
  if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
  {
    occludedCoherent(valid_i, This, ray, context);
    return;
  }
#endif

  /* filter out already occluded and invalid rays */
  vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  valid &= ray.valid();
#endif

  /* return if there are no valid rays */
  const size_t valid_bits = movemask(valid);
  if (unlikely(valid_bits == 0)) return;

  /* verify correct input */
  assert(all(valid, ray.valid()));
  assert(all(valid, ray.tnear() >= 0.0f));
  assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  Precalculations pre(valid, ray);

  /* load ray */
  TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);

  /* inactive lanes are disabled by an inverted [tnear,tfar] interval */
  tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
  tray.tfar  = select(valid, org_ray_tfar , vfloat<K>(neg_inf));

  vbool<K> terminated = !valid;
  const vfloat<K> inf = vfloat<K>(pos_inf);

  /* determine switch threshold based on flags */
  const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;

  /* allocate stack and push root node; entry 0 is an invalid-node sentinel */
  vfloat<K> stack_near[stackSizeChunk];
  NodeRef stack_node[stackSizeChunk];
  stack_node[0] = BVH::invalidNode;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = tray.tnear;
  NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  vfloat<K>* __restrict__ sptr_near = stack_near + 2;

  while (1) pop:
  {
    /* pop next node from stack */
    assert(sptr_node > stack_node);
    sptr_node--;
    sptr_near--;
    NodeRef cur = *sptr_node;
    if (unlikely(cur == BVH::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* cull node if behind closest hit point */
    vfloat<K> curDist = *sptr_near;
    const vbool<K> active = curDist < tray.tfar;
    if (unlikely(none(active)))
      continue;

    /* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
#if FORCE_SINGLE_MODE == 0
    if (single)
#endif
    {
      size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
      if (unlikely(popcnt(bits) <= switchThreshold))
#endif
      {
        /* traverse each remaining active lane individually */
        for (; bits!=0; ) {
          const size_t i = bscf(bits);
          if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
            set(terminated, i);
        }
        if (all(terminated)) break;
        tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
        continue;
      }
    }
#endif

    while (likely(!cur.isLeaf()))
    {
      /* process nodes */
      const vbool<K> valid_node = tray.tfar > curDist;
      STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
      const NodeRef nodeRef = cur;
      const BaseNode* __restrict__ const node = nodeRef.baseNode();

      /* set cur to invalid */
      cur = BVH::emptyNode;
      curDist = pos_inf;

      for (unsigned i = 0; i < N; i++)
      {
        const NodeRef child = node->children[i];
        if (unlikely(child == BVH::emptyNode)) break;
        vfloat<K> lnearP;
        vbool<K> lhit = valid_node;
        BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);

        /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
        if (likely(any(lhit)))
        {
          assert(sptr_node < stackEnd);
          assert(child != BVH::emptyNode);
          const vfloat<K> childDist = select(lhit, lnearP, inf);

          /* push 'cur' node onto stack and continue with hit child */
          if (likely(cur != BVH::emptyNode)) {
            *sptr_node = cur; sptr_node++;
            *sptr_near = curDist; sptr_near++;
          }
          curDist = childDist;
          cur = child;
        }
      }
      if (unlikely(cur == BVH::emptyNode))
        goto pop;

#if SWITCH_DURING_DOWN_TRAVERSAL == 1
      if (single)
      {
        // seems to be the best place for testing utilization
        if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
        {
          *sptr_node++ = cur;
          *sptr_near++ = curDist;
          goto pop;
        }
      }
#endif
    }

    /* return if stack is empty */
    if (unlikely(cur == BVH::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* intersect leaf */
    assert(cur != BVH::emptyNode);
    const vbool<K> valid_leaf = tray.tfar > curDist;
    STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
    if (unlikely(none(valid_leaf))) continue;

    size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);

    size_t lazy_node = 0;
    terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
    if (all(terminated)) break;
    tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays

    /* push lazily created node (if any) for later traversal */
    if (unlikely(lazy_node)) {
      *sptr_node = lazy_node; sptr_node++;
      *sptr_near = neg_inf; sptr_near++;
    }
  }

  /* write back -inf tfar for all lanes that were found occluded */
  vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
/*! Coherent packet occlusion: rays are grouped by direction octant and each
 *  group is traversed with a shared Frustum for node culling. Unlike the
 *  intersect variant, no distance ordering is performed (any-hit semantics);
 *  each stack entry instead carries a bit mask of the lanes it is active
 *  for. Occluded lanes get ray.tfar = -inf written back at the end. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
                                                                                                  Accel::Intersectors* __restrict__ This,
                                                                                                  RayK<K>& __restrict__ ray,
                                                                                                  RayQueryContext* context)
{
  BVH* __restrict__ bvh = (BVH*)This->ptr;

  /* filter out invalid rays */
  vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  valid &= ray.valid();
#endif

  /* return if there are no valid rays */
  size_t valid_bits = movemask(valid);
  if (unlikely(valid_bits == 0)) return;

  /* verify correct input */
  assert(all(valid, ray.valid()));
  assert(all(valid, ray.tnear() >= 0.0f));
  assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  Precalculations pre(valid,ray);

  /* load ray */
  TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);

  vbool<K> terminated = !valid;

  /* invalid rays get octant 0xffffffff so they never join any valid group */
  vint<K> octant = ray.octant();
  octant = select(valid, octant, vint<K>(0xffffffff));

  /* each iteration traverses one octant group of rays */
  do
  {
    const size_t valid_index = bsf(valid_bits);
    vbool<K> octant_valid = octant[valid_index] == octant;
    valid_bits &= ~(size_t)movemask(octant_valid);

    /* lanes outside the group are disabled by an inverted [tnear,tfar] interval */
    tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
    tray.tfar  = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));

    Frustum<robust> frustum;
    frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);

    StackItemMaskT<NodeRef> stack[stackSizeSingle];  // stack of nodes
    StackItemMaskT<NodeRef>* stackPtr = stack + 1;   // current stack pointer
    stack[0].ptr  = bvh->root;
    stack[0].mask = movemask(octant_valid);

    while (1) pop:
    {
      /* pop next node from stack */
      if (unlikely(stackPtr == stack)) break;
      stackPtr--;
      NodeRef cur = NodeRef(stackPtr->ptr);

      /* cull node if its active rays have already been terminated */
      size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
      if (unlikely(m_active == 0)) continue;

      while (likely(!cur.isLeaf()))
      {
        /* process nodes */
        //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
        const NodeRef nodeRef = cur;
        const AABBNode* __restrict__ const node = nodeRef.getAABBNode();

        /* cheap shared frustum test first: mask of children the group might hit */
        vfloat<N> fmin;
        size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
        if (unlikely(!m_frustum_node)) goto pop;

        cur = BVH::emptyNode;
        m_active = 0;

#if defined(__AVX__)
        //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
#endif
        //size_t num_child_hits = 0;
        do {
          const size_t i = bscf(m_frustum_node);

          /* full per-ray box test only for frustum-surviving children */
          vfloat<K> lnearP;
          vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
          STAT3(normal.trav_nodes, 1, 1, 1);
          BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);

          if (likely(any(lhit)))
          {
            const NodeRef child = node->child(i);
            assert(child != BVH::emptyNode);
            BVHN<N>::prefetch(child);

            /* continue with the latest hit child; push the previous one (no distance sorting here) */
            if (likely(cur != BVH::emptyNode)) {
              //num_child_hits++;
              stackPtr->ptr  = cur;
              stackPtr->mask = m_active;
              stackPtr++;
            }
            cur = child;
            m_active = movemask(lhit);
          }
        } while(m_frustum_node);

        if (unlikely(cur == BVH::emptyNode)) goto pop;
      }

      /* intersect leaf */
      assert(cur != BVH::invalidNode);
      assert(cur != BVH::emptyNode);
#if defined(__AVX__)
      STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
#endif
      if (unlikely(!m_active)) continue;

      size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);

      size_t lazy_node = 0;
      terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
      octant_valid &= !terminated;
      if (unlikely(none(octant_valid))) break;
      tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays

      /* push lazily created node (if any) for later traversal */
      if (unlikely(lazy_node)) {
        stackPtr->ptr  = lazy_node;
        stackPtr->mask = movemask(octant_valid);
        stackPtr++;
      }
    }
  } while(valid_bits);

  /* write back -inf tfar for all lanes that were found occluded */
  vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
}
}

View file

@ -0,0 +1,58 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "../common/ray.h"
#include "../common/stack_item.h"
#include "node_intersector_frustum.h"
namespace embree
{
namespace isa
{
template<int K, bool robust>
struct TravRayK;

/*! BVH hybrid packet intersector. Switches between packet and single ray traversal (optional).
 *  N = BVH branching factor, K = packet width, types = supported node types,
 *  robust = use the robust (watertight) box test, single = allow switching
 *  to single-ray traversal when few lanes remain active. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true>
class BVHNIntersectorKHybrid
{
  /* shortcuts for frequently used types */
  typedef typename PrimitiveIntersectorK::Precalculations Precalculations;
  typedef typename PrimitiveIntersectorK::Primitive Primitive;
  typedef BVHN<N> BVH;
  typedef typename BVH::NodeRef NodeRef;
  typedef typename BVH::BaseNode BaseNode;
  typedef typename BVH::AABBNode AABBNode;

  /*! traversal stack sizes (in entries); up to N-1 children can be pushed per level */
  static const size_t stackSizeSingle = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store
  static const size_t stackSizeChunk = 1+(N-1)*BVH::maxDepth;

  /*! number of active lanes at or below which chunk traversal switches to single-ray traversal */
  static const size_t switchThresholdIncoherent = \
  (K==4)  ? 3 :
  (K==8)  ? ((N==4) ? 5 : 7) :
  (K==16) ? 14 : // 14 seems to work best for KNL due to better ordered chunk traversal
  0;

private:
  /*! single-ray traversal of lane k of the packet */
  static void intersect1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
                         RayHitK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);

  /*! single-ray occlusion (any-hit) traversal of lane k; returns true if occluded */
  static bool occluded1(Accel::Intersectors* This, const BVH* bvh, NodeRef root, size_t k, Precalculations& pre,
                        RayK<K>& ray, const TravRayK<K, robust>& tray, RayQueryContext* context);

public:
  /*! packet entry points; *valid holds -1 in each active lane */
  static void intersect(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
  static void occluded (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);

  /*! frustum-culled fast paths for coherent ray packets */
  static void intersectCoherent(vint<K>* valid, Accel::Intersectors* This, RayHitK<K>& ray, RayQueryContext* context);
  static void occludedCoherent (vint<K>* valid, Accel::Intersectors* This, RayK<K>& ray, RayQueryContext* context);
};
/*! BVH packet intersector: pure chunk traversal — the hybrid with
 *  single-ray switching disabled (single = false). */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK>
class BVHNIntersectorKChunk : public BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, false> {};
}
}

View file

@ -0,0 +1,62 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_hybrid.cpp"
namespace embree
{
namespace isa
{
////////////////////////////////////////////////////////////////////////////////
/// BVH4Intersector4 Definitions
////////////////////////////////////////////////////////////////////////////////
/* triangle intersectors (static geometry) */
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoellerNoFilter, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKPluecker<4 COMMA 4 COMMA true> > >));

/* motion-blurred triangle intersectors (BVH_AN2_AN4D node types) */
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));

/* quad intersectors */
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoellerNoFilter,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKPluecker<4 COMMA 4 COMMA true > > >));

/* motion-blurred quad intersectors */
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridPluecker,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKPluecker<4 COMMA 4 COMMA true > > >));

/* curve/point intersectors (OBB node variants) */
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersectorK<4> >));

/* subdivision-surface patch intersectors */
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));

/* user geometry intersectors (pure chunk traversal) */
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4> >));
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4MB> >));

/* instance and instance-array intersectors (pure chunk traversal) */
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorK<4>> >));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorKMB<4>> >));
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorK<4>> >));
IF_ENABLED_INSTANCE_ARRAY(DEFINE_INTERSECTOR4(BVH4InstanceArrayMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceArrayIntersectorKMB<4>> >));

/* grid intersectors */
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersectorKPluecker <4 COMMA 4 COMMA true> >));
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersectorKPluecker <4 COMMA 4 COMMA true> >));
}
}

View file

@ -0,0 +1,229 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! BVHN AABBNode */
template<typename NodeRef, int N>
struct AABBNode_t : public BaseNode_t<NodeRef, N>
{
using BaseNode_t<NodeRef,N>::children;
/*! Builder functor: allocates a new, cleared node and returns an encoded reference to it. */
struct Create
{
  __forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc, size_t numChildren = 0) const
  {
    AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t),NodeRef::byteNodeAlignment); node->clear();
    return NodeRef::encodeNode(node);
  }
};
/*! Builder functor: stores child reference and bounds into slot i of an existing node. */
struct Set
{
  __forceinline void operator() (NodeRef node, size_t i, NodeRef child, const BBox3fa& bounds) const {
    node.getAABBNode()->setRef(i,child);
    node.getAABBNode()->setBounds(i,bounds);
  }
};
/*! Builder functor: allocates a node and fills in the bounds of all children
 *  up front from the given build records; references are set later (Set2/Set3). */
struct Create2
{
  template<typename BuildRecord>
  __forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
  {
    AABBNode_t* node = (AABBNode_t*) alloc.malloc0(sizeof(AABBNode_t), NodeRef::byteNodeAlignment); node->clear();
    for (size_t i=0; i<num; i++) node->setBounds(i,children[i].bounds());
    return NodeRef::encodeNode(node);
  }
};
/*! Builder functor: writes the child references into a node previously
 *  created via Create2 (bounds are already set there). */
struct Set2
{
  template<typename BuildRecord>
  __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
  {
#if defined(DEBUG)
    // check that empty children are only at the end of the child list
    bool emptyChild = false;
    for (size_t i=0; i<num; i++) {
      emptyChild |= (children[i] == NodeRef::emptyNode);
      assert(emptyChild == (children[i] == NodeRef::emptyNode));
    }
#endif
    AABBNode_t* node = ref.getAABBNode();
    for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
    return ref;
  }
};
/*! Builder functor: like Set2, but when the build record carries an
 *  allocation barrier it additionally hands the primitive range back to the
 *  allocator as a reusable memory block. */
struct Set3
{
  Set3 (FastAllocator* allocator, PrimRef* prims)
    : allocator(allocator), prims(prims) {}

  template<typename BuildRecord>
  __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
  {
#if defined(DEBUG)
    // check that empty children are only at the end of the child list
    bool emptyChild = false;
    for (size_t i=0; i<num; i++) {
      emptyChild |= (children[i] == NodeRef::emptyNode);
      assert(emptyChild == (children[i] == NodeRef::emptyNode));
    }
#endif
    AABBNode_t* node = ref.getAABBNode();
    for (size_t i=0; i<num; i++) node->setRef(i,children[i]);

    if (unlikely(precord.alloc_barrier))
    {
      /* recycle the primitive array range covered by this subtree */
      PrimRef* begin = &prims[precord.prims.begin()];
      PrimRef* end   = &prims[precord.prims.end()]; // FIXME: extended end for spatial split builder!!!!!
      size_t bytes = (size_t)end - (size_t)begin;
      allocator->addBlock(begin,bytes);
    }
    return ref;
  }

  FastAllocator* const allocator;  // allocator that receives the recycled blocks
  PrimRef* const prims;            // primitive array the build records index into
};
/*! Clears the node: all child boxes become empty (lower=+inf, upper=-inf)
 *  and all child references are reset by the base class. */
__forceinline void clear() {
  lower_x = lower_y = lower_z = pos_inf;
  upper_x = upper_y = upper_z = neg_inf;
  BaseNode_t<NodeRef,N>::clear();
}
/*! Sets only the reference (ID) of child i; bounds are set separately via setBounds(). */
__forceinline void setRef(size_t i, const NodeRef& ref) {
  assert(i < N);
  children[i] = ref;
}
/*! Sets bounding box of child i (stored in structure-of-arrays layout). */
__forceinline void setBounds(size_t i, const BBox3fa& bounds)
{
  assert(i < N);
  lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
  upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
}
/*! Sets both bounding box and reference (ID) of child i. */
__forceinline void set(size_t i, const NodeRef& ref, const BBox3fa& bounds) {
  setBounds(i,bounds);
  children[i] = ref;
}
/*! Returns the merged bounds of this node, i.e. the componentwise
 *  min/max over all child boxes. */
__forceinline BBox3fa bounds() const {
  const Vec3fa mn(reduce_min(lower_x), reduce_min(lower_y), reduce_min(lower_z));
  const Vec3fa mx(reduce_max(upper_x), reduce_max(upper_y), reduce_max(upper_z));
  return BBox3fa(mn, mx);
}
/*! Returns bounds of child i, gathered from the structure-of-arrays storage. */
__forceinline BBox3fa bounds(size_t i) const
{
  assert(i < N);
  const Vec3fa lower(lower_x[i],lower_y[i],lower_z[i]);
  const Vec3fa upper(upper_x[i],upper_y[i],upper_z[i]);
  return BBox3fa(lower,upper);
}
/*! Returns extent of bounds of specified child. */
__forceinline Vec3fa extend(size_t i) const {
return bounds(i).size();
}
/*! Returns bounds of all children (implemented later as specializations) */
__forceinline void bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const;
/*! swap two children of the node (reference plus all six bound planes) */
__forceinline void swap(size_t i, size_t j)
{
assert(i<N && j<N);
std::swap(children[i],children[j]);
std::swap(lower_x[i],lower_x[j]);
std::swap(lower_y[i],lower_y[j]);
std::swap(lower_z[i],lower_z[j]);
std::swap(upper_x[i],upper_x[j]);
std::swap(upper_y[i],upper_y[j]);
std::swap(upper_z[i],upper_z[j]);
}
/*! swap the children of two nodes (child i of a with child j of b) */
__forceinline static void swap(AABBNode_t* a, size_t i, AABBNode_t* b, size_t j)
{
assert(i<N && j<N);
std::swap(a->children[i],b->children[j]);
std::swap(a->lower_x[i],b->lower_x[j]);
std::swap(a->lower_y[i],b->lower_y[j]);
std::swap(a->lower_z[i],b->lower_z[j]);
std::swap(a->upper_x[i],b->upper_x[j]);
std::swap(a->upper_y[i],b->upper_y[j]);
std::swap(a->upper_z[i],b->upper_z[j]);
}
/*! compacts a node (moves empty children to the end) */
__forceinline static void compact(AABBNode_t* a)
{
/* find right most filled node */
ssize_t j=N;
for (j=j-1; j>=0; j--)
if (a->child(j) != NodeRef::emptyNode)
break;
/* replace empty nodes with filled nodes */
// scan left-to-right; each empty slot is swapped with the right-most
// filled slot j, after which j retreats to the next filled slot
for (ssize_t i=0; i<j; i++) {
if (a->child(i) == NodeRef::emptyNode) {
a->swap(i,j);
for (j=j-1; j>i; j--)
if (a->child(j) != NodeRef::emptyNode)
break;
}
}
}
/*! Returns reference to specified child */
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
/*! output operator (debug dump of all planes and children) */
friend embree_ostream operator<<(embree_ostream o, const AABBNode_t& n)
{
o << "AABBNode { " << embree_endl;
o << " lower_x " << n.lower_x << embree_endl;
o << " upper_x " << n.upper_x << embree_endl;
o << " lower_y " << n.lower_y << embree_endl;
o << " upper_y " << n.upper_y << embree_endl;
o << " lower_z " << n.lower_z << embree_endl;
o << " upper_z " << n.upper_z << embree_endl;
o << " children = ";
for (size_t i=0; i<N; i++) o << n.children[i] << " ";
o << embree_endl;
o << "}" << embree_endl;
return o;
}
public:
// bounds are stored in struct-of-arrays layout: one SIMD vector per
// plane, one lane per child
vfloat<N> lower_x; //!< X dimension of lower bounds of all N children.
vfloat<N> upper_x; //!< X dimension of upper bounds of all N children.
vfloat<N> lower_y; //!< Y dimension of lower bounds of all N children.
vfloat<N> upper_y; //!< Y dimension of upper bounds of all N children.
vfloat<N> lower_z; //!< Z dimension of lower bounds of all N children.
vfloat<N> upper_z; //!< Z dimension of upper bounds of all N children.
};
/*! BVH4 specialization: transposes the SoA per-plane vectors into four
 *  per-child AoS boxes; the unused 4th lane is padded with zero. */
template<>
__forceinline void AABBNode_t<NodeRefPtr<4>,4>::bounds(BBox<vfloat4>& bounds0, BBox<vfloat4>& bounds1, BBox<vfloat4>& bounds2, BBox<vfloat4>& bounds3) const {
transpose(lower_x,lower_y,lower_z,vfloat4(zero),bounds0.lower,bounds1.lower,bounds2.lower,bounds3.lower);
transpose(upper_x,upper_y,upper_z,vfloat4(zero),bounds0.upper,bounds1.upper,bounds2.upper,bounds3.upper);
}
}

View file

@ -0,0 +1,255 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! Motion Blur AABBNode */
template<typename NodeRef, int N>
struct AABBNodeMB_t : public BaseNode_t<NodeRef, N>
{
using BaseNode_t<NodeRef,N>::children;
typedef BVHNodeRecord<NodeRef> NodeRecord;
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
/*! Builder functor: allocates and clears a fresh motion-blur node.
 *  The children/num arguments are unused; the node is filled via Set. */
struct Create
{
template<typename BuildRecord>
__forceinline NodeRef operator() (BuildRecord* children, const size_t num, const FastAllocator::CachedAllocator& alloc) const
{
AABBNodeMB_t* node = (AABBNodeMB_t*) alloc.malloc0(sizeof(AABBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
return NodeRef::encodeNode(node);
}
};
/*! Builder functor: writes child refs and linear bounds into the node
 *  and returns a record carrying the merged linear bounds. */
struct Set
{
template<typename BuildRecord>
__forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
{
#if defined(DEBUG)
// check that empty children are only at the end of the child list
bool emptyChild = false;
for (size_t i=0; i<num; i++) {
emptyChild |= (children[i].ref == NodeRef::emptyNode);
assert(emptyChild == (children[i].ref == NodeRef::emptyNode));
}
#endif
AABBNodeMB_t* node = ref.getAABBNodeMB();
LBBox3fa bounds = empty;
for (size_t i=0; i<num; i++) {
node->setRef(i,children[i].ref);
node->setBounds(i,children[i].lbounds);
bounds.extend(children[i].lbounds);
}
return NodeRecordMB(ref,bounds);
}
};
/*! Like Set, but remaps each child's linear bounds to the global time
 *  range tbounds before storing them. */
struct SetTimeRange
{
__forceinline SetTimeRange(BBox1f tbounds) : tbounds(tbounds) {}
template<typename BuildRecord>
__forceinline NodeRecordMB operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRecordMB* children, const size_t num) const
{
AABBNodeMB_t* node = ref.getAABBNodeMB();
LBBox3fa bounds = empty;
for (size_t i=0; i<num; i++) {
node->setRef(i, children[i].ref);
node->setBounds(i, children[i].lbounds, tbounds);
bounds.extend(children[i].lbounds);
}
return NodeRecordMB(ref,bounds);
}
BBox1f tbounds; //!< global time range the stored bounds are expressed in
};
/*! Clears the node: empty start bounds, zero motion deltas, empty children. */
__forceinline void clear() {
lower_x = lower_y = lower_z = vfloat<N>(pos_inf);
upper_x = upper_y = upper_z = vfloat<N>(neg_inf);
lower_dx = lower_dy = lower_dz = vfloat<N>(0.0f);
upper_dx = upper_dy = upper_dz = vfloat<N>(0.0f);
BaseNode_t<NodeRef,N>::clear();
}
/*! Sets ID of child. */
__forceinline void setRef(size_t i, NodeRef ref) {
children[i] = ref;
}
/*! Sets bounding box of child: stores the time-0 box plus the linear
 *  delta towards the time-1 box. Inputs are clamped to +-FLT_MAX and
 *  slightly enlarged so lerped bounds remain conservative. */
__forceinline void setBounds(size_t i, const BBox3fa& bounds0_i, const BBox3fa& bounds1_i)
{
/*! for empty bounds we have to avoid inf-inf=nan */
BBox3fa bounds0(min(bounds0_i.lower,Vec3fa(+FLT_MAX)),max(bounds0_i.upper,Vec3fa(-FLT_MAX)));
BBox3fa bounds1(min(bounds1_i.lower,Vec3fa(+FLT_MAX)),max(bounds1_i.upper,Vec3fa(-FLT_MAX)));
bounds0 = bounds0.enlarge_by(4.0f*float(ulp));
bounds1 = bounds1.enlarge_by(4.0f*float(ulp));
Vec3fa dlower = bounds1.lower-bounds0.lower;
Vec3fa dupper = bounds1.upper-bounds0.upper;
lower_x[i] = bounds0.lower.x; lower_y[i] = bounds0.lower.y; lower_z[i] = bounds0.lower.z;
upper_x[i] = bounds0.upper.x; upper_y[i] = bounds0.upper.y; upper_z[i] = bounds0.upper.z;
lower_dx[i] = dlower.x; lower_dy[i] = dlower.y; lower_dz[i] = dlower.z;
upper_dx[i] = dupper.x; upper_dy[i] = dupper.y; upper_dz[i] = dupper.z;
}
/*! Sets bounding box of child. */
__forceinline void setBounds(size_t i, const LBBox3fa& bounds) {
setBounds(i, bounds.bounds0, bounds.bounds1);
}
/*! Sets bounding box of child, remapped to global time range tbounds. */
__forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds) {
setBounds(i, bounds.global(tbounds));
}
/*! Sets static bounding box and ID of child; the motion deltas stay at
 *  zero (from clear()), i.e. the box does not move over time. */
__forceinline void set(size_t i, NodeRef ref, const BBox3fa& bounds) {
lower_x[i] = bounds.lower.x; lower_y[i] = bounds.lower.y; lower_z[i] = bounds.lower.z;
upper_x[i] = bounds.upper.x; upper_y[i] = bounds.upper.y; upper_z[i] = bounds.upper.z;
children[i] = ref;
}
/*! Sets bounding box and ID of child from a 4D node record. */
__forceinline void set(size_t i, const NodeRecordMB4D& child)
{
setRef(i, child.ref);
setBounds(i, child.lbounds, child.dt);
}
/*! Return bounding box for time 0 */
__forceinline BBox3fa bounds0(size_t i) const {
return BBox3fa(Vec3fa(lower_x[i],lower_y[i],lower_z[i]),
Vec3fa(upper_x[i],upper_y[i],upper_z[i]));
}
/*! Return bounding box for time 1 (time-0 bounds plus deltas) */
__forceinline BBox3fa bounds1(size_t i) const {
return BBox3fa(Vec3fa(lower_x[i]+lower_dx[i],lower_y[i]+lower_dy[i],lower_z[i]+lower_dz[i]),
Vec3fa(upper_x[i]+upper_dx[i],upper_y[i]+upper_dy[i],upper_z[i]+upper_dz[i]));
}
/*! Returns bounds of node, merged over all children and both time endpoints. */
__forceinline BBox3fa bounds() const {
return BBox3fa(Vec3fa(reduce_min(min(lower_x,lower_x+lower_dx)),
reduce_min(min(lower_y,lower_y+lower_dy)),
reduce_min(min(lower_z,lower_z+lower_dz))),
Vec3fa(reduce_max(max(upper_x,upper_x+upper_dx)),
reduce_max(max(upper_y,upper_y+upper_dy)),
reduce_max(max(upper_z,upper_z+upper_dz))));
}
/*! Return bounding box of child i, merged over both time endpoints */
__forceinline BBox3fa bounds(size_t i) const {
return merge(bounds0(i),bounds1(i));
}
/*! Return linear bounding box of child i */
__forceinline LBBox3fa lbounds(size_t i) const {
return LBBox3fa(bounds0(i),bounds1(i));
}
/*! Return bounding box of child i at specified time */
__forceinline BBox3fa bounds(size_t i, float time) const {
return lerp(bounds0(i),bounds1(i),time);
}
/*! Returns the expected surface area when randomly sampling the time. */
__forceinline float expectedHalfArea(size_t i) const {
return lbounds(i).expectedHalfArea();
}
/*! Returns the expected surface area when randomly sampling the time
 *  within the range t0t1. */
__forceinline float expectedHalfArea(size_t i, const BBox1f& t0t1) const {
return lbounds(i).expectedHalfArea(t0t1);
}
/*! swap two children of the node (reference, bounds and deltas) */
__forceinline void swap(size_t i, size_t j)
{
assert(i<N && j<N);
std::swap(children[i],children[j]);
std::swap(lower_x[i],lower_x[j]);
std::swap(upper_x[i],upper_x[j]);
std::swap(lower_y[i],lower_y[j]);
std::swap(upper_y[i],upper_y[j]);
std::swap(lower_z[i],lower_z[j]);
std::swap(upper_z[i],upper_z[j]);
std::swap(lower_dx[i],lower_dx[j]);
std::swap(upper_dx[i],upper_dx[j]);
std::swap(lower_dy[i],lower_dy[j]);
std::swap(upper_dy[i],upper_dy[j]);
std::swap(lower_dz[i],lower_dz[j]);
std::swap(upper_dz[i],upper_dz[j]);
}
/*! compacts a node (moves empty children to the end) */
__forceinline static void compact(AABBNodeMB_t* a)
{
/* find right most filled node */
ssize_t j=N;
for (j=j-1; j>=0; j--)
if (a->child(j) != NodeRef::emptyNode)
break;
/* replace empty nodes with filled nodes */
for (ssize_t i=0; i<j; i++) {
if (a->child(i) == NodeRef::emptyNode) {
a->swap(i,j);
for (j=j-1; j>i; j--)
if (a->child(j) != NodeRef::emptyNode)
break;
}
}
}
/*! Returns reference to specified child */
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
/*! stream output operator */
friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB_t& n)
{
cout << "AABBNodeMB {" << embree_endl;
for (size_t i=0; i<N; i++)
{
const BBox3fa b0 = n.bounds0(i);
const BBox3fa b1 = n.bounds1(i);
cout << " child" << i << " { " << embree_endl;
cout << " bounds0 = " << b0 << ", " << embree_endl;
cout << " bounds1 = " << b1 << ", " << embree_endl;
cout << " }";
}
cout << "}";
return cout;
}
public:
vfloat<N> lower_x; //!< X dimension of time-0 lower bounds of all N children.
vfloat<N> upper_x; //!< X dimension of time-0 upper bounds of all N children.
vfloat<N> lower_y; //!< Y dimension of time-0 lower bounds of all N children.
vfloat<N> upper_y; //!< Y dimension of time-0 upper bounds of all N children.
vfloat<N> lower_z; //!< Z dimension of time-0 lower bounds of all N children.
vfloat<N> upper_z; //!< Z dimension of time-0 upper bounds of all N children.
vfloat<N> lower_dx; //!< X dimension of linear delta to time-1 lower bounds.
vfloat<N> upper_dx; //!< X dimension of linear delta to time-1 upper bounds.
vfloat<N> lower_dy; //!< Y dimension of linear delta to time-1 lower bounds.
vfloat<N> upper_dy; //!< Y dimension of linear delta to time-1 upper bounds.
vfloat<N> lower_dz; //!< Z dimension of linear delta to time-1 lower bounds.
vfloat<N> upper_dz; //!< Z dimension of linear delta to time-1 upper bounds.
};
}

View file

@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_aabb_mb.h"
namespace embree
{
/*! Aligned 4D Motion Blur Node */
template<typename NodeRef, int N>
struct AABBNodeMB4D_t : public AABBNodeMB_t<NodeRef, N>
{
using BaseNode_t<NodeRef,N>::children;
using AABBNodeMB_t<NodeRef,N>::set;
typedef BVHNodeRecord<NodeRef> NodeRecord;
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
/*! Builder functor: allocates a 4D node when time splits occurred,
 *  otherwise falls back to a plain motion-blur node; both are cleared. */
struct Create
{
template<typename BuildRecord>
__forceinline NodeRef operator() (BuildRecord*, const size_t, const FastAllocator::CachedAllocator& alloc, bool hasTimeSplits = true) const
{
if (hasTimeSplits)
{
AABBNodeMB4D_t* node = (AABBNodeMB4D_t*) alloc.malloc0(sizeof(AABBNodeMB4D_t),NodeRef::byteNodeAlignment); node->clear();
return NodeRef::encodeNode(node);
}
else
{
AABBNodeMB_t<NodeRef,N>* node = (AABBNodeMB_t<NodeRef,N>*) alloc.malloc0(sizeof(AABBNodeMB_t<NodeRef,N>),NodeRef::byteNodeAlignment); node->clear();
return NodeRef::encodeNode(node);
}
}
};
/*! Builder functor: writes the children into whichever node type
 *  Create produced (dispatch on the encoded node type). */
struct Set
{
template<typename BuildRecord>
__forceinline void operator() (const BuildRecord&, const BuildRecord*, NodeRef ref, NodeRecordMB4D* children, const size_t num) const
{
#if defined(DEBUG)
// check that empty children are only at the end of the child list
bool emptyChild = false;
for (size_t i=0; i<num; i++) {
emptyChild |= (children[i].ref == NodeRef::emptyNode);
assert(emptyChild == (children[i].ref == NodeRef::emptyNode));
}
#endif
if (likely(ref.isAABBNodeMB())) {
for (size_t i=0; i<num; i++)
ref.getAABBNodeMB()->set(i, children[i]);
} else {
for (size_t i=0; i<num; i++)
ref.getAABBNodeMB4D()->set(i, children[i]);
}
}
};
/*! Clears the node: empty (inverted) time ranges plus cleared MB state. */
__forceinline void clear() {
lower_t = vfloat<N>(pos_inf);
upper_t = vfloat<N>(neg_inf);
AABBNodeMB_t<NodeRef,N>::clear();
}
/*! Sets bounding box of child together with its active time range. */
__forceinline void setBounds(size_t i, const LBBox3fa& bounds, const BBox1f& tbounds)
{
AABBNodeMB_t<NodeRef,N>::setBounds(i, bounds.global(tbounds));
lower_t[i] = tbounds.lower;
// nudge an exact 1.0 endpoint one ulp past 1.0 so a query at time==1.0
// still falls inside this child's time range
upper_t[i] = tbounds.upper == 1.0f ? 1.0f+float(ulp) : tbounds.upper;
}
/*! Sets bounding box and ID of child. */
__forceinline void set(size_t i, const NodeRecordMB4D& child) {
AABBNodeMB_t<NodeRef,N>::setRef(i,child.ref);
setBounds(i, child.lbounds, child.dt);
}
/*! Returns the expected surface area when randomly sampling the time
 *  restricted to the child's own time range. */
__forceinline float expectedHalfArea(size_t i) const {
return AABBNodeMB_t<NodeRef,N>::lbounds(i).expectedHalfArea(timeRange(i));
}
/*! returns time range for specified child */
__forceinline BBox1f timeRange(size_t i) const {
return BBox1f(lower_t[i],upper_t[i]);
}
/*! stream output operator */
friend embree_ostream operator<<(embree_ostream cout, const AABBNodeMB4D_t& n)
{
cout << "AABBNodeMB4D {" << embree_endl;
for (size_t i=0; i<N; i++)
{
const BBox3fa b0 = n.bounds0(i);
const BBox3fa b1 = n.bounds1(i);
cout << " child" << i << " { " << embree_endl;
cout << " bounds0 = " << lerp(b0,b1,n.lower_t[i]) << ", " << embree_endl;
cout << " bounds1 = " << lerp(b0,b1,n.upper_t[i]) << ", " << embree_endl;
cout << " time_bounds = " << n.lower_t[i] << ", " << n.upper_t[i] << embree_endl;
cout << " }";
}
cout << "}";
return cout;
}
public:
vfloat<N> lower_t; //!< time dimension of lower bounds of all N children
vfloat<N> upper_t; //!< time dimension of upper bounds of all N children
};
}

View file

@ -0,0 +1,43 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_ref.h"
namespace embree
{
/*! BVHN Base Node */
template<typename NodeRef, int N>
struct BaseNode_t
{
  /*! Resets every child slot to the empty sentinel. */
  __forceinline void clear()
  {
    size_t slot = N;
    while (slot > 0)
      children[--slot] = NodeRef::emptyNode;
  }

  /*! Returns reference to the specified child */
  __forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
  __forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }

  /*! Verifies the node invariant: once an empty child slot occurs, all
   *  following slots must be empty as well. */
  __forceinline bool verify() const
  {
    bool seen_empty = false;
    for (size_t i=0; i<N; i++) {
      const bool is_empty = (child(i) == NodeRef::emptyNode);
      if (seen_empty && !is_empty)
        return false;
      seen_empty |= is_empty;
    }
    return true;
  }

  NodeRef children[N]; //!< the N child references (each an inner node or leaf)
};
}

View file

@ -0,0 +1,98 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! Node with unaligned bounds */
template<typename NodeRef, int N>
struct OBBNode_t : public BaseNode_t<NodeRef, N>
{
using BaseNode_t<NodeRef,N>::children;
/*! Builder functor: allocates and clears a fresh OBB node. */
struct Create
{
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
{
OBBNode_t* node = (OBBNode_t*) alloc.malloc0(sizeof(OBBNode_t),NodeRef::byteNodeAlignment); node->clear();
return NodeRef::encodeNode(node);
}
};
/*! Builder functor: writes one child reference and its oriented bounds. */
struct Set
{
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const OBBox3fa& bounds) const {
node.ungetAABBNode()->setRef(i,child);
node.ungetAABBNode()->setBounds(i,bounds);
}
};
/*! Clears the node (transforms poisoned with NaN, children emptied). */
__forceinline void clear()
{
naabb.l.vx = Vec3fa(nan);
naabb.l.vy = Vec3fa(nan);
naabb.l.vz = Vec3fa(nan);
naabb.p = Vec3fa(nan);
BaseNode_t<NodeRef,N>::clear();
}
/*! Sets bounding box: stores the affine transform under which the
 *  child's oriented box becomes the unit box [0,1]^3. */
__forceinline void setBounds(size_t i, const OBBox3fa& b)
{
assert(i < N);
AffineSpace3fa space = b.space;
space.p -= b.bounds.lower;
// scale so the box extent (clamped to avoid division by zero) maps to [0,1]
space = AffineSpace3fa::scale(1.0f/max(Vec3fa(1E-19f),b.bounds.upper-b.bounds.lower))*space;
naabb.l.vx.x[i] = space.l.vx.x;
naabb.l.vx.y[i] = space.l.vx.y;
naabb.l.vx.z[i] = space.l.vx.z;
naabb.l.vy.x[i] = space.l.vy.x;
naabb.l.vy.y[i] = space.l.vy.y;
naabb.l.vy.z[i] = space.l.vy.z;
naabb.l.vz.x[i] = space.l.vz.x;
naabb.l.vz.y[i] = space.l.vz.y;
naabb.l.vz.z[i] = space.l.vz.z;
naabb.p.x[i] = space.p.x;
naabb.p.y[i] = space.p.y;
naabb.p.z[i] = space.p.z;
}
/*! Sets ID of child. */
__forceinline void setRef(size_t i, const NodeRef& ref) {
assert(i < N);
children[i] = ref;
}
/*! Returns the extent of the bounds of the ith child */
// NOTE(review): reconstructs the extent as the reciprocal length of the
// stored transform's basis columns; this assumes the rotational part of
// the space passed to setBounds is orthonormal -- confirm at call sites.
__forceinline Vec3fa extent(size_t i) const {
assert(i<N);
const Vec3fa vx(naabb.l.vx.x[i],naabb.l.vx.y[i],naabb.l.vx.z[i]);
const Vec3fa vy(naabb.l.vy.x[i],naabb.l.vy.y[i],naabb.l.vy.z[i]);
const Vec3fa vz(naabb.l.vz.x[i],naabb.l.vz.y[i],naabb.l.vz.z[i]);
return rsqrt(vx*vx + vy*vy + vz*vz);
}
/*! Returns reference to specified child */
__forceinline NodeRef& child(size_t i) { assert(i<N); return children[i]; }
__forceinline const NodeRef& child(size_t i) const { assert(i<N); return children[i]; }
/*! output operator */
friend embree_ostream operator<<(embree_ostream o, const OBBNode_t& n)
{
o << "UnAABBNode { " << n.naabb << " } " << embree_endl;
return o;
}
public:
AffineSpace3vf<N> naabb; //!< non-axis aligned bounding boxes (bounds are [0,1] in specified space)
};
}

View file

@ -0,0 +1,90 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! Motion-blur node with unaligned (oriented) bounds. */
template<typename NodeRef, int N>
struct OBBNodeMB_t : public BaseNode_t<NodeRef, N>
{
using BaseNode_t<NodeRef,N>::children;
/*! Builder functor: allocates and clears a fresh motion-blur OBB node. */
struct Create
{
__forceinline NodeRef operator() (const FastAllocator::CachedAllocator& alloc) const
{
OBBNodeMB_t* node = (OBBNodeMB_t*) alloc.malloc0(sizeof(OBBNodeMB_t),NodeRef::byteNodeAlignment); node->clear();
return NodeRef::encodeNode(node);
}
};
/*! Builder functor: writes one child reference and its time-dependent
 *  oriented bounds (remapped to the global time range dt). */
struct Set
{
__forceinline void operator() (NodeRef node, size_t i, NodeRef child, const LinearSpace3fa& space, const LBBox3fa& lbounds, const BBox1f dt) const {
node.ungetAABBNodeMB()->setRef(i,child);
node.ungetAABBNodeMB()->setBounds(i,space,lbounds.global(dt));
}
};
/*! Clears the node. */
__forceinline void clear()
{
space0 = one;
//b0.lower = b0.upper = Vec3fa(nan);
b1.lower = b1.upper = Vec3fa(nan);
BaseNode_t<NodeRef,N>::clear();
}
/*! Sets space and bounding boxes. */
__forceinline void setBounds(size_t i, const AffineSpace3fa& space, const LBBox3fa& lbounds) {
setBounds(i,space,lbounds.bounds0,lbounds.bounds1);
}
/*! Sets space and bounding boxes: stores the transform that maps the
 *  time-0 box 'a' onto the unit box; the time-1 box 'c' is expressed in
 *  that same normalized space and stored in b1 (the time-0 unit bounds
 *  are implicit, see the commented-out b0 member). */
__forceinline void setBounds(size_t i, const AffineSpace3fa& s0, const BBox3fa& a, const BBox3fa& c)
{
assert(i < N);
AffineSpace3fa space = s0;
space.p -= a.lower;
Vec3fa scale = 1.0f/max(Vec3fa(1E-19f),a.upper-a.lower); // clamp extent to avoid division by zero
space = AffineSpace3fa::scale(scale)*space;
BBox3fa a1((a.lower-a.lower)*scale,(a.upper-a.lower)*scale);
BBox3fa c1((c.lower-a.lower)*scale,(c.upper-a.lower)*scale);
space0.l.vx.x[i] = space.l.vx.x; space0.l.vx.y[i] = space.l.vx.y; space0.l.vx.z[i] = space.l.vx.z;
space0.l.vy.x[i] = space.l.vy.x; space0.l.vy.y[i] = space.l.vy.y; space0.l.vy.z[i] = space.l.vy.z;
space0.l.vz.x[i] = space.l.vz.x; space0.l.vz.y[i] = space.l.vz.y; space0.l.vz.z[i] = space.l.vz.z;
space0.p .x[i] = space.p .x; space0.p .y[i] = space.p .y; space0.p .z[i] = space.p .z;
/*b0.lower.x[i] = a1.lower.x; b0.lower.y[i] = a1.lower.y; b0.lower.z[i] = a1.lower.z;
b0.upper.x[i] = a1.upper.x; b0.upper.y[i] = a1.upper.y; b0.upper.z[i] = a1.upper.z;*/
b1.lower.x[i] = c1.lower.x; b1.lower.y[i] = c1.lower.y; b1.lower.z[i] = c1.lower.z;
b1.upper.x[i] = c1.upper.x; b1.upper.y[i] = c1.upper.y; b1.upper.z[i] = c1.upper.z;
}
/*! Sets ID of child. */
__forceinline void setRef(size_t i, const NodeRef& ref) {
assert(i < N);
children[i] = ref;
}
/*! Returns the extent of the bounds of the ith child */
// NOTE(review): like OBBNode_t::extent, assumes an orthonormal rotation
// part so 1/|column| recovers the extent -- confirm at call sites.
__forceinline Vec3fa extent0(size_t i) const {
assert(i < N);
const Vec3fa vx(space0.l.vx.x[i],space0.l.vx.y[i],space0.l.vx.z[i]);
const Vec3fa vy(space0.l.vy.x[i],space0.l.vy.y[i],space0.l.vy.z[i]);
const Vec3fa vz(space0.l.vz.x[i],space0.l.vz.y[i],space0.l.vz.z[i]);
return rsqrt(vx*vx + vy*vy + vz*vz);
}
public:
AffineSpace3vf<N> space0; //!< transforms mapping each child's time-0 box to the unit box
//BBox3vf<N> b0; // these are the unit bounds
BBox3vf<N> b1; //!< time-1 bounds of each child, expressed in its normalized space
};
}

View file

@ -0,0 +1,273 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh_node_base.h"
namespace embree
{
/*! BVHN Quantized Node */
template<int N>
struct __aligned(8) QuantizedBaseNode_t
{
typedef unsigned char T;
static const T MIN_QUAN = 0;
static const T MAX_QUAN = 255;
/*! Clears the node. An inverted slot (lower=MAX_QUAN > upper=MIN_QUAN)
 *  marks a child as invalid, see validMask(). */
__forceinline void clear() {
for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;
for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;
}
/*! Returns bounds of specified child, dequantized as start + scale*q. */
__forceinline BBox3fa bounds(size_t i) const
{
assert(i < N);
const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),
madd(scale.y,(float)lower_y[i],start.y),
madd(scale.z,(float)lower_z[i],start.z));
const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),
madd(scale.y,(float)upper_y[i],start.y),
madd(scale.z,(float)upper_z[i],start.z));
return BBox3fa(lower,upper);
}
/*! Returns extent of bounds of specified child. */
__forceinline Vec3fa extent(size_t i) const {
return bounds(i).size();
}
/*! Quantizes one dimension of all N child bounds into 8-bit values.
 *  The result is conservative: for every valid lane the dequantized
 *  lower is <= the input lower and the dequantized upper is >= the
 *  input upper (checked in DEBUG below). */
static __forceinline void init_dim(const vfloat<N> &lower,
const vfloat<N> &upper,
T lower_quant[N],
T upper_quant[N],
float &start,
float &scale)
{
/* quantize bounds */
const vbool<N> m_valid = lower != vfloat<N>(pos_inf);
const float minF = reduce_min(lower);
const float maxF = reduce_max(upper);
float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);
float decode_scale = diff / float(MAX_QUAN);
if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero
assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);
const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;
vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);
vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);
/* lower/upper correction */
// widen by one quantization step wherever rounding made the box shrink
vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;
vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;
ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);
iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);
/* disable invalid lanes */
// invalid lanes get the inverted sentinel (lower=MAX_QUAN, upper=MIN_QUAN)
ilower = select(m_valid,ilower,MAX_QUAN);
iupper = select(m_valid,iupper,MIN_QUAN);
/* store as uchar to memory */
vint<N>::store(lower_quant,ilower);
vint<N>::store(upper_quant,iupper);
start = minF;
scale = decode_scale;
#if defined(DEBUG)
vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );
vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );
vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);
vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);
assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));
assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));
#endif
}
/*! Quantizes all three dimensions from a full-precision AABB node. */
__forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)
{
init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);
init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);
init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);
}
/*! Valid children satisfy lower <= upper; cleared/disabled slots are inverted. */
__forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }
#if defined(__AVX512F__) // KNL
__forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }
#endif
/*! Dequantize one plane of all N children: start + scale * quantized. */
__forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }
__forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }
__forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }
__forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }
__forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }
__forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }
/*! Raw load of M quantized values at a byte offset into all_planes
 *  (no start/scale decode applied). */
template <int M>
__forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }
#if defined(__AVX512F__)
__forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }
__forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }
__forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }
#endif
// the named plane arrays alias the flat all_planes array for offset-based access
union {
struct {
T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children
T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children
T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children
T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children
T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children
T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children
};
T all_planes[6*N];
};
Vec3f start; //!< decode offset per dimension
Vec3f scale; //!< decode scale per dimension
friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)
{
o << "QuantizedBaseNode { " << embree_endl;
o << " start " << n.start << embree_endl;
o << " scale " << n.scale << embree_endl;
o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;
o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;
o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;
o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;
o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;
o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;
o << "}" << embree_endl;
return o;
}
};
/*! Quantized node with child references. */
template<typename NodeRef, int N>
struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N>
{
using BaseNode_t<NodeRef,N>::children;
using QuantizedBaseNode_t<N>::lower_x;
using QuantizedBaseNode_t<N>::upper_x;
using QuantizedBaseNode_t<N>::lower_y;
using QuantizedBaseNode_t<N>::upper_y;
using QuantizedBaseNode_t<N>::lower_z;
using QuantizedBaseNode_t<N>::upper_z;
using QuantizedBaseNode_t<N>::start;
using QuantizedBaseNode_t<N>::scale;
using QuantizedBaseNode_t<N>::init_dim;
/*! Sets reference of child. */
__forceinline void setRef(size_t i, const NodeRef& ref) {
assert(i < N);
children[i] = ref;
}
/*! Builder functor: gathers the full-precision child bounds into a
 *  temporary AABB node, then quantizes it into a freshly allocated
 *  quantized node. */
struct Create2
{
template<typename BuildRecord>
__forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const
{
__aligned(64) AABBNode_t<NodeRef,N> node;
node.clear();
for (size_t i=0; i<n; i++) {
node.setBounds(i,children[i].bounds());
}
QuantizedNode_t *qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment);
qnode->init(node);
return (size_t)qnode | NodeRef::tyQuantizedNode;
}
};
/*! Builder functor: writes the child references into the quantized node. */
struct Set2
{
template<typename BuildRecord>
__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
{
#if defined(DEBUG)
// check that empty children are only at the end of the child list
bool emptyChild = false;
for (size_t i=0; i<num; i++) {
emptyChild |= (children[i] == NodeRef::emptyNode);
assert(emptyChild == (children[i] == NodeRef::emptyNode));
}
#endif
QuantizedNode_t* node = ref.quantizedNode();
for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
return ref;
}
};
/*! Initializes this node from a full-precision AABB node: empties the
 *  child slots and quantizes all three bound dimensions. */
__forceinline void init(AABBNode_t<NodeRef,N>& node)
{
for (size_t i=0;i<N;i++) children[i] = NodeRef::emptyNode;
init_dim(node);
}
};
/*! BVHN Quantized Motion Blur Node: two quantized nodes holding the
 *  time-0 and time-1 bounds; bounds at time t are lerped between them. */
template<int N>
struct __aligned(8) QuantizedBaseNodeMB_t
{
QuantizedBaseNode_t<N> node0; //!< quantized bounds at time 0
QuantizedBaseNode_t<N> node1; //!< quantized bounds at time 1
/*! Clears the node. */
__forceinline void clear() {
node0.clear();
node1.clear();
}
/*! Returns bounds of specified child, merged over both time endpoints. */
__forceinline BBox3fa bounds(size_t i) const
{
assert(i < N);
BBox3fa bounds0 = node0.bounds(i);
BBox3fa bounds1 = node1.bounds(i);
bounds0.extend(bounds1);
return bounds0;
}
/*! Returns extent of bounds of specified child. */
__forceinline Vec3fa extent(size_t i) const {
return bounds(i).size();
}
// node0 and node1 are always filled for the same children, so either valid mask works
__forceinline vbool<N> validMask() const { return node0.validMask(); }
/*! Dequantize one plane of all N children at time t (lerp between endpoints). */
template<typename T>
__forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); }
template<typename T>
__forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); }
template<typename T>
__forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); }
template<typename T>
__forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); }
template<typename T>
__forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); }
template<typename T>
__forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); }
/*! Dequantize one plane of child i, broadcast and lerped over M times. */
template<int M>
__forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); }
template<int M>
__forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); }
template<int M>
__forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); }
template<int M>
__forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); }
template<int M>
__forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); }
template<int M>
__forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); }
};
}

View file

@ -0,0 +1,242 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../common/default.h"
#include "../common/alloc.h"
#include "../common/accel.h"
#include "../common/device.h"
#include "../common/scene.h"
#include "../geometry/primitive.h"
#include "../common/ray.h"
namespace embree
{
/* BVH node reference with bounds */
/*! Couples a BVH node reference with the axis-aligned bounds of its subtree. */
template<typename NodeRef>
struct BVHNodeRecord
{
__forceinline BVHNodeRecord() {}
__forceinline BVHNodeRecord(NodeRef node, const BBox3fx& b) : ref(node), bounds(b) {}
__forceinline BVHNodeRecord(NodeRef node, const BBox3fa& b) : ref(node), bounds((BBox3fx)b) {}
NodeRef ref;    //!< reference to the node
BBox3fx bounds; //!< bounds of the subtree rooted at ref
};
/*! Couples a BVH node reference with the linear (motion blur) bounds of its subtree. */
template<typename NodeRef>
struct BVHNodeRecordMB
{
__forceinline BVHNodeRecordMB() {}
__forceinline BVHNodeRecordMB(NodeRef node, const LBBox3fa& linearBounds) : ref(node), lbounds(linearBounds) {}
NodeRef ref;      //!< reference to the node
LBBox3fa lbounds; //!< linear bounds of the subtree
};
/*! Couples a BVH node reference with linear bounds and the time range for which they are valid. */
template<typename NodeRef>
struct BVHNodeRecordMB4D
{
__forceinline BVHNodeRecordMB4D() {}
__forceinline BVHNodeRecordMB4D(NodeRef node, const LBBox3fa& linearBounds, const BBox1f& timeRange) : ref(node), lbounds(linearBounds), dt(timeRange) {}
NodeRef ref;      //!< reference to the node
LBBox3fa lbounds; //!< linear bounds over the time range
BBox1f dt;        //!< time range for which the bounds are valid
};
/*! forward declarations of the BVH node types, parameterized by the node
 *  reference type and the branching factor N (definitions live elsewhere) */
template<typename NodeRef, int N> struct BaseNode_t;
template<typename NodeRef, int N> struct AABBNode_t;
template<typename NodeRef, int N> struct AABBNodeMB_t;
template<typename NodeRef, int N> struct AABBNodeMB4D_t;
template<typename NodeRef, int N> struct OBBNode_t;
template<typename NodeRef, int N> struct OBBNodeMB_t;
template<typename NodeRef, int N> struct QuantizedNode_t;
template<typename NodeRef, int N> struct QuantizedNodeMB_t;
/*! Pointer that points to a node or a list of primitives.
 *
 *  Implemented as a tagged pointer: nodes are at least 16-byte aligned,
 *  so the low 4 bits of the address are free and are used to encode the
 *  node type (for internal nodes) or tyLeaf plus the number of primitive
 *  blocks (for leaves). The highest address bit doubles as a traversal
 *  barrier flag on 64-bit targets. */
template<int N>
struct NodeRefPtr
{
//template<int NN> friend class BVHN;
/*! Number of bytes the nodes and primitives are minimally aligned to.*/
static const size_t byteAlignment = 16;
static const size_t byteNodeAlignment = 4*N;
/*! highest address bit is used as barrier for some algorithms */
static const size_t barrier_mask = (1LL << (8*sizeof(size_t)-1));
/*! Masks the bits that store the number of items per leaf. */
static const size_t align_mask = byteAlignment-1;
static const size_t items_mask = byteAlignment-1;
/*! different supported node types (stored in the low pointer bits) */
static const size_t tyAABBNode = 0;
static const size_t tyAABBNodeMB = 1;
static const size_t tyAABBNodeMB4D = 6;
static const size_t tyOBBNode = 2;
static const size_t tyOBBNodeMB = 3;
static const size_t tyQuantizedNode = 5;
static const size_t tyLeaf = 8;
/*! Empty node (a leaf with zero primitive blocks) */
static const size_t emptyNode = tyLeaf;
/*! Invalid node, used as marker in traversal */
static const size_t invalidNode = (((size_t)-1) & (~items_mask)) | (tyLeaf+0);
static const size_t popRay = (((size_t)-1) & (~items_mask)) | (tyLeaf+1);
/*! Maximum number of primitive blocks in a leaf. */
static const size_t maxLeafBlocks = items_mask-tyLeaf;
/*! Default constructor */
__forceinline NodeRefPtr () {}
/*! Construction from integer */
__forceinline NodeRefPtr (size_t ptr) : ptr(ptr) {}
/*! Cast to size_t */
__forceinline operator size_t() const { return ptr; }
/*! Sets the barrier bit. Only valid on 64-bit targets, where the top
 *  address bit is never part of a real pointer. */
__forceinline void setBarrier() {
#if defined(__64BIT__)
assert(!isBarrier());
ptr |= barrier_mask;
#else
assert(false);
#endif
}
/*! Clears the barrier bit. */
__forceinline void clearBarrier() {
#if defined(__64BIT__)
ptr &= ~barrier_mask;
#else
assert(false);
#endif
}
/*! Checks if this is an barrier. A barrier tells the top level tree rotations how deep to enter the tree. */
__forceinline bool isBarrier() const { return (ptr & barrier_mask) != 0; }
/*! checks if this is a leaf (non-zero iff the tyLeaf bit is set) */
__forceinline size_t isLeaf() const { return ptr & tyLeaf; }
/*! returns node type (the low tag bits of the pointer) */
__forceinline int type() const { return ptr & (size_t)align_mask; }
/*! checks if this is a node */
__forceinline int isAABBNode() const { return (ptr & (size_t)align_mask) == tyAABBNode; }
/*! checks if this is a motion blur node */
__forceinline int isAABBNodeMB() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB; }
/*! checks if this is a 4D motion blur node */
__forceinline int isAABBNodeMB4D() const { return (ptr & (size_t)align_mask) == tyAABBNodeMB4D; }
/*! checks if this is a node with unaligned bounding boxes */
__forceinline int isOBBNode() const { return (ptr & (size_t)align_mask) == tyOBBNode; }
/*! checks if this is a motion blur node with unaligned bounding boxes */
__forceinline int isOBBNodeMB() const { return (ptr & (size_t)align_mask) == tyOBBNodeMB; }
/*! checks if this is a quantized node */
__forceinline int isQuantizedNode() const { return (ptr & (size_t)align_mask) == tyQuantizedNode; }
/*! Encodes a node (tyAABBNode == 0, so no tag bits need to be set) */
static __forceinline NodeRefPtr encodeNode(AABBNode_t<NodeRefPtr,N>* node) {
assert(!((size_t)node & align_mask));
return NodeRefPtr((size_t) node);
}
static __forceinline NodeRefPtr encodeNode(AABBNodeMB_t<NodeRefPtr,N>* node) {
assert(!((size_t)node & align_mask));
return NodeRefPtr((size_t) node | tyAABBNodeMB);
}
static __forceinline NodeRefPtr encodeNode(AABBNodeMB4D_t<NodeRefPtr,N>* node) {
assert(!((size_t)node & align_mask));
return NodeRefPtr((size_t) node | tyAABBNodeMB4D);
}
/*! Encodes an unaligned node */
static __forceinline NodeRefPtr encodeNode(OBBNode_t<NodeRefPtr,N>* node) {
return NodeRefPtr((size_t) node | tyOBBNode);
}
/*! Encodes an unaligned motion blur node */
static __forceinline NodeRefPtr encodeNode(OBBNodeMB_t<NodeRefPtr,N>* node) {
return NodeRefPtr((size_t) node | tyOBBNodeMB);
}
/*! Encodes a leaf; the number of primitive blocks is clamped to maxLeafBlocks
 *  and stored in the tag bits together with tyLeaf */
static __forceinline NodeRefPtr encodeLeaf(void* tri, size_t num) {
assert(!((size_t)tri & align_mask));
assert(num <= maxLeafBlocks);
return NodeRefPtr((size_t)tri | (tyLeaf+min(num,(size_t)maxLeafBlocks)));
}
/*! Encodes a leaf with an explicit subtype ty */
static __forceinline NodeRefPtr encodeTypedLeaf(void* ptr, size_t ty) {
assert(!((size_t)ptr & align_mask));
return NodeRefPtr((size_t)ptr | (tyLeaf+ty));
}
/*! returns base node pointer (tag bits masked off) */
__forceinline BaseNode_t<NodeRefPtr,N>* baseNode()
{
assert(!isLeaf());
return (BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
}
__forceinline const BaseNode_t<NodeRefPtr,N>* baseNode() const
{
assert(!isLeaf());
return (const BaseNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask);
}
/*! returns node pointer (tyAABBNode tag is 0, so ptr needs no masking) */
__forceinline AABBNode_t<NodeRefPtr,N>* getAABBNode() { assert(isAABBNode()); return ( AABBNode_t<NodeRefPtr,N>*)ptr; }
__forceinline const AABBNode_t<NodeRefPtr,N>* getAABBNode() const { assert(isAABBNode()); return (const AABBNode_t<NodeRefPtr,N>*)ptr; }
/*! returns motion blur node pointer; also valid for MB4D nodes, which extend the MB layout */
__forceinline AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() { assert(isAABBNodeMB() || isAABBNodeMB4D()); return ( AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
__forceinline const AABBNodeMB_t<NodeRefPtr,N>* getAABBNodeMB() const { assert(isAABBNodeMB() || isAABBNodeMB4D()); return (const AABBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
/*! returns 4D motion blur node pointer */
__forceinline AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() { assert(isAABBNodeMB4D()); return ( AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
__forceinline const AABBNodeMB4D_t<NodeRefPtr,N>* getAABBNodeMB4D() const { assert(isAABBNodeMB4D()); return (const AABBNodeMB4D_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
/*! returns unaligned node pointer */
__forceinline OBBNode_t<NodeRefPtr,N>* ungetAABBNode() { assert(isOBBNode()); return ( OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
__forceinline const OBBNode_t<NodeRefPtr,N>* ungetAABBNode() const { assert(isOBBNode()); return (const OBBNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
/*! returns unaligned motion blur node pointer */
__forceinline OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() { assert(isOBBNodeMB()); return ( OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
__forceinline const OBBNodeMB_t<NodeRefPtr,N>* ungetAABBNodeMB() const { assert(isOBBNodeMB()); return (const OBBNodeMB_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask); }
/*! returns quantized node pointer */
__forceinline QuantizedNode_t<NodeRefPtr,N>* quantizedNode() { assert(isQuantizedNode()); return ( QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
__forceinline const QuantizedNode_t<NodeRefPtr,N>* quantizedNode() const { assert(isQuantizedNode()); return (const QuantizedNode_t<NodeRefPtr,N>*)(ptr & ~(size_t)align_mask ); }
/*! returns leaf pointer and writes the number of primitive blocks to num */
__forceinline char* leaf(size_t& num) const {
assert(isLeaf());
num = (ptr & (size_t)items_mask)-tyLeaf;
return (char*)(ptr & ~(size_t)align_mask);
}
/*! clear all bit flags (keeps only the raw pointer) */
__forceinline void clearFlags() {
ptr &= ~(size_t)align_mask;
}
/*! returns the wideness */
__forceinline size_t getN() const { return N; }
public:
size_t ptr; //!< tagged pointer value
};
}

View file

@ -0,0 +1,258 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_refit.h"
#include "bvh_statistics.h"
#include "../geometry/linei.h"
#include "../geometry/triangle.h"
#include "../geometry/trianglev.h"
#include "../geometry/trianglei.h"
#include "../geometry/quadv.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../../common/algorithms/parallel_for.h"
namespace embree
{
namespace isa
{
static const size_t SINGLE_THREAD_THRESHOLD = 4*1024;
/*! Orders two node references by the raw bytes stored at the node's
 *  lower_x field (read as a size_t via a type pun).
 *  NOTE(review): this reads sizeof(size_t) bytes starting at lower_x,
 *  so it assumes the AABBNode layout makes this a stable sort key —
 *  confirm against the node layout in bvh.h. */
template<int N>
__forceinline bool compare(const typename BVHN<N>::NodeRef* a, const typename BVHN<N>::NodeRef* b)
{
size_t sa = *(size_t*)&a->node()->lower_x;
size_t sb = *(size_t*)&b->node()->lower_x;
return sa < sb;
}
/*! Constructor: stores the BVH to refit and the callback interface used
 *  to recompute leaf bounds; no subtrees are extracted yet. */
template<int N>
BVHNRefitter<N>::BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds)
: bvh(bvh), leafBounds(leafBounds), numSubTrees(0)
{
}
/*! Refits (recomputes bottom-up) all bounds of the BVH without rebuilding
 *  its topology. Small BVHs are refitted single-threaded; larger ones are
 *  first split into subtrees which are refitted in parallel, after which
 *  the top levels are refitted from the cached subtree bounds. */
template<int N>
void BVHNRefitter<N>::refit()
{
if (bvh->numPrimitives <= SINGLE_THREAD_THRESHOLD) {
bvh->bounds = LBBox3fa(recurse_bottom(bvh->root));
}
else
{
/* phase 1: collect subtree roots at a fixed depth */
BBox3fa subTreeBounds[MAX_NUM_SUB_TREES];
numSubTrees = 0;
gather_subtree_refs(bvh->root,numSubTrees,0);
/* phase 2: refit each subtree in parallel, caching its bounds */
if (numSubTrees)
parallel_for(size_t(0), numSubTrees, size_t(1), [&](const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
NodeRef& ref = subTrees[i];
subTreeBounds[i] = recurse_bottom(ref);
}
});
/* phase 3: refit the top levels single-threaded, consuming the cached
 * subtree bounds in the same order they were gathered */
numSubTrees = 0;
bvh->bounds = LBBox3fa(refit_toplevel(bvh->root,numSubTrees,subTreeBounds,0));
}
}
/*! Collects references to all subtrees rooted at depth
 *  MAX_SUB_TREE_EXTRACTION_DEPTH into the subTrees array; subtrees counts
 *  how many were stored. Leaves above that depth are not recorded (they
 *  are handled by refit_toplevel via leafBounds). */
template<int N>
void BVHNRefitter<N>::gather_subtree_refs(NodeRef& ref,
size_t &subtrees,
const size_t depth)
{
if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
{
assert(subtrees < MAX_NUM_SUB_TREES);
subTrees[subtrees++] = ref;
return;
}
if (ref.isAABBNode())
{
AABBNode* node = ref.getAABBNode();
for (size_t i=0; i<N; i++) {
NodeRef& child = node->child(i);
if (unlikely(child == BVH::emptyNode)) continue;
gather_subtree_refs(child,subtrees,depth+1);
}
}
}
/*! Refits the top levels of the tree (above the subtree extraction depth),
 *  consuming the subtree bounds previously computed in parallel. Must walk
 *  the tree in the same order as gather_subtree_refs so that the subtrees
 *  index lines up with the cached subTreeBounds. Returns the merged bounds
 *  of the visited subtree. */
template<int N>
BBox3fa BVHNRefitter<N>::refit_toplevel(NodeRef& ref,
size_t &subtrees,
const BBox3fa *const subTreeBounds,
const size_t depth)
{
if (depth >= MAX_SUB_TREE_EXTRACTION_DEPTH)
{
assert(subtrees < MAX_NUM_SUB_TREES);
assert(subTrees[subtrees] == ref);
return subTreeBounds[subtrees++];
}
if (ref.isAABBNode())
{
AABBNode* node = ref.getAABBNode();
BBox3fa bounds[N];
for (size_t i=0; i<N; i++)
{
NodeRef& child = node->child(i);
if (unlikely(child == BVH::emptyNode))
bounds[i] = BBox3fa(empty);
else
bounds[i] = refit_toplevel(child,subtrees,subTreeBounds,depth+1);
}
/* AOS to SOA transform of the child bounds */
BBox3vf<N> boundsT = transpose<N>(bounds);
/* set new bounds */
node->lower_x = boundsT.lower.x;
node->lower_y = boundsT.lower.y;
node->lower_z = boundsT.lower.z;
node->upper_x = boundsT.upper.x;
node->upper_y = boundsT.upper.y;
node->upper_z = boundsT.upper.z;
return merge<N>(bounds);
}
else
/* leaves above the extraction depth: recompute bounds via the callback */
return leafBounds.leafBounds(ref);
}
// =========================================================
// =========================================================
// =========================================================
/*! Recursively refits a subtree bottom-up: leaf bounds come from the
 *  leafBounds callback, internal node bounds are recomputed from the
 *  children and written back in SOA form. Returns the merged bounds. */
template<int N>
BBox3fa BVHNRefitter<N>::recurse_bottom(NodeRef& ref)
{
/* this is a leaf node */
if (unlikely(ref.isLeaf()))
return leafBounds.leafBounds(ref);
/* recurse if this is an internal node */
AABBNode* node = ref.getAABBNode();
/* enable exclusive prefetch for >= AVX platforms */
#if defined(__AVX__)
BVH::prefetchW(ref);
#endif
BBox3fa bounds[N];
for (size_t i=0; i<N; i++)
if (unlikely(node->child(i) == BVH::emptyNode))
{
bounds[i] = BBox3fa(empty);
}
else
bounds[i] = recurse_bottom(node->child(i));
/* AOS to SOA transform */
BBox3vf<N> boundsT = transpose<N>(bounds);
/* set new bounds */
node->lower_x = boundsT.lower.x;
node->lower_y = boundsT.lower.y;
node->lower_z = boundsT.lower.z;
node->upper_x = boundsT.upper.x;
node->upper_y = boundsT.upper.y;
node->upper_z = boundsT.upper.z;
return merge<N>(bounds);
}
/*! Constructor: takes ownership of the full builder and creates a refitter
 *  whose leaf-bounds callback is this object itself (the cast adjusts the
 *  partially-constructed this pointer to its LeafBoundsInterface base). */
template<int N, typename Mesh, typename Primitive>
BVHNRefitT<N,Mesh,Primitive>::BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode)
: bvh(bvh), builder(builder), refitter(new BVHNRefitter<N>(bvh,*(typename BVHNRefitter<N>::LeafBoundsInterface*)this)), mesh(mesh), topologyVersion(0) {}
/*! Forwards clear() to the wrapped full builder, if one exists. */
template<int N, typename Mesh, typename Primitive>
void BVHNRefitT<N,Mesh,Primitive>::clear()
{
if (builder)
builder->clear();
}
/*! Either rebuilds or refits: a full rebuild runs only when the mesh
 *  topology changed since the last build; otherwise the existing tree is
 *  cheaply refitted in place. */
template<int N, typename Mesh, typename Primitive>
void BVHNRefitT<N,Mesh,Primitive>::build()
{
if (mesh->topologyChanged(topologyVersion)) {
topologyVersion = mesh->getTopologyVersion();
builder->build();
}
else
refitter->refit();
}
/* explicit template instantiations of the refitter */
template class BVHNRefitter<4>;
#if defined(__AVX__)
template class BVHNRefitter<8>;
#endif
/* factory functions: each *RefitSAH wraps the corresponding full SAH mesh
 * builder in a BVHNRefitT so rebuilds happen only on topology changes */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH4Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH4Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4> ((BVH4*)accel,BVH4Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
Builder* BVH4Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4v>((BVH4*)accel,BVH4Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
Builder* BVH4Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,TriangleMesh,Triangle4i>((BVH4*)accel,BVH4Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
#if defined(__AVX__)
Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH8Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH8Triangle4iMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH8Triangle4MeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4> ((BVH8*)accel,BVH8Triangle4MeshBuilderSAH (accel,mesh,geomID,mode),mesh,mode); }
Builder* BVH8Triangle4vMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4v>((BVH8*)accel,BVH8Triangle4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
Builder* BVH8Triangle4iMeshRefitSAH (void* accel, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,TriangleMesh,Triangle4i>((BVH8*)accel,BVH8Triangle4iMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
Builder* BVH4Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH4Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,QuadMesh,Quad4v>((BVH4*)accel,BVH4Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
#if defined(__AVX__)
Builder* BVH8Quad4vMeshBuilderSAH (void* bvh, QuadMesh* mesh, unsigned int geomID, size_t mode);
Builder* BVH8Quad4vMeshRefitSAH (void* accel, QuadMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,QuadMesh,Quad4v>((BVH8*)accel,BVH8Quad4vMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_USER)
Builder* BVH4VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
Builder* BVH4VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,UserGeometry,Object>((BVH4*)accel,BVH4VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
#if defined(__AVX__)
Builder* BVH8VirtualMeshBuilderSAH (void* bvh, UserGeometry* mesh, unsigned int geomID, size_t mode);
Builder* BVH8VirtualMeshRefitSAH (void* accel, UserGeometry* mesh, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,UserGeometry,Object>((BVH8*)accel,BVH8VirtualMeshBuilderSAH(accel,mesh,geomID,mode),mesh,mode); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
Builder* BVH4InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,Instance,InstancePrimitive>((BVH4*)accel,BVH4InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
Builder* BVH8InstanceMeshRefitSAH (void* accel, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,Instance,InstancePrimitive>((BVH8*)accel,BVH8InstanceMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
Builder* BVH4InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)accel,BVH4InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode);
Builder* BVH8InstanceArrayMeshRefitSAH (void* accel, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new BVHNRefitT<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)accel,BVH8InstanceArrayMeshBuilderSAH(accel,mesh,gtype,geomID,mode),mesh,mode); }
#endif
#endif
}
}

View file

@ -0,0 +1,95 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
namespace embree
{
namespace isa
{
/*! Refits (recomputes bounds of) an existing BVHN without rebuilding its
 *  topology. Large trees are split at a fixed depth into subtrees that can
 *  be refitted in parallel. */
template<int N>
class BVHNRefitter
{
public:
/*! Type shortcuts */
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
/*! callback interface used to recompute the bounds of a leaf */
struct LeafBoundsInterface {
virtual const BBox3fa leafBounds(NodeRef& ref) const = 0;
};
public:
/*! Constructor. */
BVHNRefitter (BVH* bvh, const LeafBoundsInterface& leafBounds);
/*! refits the BVH */
void refit();
private:
/* single-threaded subtree extraction based on BVH depth */
void gather_subtree_refs(NodeRef& ref,
size_t &subtrees,
const size_t depth = 0);
/* single-threaded top-level refit */
BBox3fa refit_toplevel(NodeRef& ref,
size_t &subtrees,
const BBox3fa *const subTreeBounds,
const size_t depth = 0);
/* single-threaded subtree refit */
BBox3fa recurse_bottom(NodeRef& ref);
public:
BVH* bvh; //!< BVH to refit
const LeafBoundsInterface& leafBounds; //!< calculates bounds of leaves
static const size_t MAX_SUB_TREE_EXTRACTION_DEPTH = (N==4) ? 4 : (N==8) ? 3 : 3;
static const size_t MAX_NUM_SUB_TREES = (N==4) ? 256 : (N==8) ? 512 : N*N*N; // N ^ MAX_SUB_TREE_EXTRACTION_DEPTH
size_t numSubTrees; //!< number of extracted subtree roots
NodeRef subTrees[MAX_NUM_SUB_TREES]; //!< extracted subtree roots
};
/*! Builder adapter that owns a full builder and a refitter: build() runs
 *  the full builder only when the mesh topology changed, otherwise it
 *  refits in place. Also implements the refitter's leaf-bounds callback
 *  by updating the leaf primitives from the mesh. */
template<int N, typename Mesh, typename Primitive>
class BVHNRefitT : public Builder, public BVHNRefitter<N>::LeafBoundsInterface
{
public:
/*! Type shortcuts */
typedef BVHN<N> BVH;
typedef typename BVH::AABBNode AABBNode;
typedef typename BVH::NodeRef NodeRef;
public:
BVHNRefitT (BVH* bvh, Builder* builder, Mesh* mesh, size_t mode);
virtual void build();
virtual void clear();
/*! recomputes the bounds of a leaf by updating its primitives from the
 *  mesh; ref.leaf() only decodes the pointer, so calling it before the
 *  emptyNode check is safe */
virtual const BBox3fa leafBounds (NodeRef& ref) const
{
size_t num; char* prim = ref.leaf(num);
if (unlikely(ref == BVH::emptyNode)) return empty;
BBox3fa bounds = empty;
for (size_t i=0; i<num; i++)
bounds.extend(((Primitive*)prim)[i].update(mesh));
return bounds;
}
private:
BVH* bvh;                                //!< BVH to build/refit
std::unique_ptr<Builder> builder;        //!< owned full rebuild builder
std::unique_ptr<BVHNRefitter<N>> refitter; //!< owned refitter
Mesh* mesh;                              //!< mesh the primitives reference
unsigned int topologyVersion;            //!< mesh topology version of the last full build
};
}
}

View file

@ -0,0 +1,127 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_rotate.h"
namespace embree
{
namespace isa
{
/*! Computes half surface area of box. The shuffle yields the products
 *  (dx*dy, dy*dz, dz*dx, dw*dw); summing the first three lanes gives
 *  dx*dy + dy*dz + dz*dx, i.e. half the box surface area. */
__forceinline float halfArea3f(const BBox<vfloat4>& box) {
const vfloat4 d = box.size();
const vfloat4 a = d*shuffle<1,2,0,3>(d);
return a[0]+a[1]+a[2];
}
/*! Performs one SAH-driven tree rotation on a BVH4 node: swaps a direct
 *  child of the parent (child1) with a grandchild under a different child
 *  (child2child) when this lowers child2's surface area. Recurses into all
 *  children first, then evaluates all candidate swaps with SIMD cost
 *  evaluation. Returns the (conservative) depth of the subtree. */
size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
{
/*! nothing to rotate if we reached a leaf node. */
if (parentRef.isBarrier()) return 0;
if (parentRef.isLeaf()) return 0;
AABBNode* parent = parentRef.getAABBNode();
/*! rotate all children first */
vint4 cdepth;
for (size_t c=0; c<4; c++)
cdepth[c] = (int)rotate(parent->child(c),depth+1);
/* compute current areas of all children (half surface area per lane) */
vfloat4 sizeX = parent->upper_x-parent->lower_x;
vfloat4 sizeY = parent->upper_y-parent->lower_y;
vfloat4 sizeZ = parent->upper_z-parent->lower_z;
vfloat4 childArea = madd(sizeX,(sizeY + sizeZ),sizeY*sizeZ);
/*! get node bounds */
BBox<vfloat4> child1_0,child1_1,child1_2,child1_3;
parent->bounds(child1_0,child1_1,child1_2,child1_3);
/*! Find best rotation. We pick a first child (child1) and a sub-child
(child2child) of a different second child (child2), and swap child1
and child2child. We perform the best such swap. */
float bestArea = 0;
size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1;
for (size_t c2=0; c2<4; c2++)
{
/*! ignore leaf nodes as we cannot descent into them */
if (parent->child(c2).isBarrier()) continue;
if (parent->child(c2).isLeaf()) continue;
AABBNode* child2 = parent->child(c2).getAABBNode();
/*! transpose child bounds */
BBox<vfloat4> child2c0,child2c1,child2c2,child2c3;
child2->bounds(child2c0,child2c1,child2c2,child2c3);
/*! put child1_0 at each child2 position and find the cheapest slot */
float cost00 = halfArea3f(merge(child1_0,child2c1,child2c2,child2c3));
float cost01 = halfArea3f(merge(child2c0,child1_0,child2c2,child2c3));
float cost02 = halfArea3f(merge(child2c0,child2c1,child1_0,child2c3));
float cost03 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_0));
vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03);
vfloat4 min0 = vreduce_min(cost0);
int pos0 = (int)bsf(movemask(min0 == cost0));
/*! put child1_1 at each child2 position */
float cost10 = halfArea3f(merge(child1_1,child2c1,child2c2,child2c3));
float cost11 = halfArea3f(merge(child2c0,child1_1,child2c2,child2c3));
float cost12 = halfArea3f(merge(child2c0,child2c1,child1_1,child2c3));
float cost13 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_1));
vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13);
vfloat4 min1 = vreduce_min(cost1);
int pos1 = (int)bsf(movemask(min1 == cost1));
/*! put child1_2 at each child2 position */
float cost20 = halfArea3f(merge(child1_2,child2c1,child2c2,child2c3));
float cost21 = halfArea3f(merge(child2c0,child1_2,child2c2,child2c3));
float cost22 = halfArea3f(merge(child2c0,child2c1,child1_2,child2c3));
float cost23 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_2));
vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23);
vfloat4 min2 = vreduce_min(cost2);
int pos2 = (int)bsf(movemask(min2 == cost2));
/*! put child1_3 at each child2 position */
float cost30 = halfArea3f(merge(child1_3,child2c1,child2c2,child2c3));
float cost31 = halfArea3f(merge(child2c0,child1_3,child2c2,child2c3));
float cost32 = halfArea3f(merge(child2c0,child2c1,child1_3,child2c3));
float cost33 = halfArea3f(merge(child2c0,child2c1,child2c2,child1_3));
vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33);
vfloat4 min3 = vreduce_min(cost3);
int pos3 = (int)bsf(movemask(min3 == cost3));
/*! find best other child: per-lane best cost of inserting child1_i,
minus the current area of child2 (the area the swap would replace) */
vfloat4 area0123 = vfloat4(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3)) - vfloat4(childArea[c2]);
int pos[4] = { pos0,pos1,pos2,pos3 };
const size_t mbd = BVH4::maxBuildDepth;
vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints
valid &= vint4(int(c2)) != vint4(step); // child1 must differ from child2
if (none(valid)) continue;
size_t c1 = select_min(valid,area0123);
float area = area0123[c1];
if (c1 == c2) continue; // can happen if bounds are NANs
/*! accept a swap when it reduces cost and is not swapping a node with itself */
if (area < bestArea) {
bestArea = area;
bestChild1 = c1;
bestChild2 = c2;
bestChild2Child = pos[c1];
}
}
/*! if we did not find a swap that improves the SAH then do nothing */
if (bestChild1 == size_t(-1)) return 1+reduce_max(cdepth);
/*! perform the best found tree rotation */
AABBNode* child2 = parent->child(bestChild2).getAABBNode();
AABBNode::swap(parent,bestChild1,child2,bestChild2Child);
parent->setBounds(bestChild2,child2->bounds());
AABBNode::compact(parent);
AABBNode::compact(child2);
/*! This returned depth is conservative as the child that was
* pulled up in the tree could have been on the critical path. */
cdepth[bestChild1]++; // bestChild1 was pushed down one level
return 1+reduce_max(cdepth);
}
}
}

View file

@ -0,0 +1,37 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
namespace embree
{
namespace isa
{
/*! Generic tree-rotation interface; disabled (no-op) for all widths by
 *  default — only the BVH4 specialization below implements rotations. */
template<int N>
class BVHNRotate
{
typedef typename BVHN<N>::NodeRef NodeRef;
public:
static const bool enabled = false;
static __forceinline size_t rotate(NodeRef parentRef, size_t depth = 1) { return 0; }
static __forceinline void restructure(NodeRef ref, size_t depth = 1) {}
};
/* BVH4 tree rotations: the only width with an actual rotation
 * implementation (see bvh_rotate.cpp); returns the subtree depth */
template<>
class BVHNRotate<4>
{
typedef BVH4::AABBNode AABBNode;
typedef BVH4::NodeRef NodeRef;
public:
static const bool enabled = true;
static size_t rotate(NodeRef parentRef, size_t depth = 1);
};
}
}

View file

@ -0,0 +1,168 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_statistics.h"
#include "../../common/algorithms/parallel_reduce.h"
namespace embree
{
/*! Constructor: gathers statistics over the whole tree, normalizing SAH
 *  against the BVH's expected half area over the full time range [0,1]. */
template<int N>
BVHNStatistics<N>::BVHNStatistics (BVH* bvh) : bvh(bvh)
{
double A = max(0.0f,bvh->getLinearBounds().expectedHalfArea());
stat = statistics(bvh->root,A,BBox1f(0.0f,1.0f));
}
/*! Formats the gathered statistics as a human-readable multi-line report:
 *  overall totals first, then one line per node type that occurs, plus the
 *  leaf statistics and leaf-size histogram. */
template<int N>
std::string BVHNStatistics<N>::str()
{
std::ostringstream stream;
stream.setf(std::ios::fixed, std::ios::floatfield);
stream << "  primitives = " << bvh->numPrimitives << ", vertices = " << bvh->numVertices << ", depth = " << stat.depth << std::endl;
size_t totalBytes = stat.bytes(bvh);
double totalSAH = stat.sah(bvh);
stream << "  total            : sah = "  << std::setw(7) << std::setprecision(3) << totalSAH << " (100.00%), ";
stream << "#bytes = " << std::setw(7) << std::setprecision(2) << totalBytes/1E6 << " MB (100.00%), ";
stream << "#nodes = " << std::setw(7) << stat.size() << " (" << std::setw(6) << std::setprecision(2) << 100.0*stat.fillRate(bvh) << "% filled), ";
stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(totalBytes)/double(bvh->numPrimitives) << std::endl;
if (stat.statAABBNodes.numNodes    ) stream << "  getAABBNodes     : "  << stat.statAABBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
if (stat.statOBBNodes.numNodes  ) stream << "  ungetAABBNodes   : "  << stat.statOBBNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
if (stat.statAABBNodesMB.numNodes  ) stream << "  getAABBNodesMB   : "  << stat.statAABBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
if (stat.statAABBNodesMB4D.numNodes) stream << "  getAABBNodesMB4D : "  << stat.statAABBNodesMB4D.toString(bvh,totalSAH,totalBytes) << std::endl;
if (stat.statOBBNodesMB.numNodes) stream << "  ungetAABBNodesMB : "  << stat.statOBBNodesMB.toString(bvh,totalSAH,totalBytes) << std::endl;
if (stat.statQuantizedNodes.numNodes  ) stream << "  quantizedNodes   : "  << stat.statQuantizedNodes.toString(bvh,totalSAH,totalBytes) << std::endl;
if (true)                            stream << "  leaves           : "  << stat.statLeaf.toString(bvh,totalSAH,totalBytes) << std::endl;
if (true)                            stream << "    histogram      : "  << stat.statLeaf.histToString() << std::endl;
return stream.str();
}
/*! Recursively gathers per-node-type statistics for the subtree rooted at
 *  node. A is the (expected half) area of the node's bounds and t0t1 the
 *  time range it covers; both weight the SAH contribution dt*A. Children
 *  are processed with parallel_reduce and the per-branch results summed. */
template<int N>
typename BVHNStatistics<N>::Statistics BVHNStatistics<N>::statistics(NodeRef node, const double A, const BBox1f t0t1)
{
Statistics s;
assert(t0t1.size() > 0.0f);
double dt = max(0.0f,t0t1.size());
if (node.isAABBNode())
{
AABBNode* n = node.getAABBNode();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extend(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statAABBNodes.numChildren++;
return s;
}, Statistics::add);
s.statAABBNodes.numNodes++;
s.statAABBNodes.nodeSAH += dt*A;
s.depth++;
}
else if (node.isOBBNode())
{
OBBNode* n = node.ungetAABBNode();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extent(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statOBBNodes.numChildren++;
return s;
}, Statistics::add);
s.statOBBNodes.numNodes++;
s.statOBBNodes.nodeSAH += dt*A;
s.depth++;
}
else if (node.isAABBNodeMB())
{
AABBNodeMB* n = node.getAABBNodeMB();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,n->expectedHalfArea(i,t0t1));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statAABBNodesMB.numChildren++;
return s;
}, Statistics::add);
s.statAABBNodesMB.numNodes++;
s.statAABBNodesMB.nodeSAH += dt*A;
s.depth++;
}
else if (node.isAABBNodeMB4D())
{
/* 4D nodes clip the time range per child before recursing */
AABBNodeMB4D* n = node.getAABBNodeMB4D();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const BBox1f t0t1i = intersect(t0t1,n->timeRange(i));
assert(!t0t1i.empty());
const double Ai = n->AABBNodeMB::expectedHalfArea(i,t0t1i);
Statistics s = statistics(n->child(i),Ai,t0t1i);
s.statAABBNodesMB4D.numChildren++;
return s;
}, Statistics::add);
s.statAABBNodesMB4D.numNodes++;
s.statAABBNodesMB4D.nodeSAH += dt*A;
s.depth++;
}
else if (node.isOBBNodeMB())
{
OBBNodeMB* n = node.ungetAABBNodeMB();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extent0(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statOBBNodesMB.numChildren++;
return s;
}, Statistics::add);
s.statOBBNodesMB.numNodes++;
s.statOBBNodesMB.nodeSAH += dt*A;
s.depth++;
}
else if (node.isQuantizedNode())
{
QuantizedNode* n = node.quantizedNode();
s = s + parallel_reduce(0,N,Statistics(),[&] ( const int i ) {
if (n->child(i) == BVH::emptyNode) return Statistics();
const double Ai = max(0.0f,halfArea(n->extent(i)));
Statistics s = statistics(n->child(i),Ai,t0t1);
s.statQuantizedNodes.numChildren++;
return s;
}, Statistics::add);
s.statQuantizedNodes.numNodes++;
s.statQuantizedNodes.nodeSAH += dt*A;
s.depth++;
}
else if (node.isLeaf())
{
/* walk the primitive blocks of the leaf; block sizes come from the
 * primitive type registered with the BVH */
size_t num; const char* tri = node.leaf(num);
if (num)
{
for (size_t i=0; i<num; i++)
{
const size_t bytes = bvh->primTy->getBytes(tri);
s.statLeaf.numPrimsActive += bvh->primTy->sizeActive(tri);
s.statLeaf.numPrimsTotal += bvh->primTy->sizeTotal(tri);
s.statLeaf.numBytes += bytes;
tri+=bytes;
}
s.statLeaf.numLeaves++;
s.statLeaf.numPrimBlocks += num;
s.statLeaf.leafSAH += dt*A*num;
if (num-1 < Statistics::LeafStat::NHIST) {
s.statLeaf.numPrimBlocksHistogram[num-1]++;
}
}
}
else {
// -- GODOT start --
// throw std::runtime_error("not supported node type in bvh_statistics");
abort();
// -- GODOT end --
}
return s;
}
/* explicit template instantiations, gated on the compile target */
#if defined(__AVX__)
template class BVHNStatistics<8>;
#endif
#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__)
template class BVHNStatistics<4>;
#endif
}

View file

@ -0,0 +1,285 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

#include "bvh.h"

#include <iomanip>
#include <sstream>
namespace embree
{
/*! Gathers statistics (SAH cost, node counts, fill rates, memory footprint)
 *  over all node types of a BVH of width N. The per-type statistics are
 *  accumulated by recursively walking the tree (see statistics() below). */
template<int N>
class BVHNStatistics
{
  typedef BVHN<N> BVH;
  typedef typename BVH::AABBNode AABBNode;
  typedef typename BVH::OBBNode OBBNode;
  typedef typename BVH::AABBNodeMB AABBNodeMB;
  typedef typename BVH::AABBNodeMB4D AABBNodeMB4D;
  typedef typename BVH::OBBNodeMB OBBNodeMB;
  typedef typename BVH::QuantizedNode QuantizedNode;
  typedef typename BVH::NodeRef NodeRef;

  struct Statistics
  {
    /*! statistics accumulated for one inner-node type */
    template<typename Node>
    struct NodeStat
    {
      NodeStat ( double nodeSAH = 0,
                 size_t numNodes = 0,
                 size_t numChildren = 0)
      : nodeSAH(nodeSAH),
        numNodes(numNodes),
        numChildren(numChildren) {}

      /*! SAH cost normalized by the expected half area of the scene bounds */
      double sah(BVH* bvh) const {
        return nodeSAH/bvh->getLinearBounds().expectedHalfArea();
      }

      size_t bytes() const {
        return numNodes*sizeof(Node);
      }

      size_t size() const {
        return numNodes;
      }

      /*! fraction of child slots actually used (nominator/denominator kept
       *  separate so totals can be combined across node types) */
      double fillRateNom () const { return double(numChildren); }
      double fillRateDen () const { return double(numNodes*N); }
      double fillRate () const { return fillRateNom()/fillRateDen(); }

      __forceinline friend NodeStat operator+ ( const NodeStat& a, const NodeStat& b)
      {
        return NodeStat(a.nodeSAH + b.nodeSAH,
                        a.numNodes+b.numNodes,
                        a.numChildren+b.numChildren);
      }

      /*! formats this node type's statistics relative to the whole-tree totals */
      std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
      {
        std::ostringstream stream;
        stream.setf(std::ios::fixed, std::ios::floatfield);
        stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
        stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
        stream << "#bytes = " << std::setw(7) << std::setprecision(2) << bytes()/1E6 << " MB ";
        stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes())/double(bytesTotal) << "%), ";
        stream << "#nodes = " << std::setw(7) << numNodes << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate() << "% filled), ";
        stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes())/double(bvh->numPrimitives);
        return stream.str();
      }

    public:
      double nodeSAH;     //!< accumulated (unnormalized) SAH cost of nodes of this type
      size_t numNodes;    //!< number of nodes of this type
      size_t numChildren; //!< number of used child slots over all nodes of this type
    };

    /*! statistics accumulated for the leaves */
    struct LeafStat
    {
      static const int NHIST = 8; //!< number of buckets of the prim-block histogram

      LeafStat ( double leafSAH = 0.0f,
                 size_t numLeaves = 0,
                 size_t numPrimsActive = 0,
                 size_t numPrimsTotal = 0,
                 size_t numPrimBlocks = 0,
                 size_t numBytes = 0)
      : leafSAH(leafSAH),
        numLeaves(numLeaves),
        numPrimsActive(numPrimsActive),
        numPrimsTotal(numPrimsTotal),
        numPrimBlocks(numPrimBlocks),
        numBytes(numBytes)
      {
        for (size_t i=0; i<NHIST; i++)
          numPrimBlocksHistogram[i] = 0;
      }

      /*! SAH cost normalized by the expected half area of the scene bounds */
      double sah(BVH* bvh) const {
        return leafSAH/bvh->getLinearBounds().expectedHalfArea();
      }

      size_t bytes(BVH* bvh) const {
        return numBytes;
      }

      size_t size() const {
        return numLeaves;
      }

      double fillRateNom (BVH* bvh) const { return double(numPrimsActive); }
      double fillRateDen (BVH* bvh) const { return double(numPrimsTotal); }
      double fillRate (BVH* bvh) const { return fillRateNom(bvh)/fillRateDen(bvh); }

      __forceinline friend LeafStat operator+ ( const LeafStat& a, const LeafStat& b)
      {
        LeafStat stat(a.leafSAH + b.leafSAH,
                      a.numLeaves+b.numLeaves,
                      a.numPrimsActive+b.numPrimsActive,
                      a.numPrimsTotal+b.numPrimsTotal,
                      a.numPrimBlocks+b.numPrimBlocks,
                      a.numBytes+b.numBytes);
        // histograms start zeroed by the constructor above, so adding both
        // operands' buckets yields the combined histogram
        for (size_t i=0; i<NHIST; i++) {
          stat.numPrimBlocksHistogram[i] += a.numPrimBlocksHistogram[i];
          stat.numPrimBlocksHistogram[i] += b.numPrimBlocksHistogram[i];
        }
        return stat;
      }

      /*! formats the leaf statistics relative to the whole-tree totals */
      std::string toString(BVH* bvh, double sahTotal, size_t bytesTotal) const
      {
        std::ostringstream stream;
        stream.setf(std::ios::fixed, std::ios::floatfield);
        stream << "sah = " << std::setw(7) << std::setprecision(3) << sah(bvh);
        stream << " (" << std::setw(6) << std::setprecision(2) << 100.0*sah(bvh)/sahTotal << "%), ";
        stream << "#bytes = " << std::setw(7) << std::setprecision(2) << double(bytes(bvh))/1E6 << " MB ";
        stream << "(" << std::setw(6) << std::setprecision(2) << 100.0*double(bytes(bvh))/double(bytesTotal) << "%), ";
        stream << "#nodes = " << std::setw(7) << numLeaves << " (" << std::setw(6) << std::setprecision(2) << 100.0*fillRate(bvh) << "% filled), ";
        stream << "#bytes/prim = " << std::setw(6) << std::setprecision(2) << double(bytes(bvh))/double(bvh->numPrimitives);
        return stream.str();
      }

      /*! formats the prim-blocks-per-leaf histogram as percentages of all leaves */
      std::string histToString() const
      {
        std::ostringstream stream;
        stream.setf(std::ios::fixed, std::ios::floatfield);
        for (size_t i=0; i<NHIST; i++)
          stream << std::setw(6) << std::setprecision(2) << 100.0f*float(numPrimBlocksHistogram[i])/float(numLeaves) << "% ";
        return stream.str();
      }

    public:
      double leafSAH;                        //!< SAH of the leaves only
      size_t numLeaves;                      //!< Number of leaf nodes.
      size_t numPrimsActive;                 //!< Number of active primitives
      size_t numPrimsTotal;                  //!< Number of active and inactive primitives
      size_t numPrimBlocks;                  //!< Number of primitive blocks.
      size_t numBytes;                       //!< Number of bytes of leaves.
      size_t numPrimBlocksHistogram[NHIST];  //!< histogram over the number of primitive blocks per leaf
    };

  public:
    Statistics (size_t depth = 0,
                LeafStat statLeaf = LeafStat(),
                NodeStat<AABBNode> statAABBNodes = NodeStat<AABBNode>(),
                NodeStat<OBBNode> statOBBNodes = NodeStat<OBBNode>(),
                NodeStat<AABBNodeMB> statAABBNodesMB = NodeStat<AABBNodeMB>(),
                NodeStat<AABBNodeMB4D> statAABBNodesMB4D = NodeStat<AABBNodeMB4D>(),
                NodeStat<OBBNodeMB> statOBBNodesMB = NodeStat<OBBNodeMB>(),
                NodeStat<QuantizedNode> statQuantizedNodes = NodeStat<QuantizedNode>())
    : depth(depth),
      statLeaf(statLeaf),
      statAABBNodes(statAABBNodes),
      statOBBNodes(statOBBNodes),
      statAABBNodesMB(statAABBNodesMB),
      statAABBNodesMB4D(statAABBNodesMB4D),
      statOBBNodesMB(statOBBNodesMB),
      statQuantizedNodes(statQuantizedNodes) {}

    /*! total normalized SAH cost over all node types and leaves */
    double sah(BVH* bvh) const
    {
      return statLeaf.sah(bvh) +
        statAABBNodes.sah(bvh) +
        statOBBNodes.sah(bvh) +
        statAABBNodesMB.sah(bvh) +
        statAABBNodesMB4D.sah(bvh) +
        statOBBNodesMB.sah(bvh) +
        statQuantizedNodes.sah(bvh);
    }

    /*! total number of bytes over all node types and leaves */
    size_t bytes(BVH* bvh) const {
      return statLeaf.bytes(bvh) +
        statAABBNodes.bytes() +
        statOBBNodes.bytes() +
        statAABBNodesMB.bytes() +
        statAABBNodesMB4D.bytes() +
        statOBBNodesMB.bytes() +
        statQuantizedNodes.bytes();
    }

    /*! total number of nodes and leaves */
    size_t size() const
    {
      return statLeaf.size() +
        statAABBNodes.size() +
        statOBBNodes.size() +
        statAABBNodesMB.size() +
        statAABBNodesMB4D.size() +
        statOBBNodesMB.size() +
        statQuantizedNodes.size();
    }

    /*! overall fill rate, combining the per-type nominators and denominators */
    double fillRate (BVH* bvh) const
    {
      double nom = statLeaf.fillRateNom(bvh) +
        statAABBNodes.fillRateNom() +
        statOBBNodes.fillRateNom() +
        statAABBNodesMB.fillRateNom() +
        statAABBNodesMB4D.fillRateNom() +
        statOBBNodesMB.fillRateNom() +
        statQuantizedNodes.fillRateNom();
      double den = statLeaf.fillRateDen(bvh) +
        statAABBNodes.fillRateDen() +
        statOBBNodes.fillRateDen() +
        statAABBNodesMB.fillRateDen() +
        statAABBNodesMB4D.fillRateDen() +
        statOBBNodesMB.fillRateDen() +
        statQuantizedNodes.fillRateDen();
      return nom/den;
    }

    /*! combines two partial statistics; depth combines as the maximum */
    friend Statistics operator+ ( const Statistics& a, const Statistics& b )
    {
      return Statistics(max(a.depth,b.depth),
                        a.statLeaf + b.statLeaf,
                        a.statAABBNodes + b.statAABBNodes,
                        a.statOBBNodes + b.statOBBNodes,
                        a.statAABBNodesMB + b.statAABBNodesMB,
                        a.statAABBNodesMB4D + b.statAABBNodesMB4D,
                        a.statOBBNodesMB + b.statOBBNodesMB,
                        a.statQuantizedNodes + b.statQuantizedNodes);
    }

    /*! named combiner usable as reduction functor (e.g. for parallel_reduce) */
    static Statistics add ( const Statistics& a, const Statistics& b ) {
      return a+b;
    }

  public:
    size_t depth;                             //!< maximum depth of the tree
    LeafStat statLeaf;                        //!< leaf statistics
    NodeStat<AABBNode> statAABBNodes;         //!< AABB node statistics
    NodeStat<OBBNode> statOBBNodes;           //!< OBB node statistics
    NodeStat<AABBNodeMB> statAABBNodesMB;     //!< motion-blur AABB node statistics
    NodeStat<AABBNodeMB4D> statAABBNodesMB4D; //!< 4D motion-blur AABB node statistics
    NodeStat<OBBNodeMB> statOBBNodesMB;       //!< motion-blur OBB node statistics
    NodeStat<QuantizedNode> statQuantizedNodes; //!< quantized node statistics
  };

public:
  /* Constructor gathers statistics. */
  BVHNStatistics (BVH* bvh);

  /*! Convert statistics into a string */
  std::string str();

  double sah() const {
    return stat.sah(bvh);
  }

  size_t bytesUsed() const {
    return stat.bytes(bvh);
  }

private:
  /*! recursively accumulates statistics for the subtree rooted at node,
   *  where A is the parent's (half) area and dt the active time range */
  Statistics statistics(NodeRef node, const double A, const BBox1f dt);

private:
  BVH* bvh;       //!< BVH the statistics were gathered for
  Statistics stat; //!< gathered statistics
};
typedef BVHNStatistics<4> BVH4Statistics; //!< statistics for 4-wide BVHs
typedef BVHNStatistics<8> BVH8Statistics; //!< statistics for 8-wide BVHs
}

View file

@ -0,0 +1,466 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
#include "node_intersector1.h"
#include "../common/stack_item.h"
#define NEW_SORTING_CODE 1
namespace embree
{
namespace isa
{
/*! BVH regular node traversal for single rays. */
template<int N, int types>
class BVHNNodeTraverser1Hit;
#if defined(__AVX512VL__) // SKX
/*! One step of a SIMD insertion sort: inserts the splatted candidate
 *  distance d into the descending sequence held in dist, shifting all
 *  smaller entries one lane to the left. Uses >= so equal keys are kept
 *  in insertion order. */
template<int N>
__forceinline void isort_update(vint<N> &dist, const vint<N> &d)
{
  const vint<N> dist_shift = align_shift_right<N-1>(dist,dist);
  const vboolf<N> m_geq = d >= dist;
  const vboolf<N> m_geq_shift = m_geq << 1;
  dist = select(m_geq,d,dist);
  dist = select(m_geq_shift,dist_shift,dist);
}
/*! Fast-path insertion used while the sequence is still being filled:
 *  shifts dist left by one lane and places (the broadcast of) d's first
 *  lane into the last lane. */
template<int N>
__forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) {
  dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero)));
}
/*! Extracts the 64-bit child pointer selected by the low 3 bits of
 *  index's first lane from the eight children stored in n0/n1. */
__forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) {
  return toScalar(permutex2var((__m256i)index,n0,n1));
}
/*! Extracts the float lane of n selected by index's first lane. */
__forceinline float permuteExtract(const vint8& index, const vfloat8& n) {
  return toScalar(permute(n,index));
}
#endif
/* Specialization for BVH4. */
template<int types>
class BVHNNodeTraverser1Hit<4, types>
{
  typedef BVH4 BVH;
  typedef BVH4::NodeRef NodeRef;
  typedef BVH4::BaseNode BaseNode;

public:
  /* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection).
   * On return, cur is the nearest hit child; farther hit children are pushed onto the
   * stack sorted so the closest is popped first. Hit distances are compared as raw
   * IEEE float bit patterns (valid since positive floats order identically as unsigned ints). */
  static __forceinline void traverseClosestHit(NodeRef& cur,
                                               size_t mask,
                                               const vfloat4& tNear,
                                               StackItemT<NodeRef>*& stackPtr,
                                               StackItemT<NodeRef>* stackEnd)
  {
    assert(mask != 0);
    const BaseNode* node = cur.baseNode();

    /*! one child is hit, continue with that child */
    size_t r = bscf(mask);
    cur = node->child(r);
    BVH::prefetch(cur,types);
    if (likely(mask == 0)) {
      assert(cur != BVH::emptyNode);
      return;
    }

    /*! two children are hit, push far child, and continue with closer child */
    NodeRef c0 = cur;
    const unsigned int d0 = ((unsigned int*)&tNear)[r];
    r = bscf(mask);
    NodeRef c1 = node->child(r);
    BVH::prefetch(c1,types);
    const unsigned int d1 = ((unsigned int*)&tNear)[r];
    assert(c0 != BVH::emptyNode);
    assert(c1 != BVH::emptyNode);
    if (likely(mask == 0)) {
      assert(stackPtr < stackEnd);
      if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
      else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
    }

#if NEW_SORTING_CODE == 1
    /* pack (child,dist) pairs into vint4 registers and sort them as units */
    vint4 s0((size_t)c0,(size_t)d0);
    vint4 s1((size_t)c1,(size_t)d1);
    r = bscf(mask);
    NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
    vint4 s2((size_t)c2,(size_t)d2);
    /* 3 hits */
    if (likely(mask == 0)) {
      StackItemT<NodeRef>::sort3(s0,s1,s2);
      *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
      cur = toSizeT(s2);
      stackPtr+=2;
      return;
    }
    r = bscf(mask);
    NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
    vint4 s3((size_t)c3,(size_t)d3);
    /* 4 hits */
    StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
    *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
    cur = toSizeT(s3);
    stackPtr+=3;
#else
    /*! Here starts the slow path for 3 or 4 hit children. We push
     *  all nodes onto the stack to sort them there. */
    assert(stackPtr < stackEnd);
    stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
    assert(stackPtr < stackEnd);
    stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;

    /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
    assert(stackPtr < stackEnd);
    r = bscf(mask);
    NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
    assert(c != BVH::emptyNode);
    if (likely(mask == 0)) {
      sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
      cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
      return;
    }

    /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
    assert(stackPtr < stackEnd);
    r = bscf(mask);
    c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
    assert(c != BVH::emptyNode);
    sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
    cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#endif
  }

  /* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion).
   * No distance sorting: the first hit child is continued with and the
   * remaining hit children are pushed unsorted. */
  static __forceinline void traverseAnyHit(NodeRef& cur,
                                           size_t mask,
                                           const vfloat4& tNear,
                                           NodeRef*& stackPtr,
                                           NodeRef* stackEnd)
  {
    const BaseNode* node = cur.baseNode();

    /*! one child is hit, continue with that child */
    size_t r = bscf(mask);
    cur = node->child(r);
    BVH::prefetch(cur,types);

    /* simpler in sequence traversal order */
    assert(cur != BVH::emptyNode);
    if (likely(mask == 0)) return;
    assert(stackPtr < stackEnd);
    *stackPtr = cur; stackPtr++;

    for (; ;)
    {
      r = bscf(mask);
      cur = node->child(r); BVH::prefetch(cur,types);
      assert(cur != BVH::emptyNode);
      if (likely(mask == 0)) return;
      assert(stackPtr < stackEnd);
      *stackPtr = cur; stackPtr++;
    }
  }
};
/* Specialization for BVH8. */
template<int types>
class BVHNNodeTraverser1Hit<8, types>
{
  typedef BVH8 BVH;
  typedef BVH8::NodeRef NodeRef;
  typedef BVH8::BaseNode BaseNode;

#if defined(__AVX512VL__)
  /* AVX-512 variant: packs each hit child's distance and its child index
   * into one 32-bit key (distance bits with the low 3 bits replaced by the
   * lane index), then sorts the keys with a min/max network; the child
   * pointer is recovered from the key's low bits via permuteExtract. */
  template<class NodeRef, class BaseNode>
  static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur,
                                                        size_t mask,
                                                        const vfloat8& tNear,
                                                        StackItemT<NodeRef>*& stackPtr,
                                                        StackItemT<NodeRef>* stackEnd)
  {
    assert(mask != 0);
    const BaseNode* node = cur.baseNode();
    const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]);
    const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]);
    /* key = distance bits (top 29) | lane index (low 3); compact moves hit lanes to the front */
    vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step);
    distance_i = vint8::compact((int)mask,distance_i,distance_i);
    cur = permuteExtract(distance_i,n0,n1);
    BVH::prefetch(cur,types);

    mask &= mask-1;
    if (likely(mask == 0)) return;

    /* 2 hits: order A0 B0 */
    const vint8 d0(distance_i);
    const vint8 d1(shuffle<1>(distance_i));
    cur = permuteExtract(d1,n0,n1);
    BVH::prefetch(cur,types);

    const vint8 dist_A0 = min(d0, d1);
    const vint8 dist_B0 = max(d0, d1);
    assert(dist_A0[0] < dist_B0[0]);

    mask &= mask-1;
    if (likely(mask == 0)) {
      cur = permuteExtract(dist_A0,n0,n1);
      stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1);
      *(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear);
      stackPtr++;
      return;
    }

    /* 3 hits: order A1 B1 C1 */
    const vint8 d2(shuffle<2>(distance_i));
    cur = permuteExtract(d2,n0,n1);
    BVH::prefetch(cur,types);

    const vint8 dist_A1     = min(dist_A0,d2);
    const vint8 dist_tmp_B1 = max(dist_A0,d2);
    const vint8 dist_B1     = min(dist_B0,dist_tmp_B1);
    const vint8 dist_C1     = max(dist_B0,dist_tmp_B1);
    assert(dist_A1[0] < dist_B1[0]);
    assert(dist_B1[0] < dist_C1[0]);

    mask &= mask-1;
    if (likely(mask == 0)) {
      cur = permuteExtract(dist_A1,n0,n1);
      stackPtr[0].ptr  = permuteExtract(dist_C1,n0,n1);
      *(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear);
      stackPtr[1].ptr  = permuteExtract(dist_B1,n0,n1);
      *(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear);
      stackPtr+=2;
      return;
    }

    /* 4 hits: order A2 B2 C2 D2 */
    const vint8 d3(shuffle<3>(distance_i));
    cur = permuteExtract(d3,n0,n1);
    BVH::prefetch(cur,types);

    const vint8 dist_A2     = min(dist_A1,d3);
    const vint8 dist_tmp_B2 = max(dist_A1,d3);
    const vint8 dist_B2     = min(dist_B1,dist_tmp_B2);
    const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2);
    const vint8 dist_C2     = min(dist_C1,dist_tmp_C2);
    const vint8 dist_D2     = max(dist_C1,dist_tmp_C2);
    assert(dist_A2[0] < dist_B2[0]);
    assert(dist_B2[0] < dist_C2[0]);
    assert(dist_C2[0] < dist_D2[0]);

    mask &= mask-1;
    if (likely(mask == 0)) {
      cur = permuteExtract(dist_A2,n0,n1);
      stackPtr[0].ptr  = permuteExtract(dist_D2,n0,n1);
      *(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear);
      stackPtr[1].ptr  = permuteExtract(dist_C2,n0,n1);
      *(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear);
      stackPtr[2].ptr  = permuteExtract(dist_B2,n0,n1);
      *(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear);
      stackPtr+=3;
      return;
    }

    /* >=5 hits: reverse to descending order for writing to stack */
    distance_i = align_shift_right<3>(distance_i,distance_i);
    const size_t hits = 4 + popcnt(mask);
    vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert

    isort_quick_update<8>(dist,dist_A2);
    isort_quick_update<8>(dist,dist_B2);
    isort_quick_update<8>(dist,dist_C2);
    isort_quick_update<8>(dist,dist_D2);

    do {
      distance_i = align_shift_right<1>(distance_i,distance_i);
      cur = permuteExtract(distance_i,n0,n1);
      BVH::prefetch(cur,types);
      const vint8 new_dist(permute(distance_i,vint8(zero)));
      mask &= mask-1;
      isort_update<8>(dist,new_dist);
    } while(mask);

    for (size_t i=0; i<7; i++)
      assert(dist[i+0]>=dist[i+1]);

    /* push the hits farthest-first so the closest is popped first */
    for (size_t i=0;i<hits-1;i++)
    {
      stackPtr->ptr = permuteExtract(dist,n0,n1);
      *(float*)&stackPtr->dist = permuteExtract(dist,tNear);
      dist = align_shift_right<1>(dist,dist);
      stackPtr++;
    }
    cur = permuteExtract(dist,n0,n1);
  }
#endif

public:
  /* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection).
   * Continues with the nearest hit child; farther hit children are pushed sorted by distance.
   * Distances are compared as raw IEEE float bit patterns (valid for positive floats). */
  static __forceinline void traverseClosestHit(NodeRef& cur,
                                               size_t mask,
                                               const vfloat8& tNear,
                                               StackItemT<NodeRef>*& stackPtr,
                                               StackItemT<NodeRef>* stackEnd)
  {
    assert(mask != 0);
#if defined(__AVX512VL__)
    traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
#else

    const BaseNode* node = cur.baseNode();

    /*! one child is hit, continue with that child */
    size_t r = bscf(mask);
    cur = node->child(r);
    BVH::prefetch(cur,types);
    if (likely(mask == 0)) {
      assert(cur != BVH::emptyNode);
      return;
    }

    /*! two children are hit, push far child, and continue with closer child */
    NodeRef c0 = cur;
    const unsigned int d0 = ((unsigned int*)&tNear)[r];
    r = bscf(mask);
    NodeRef c1 = node->child(r);
    BVH::prefetch(c1,types);
    const unsigned int d1 = ((unsigned int*)&tNear)[r];

    assert(c0 != BVH::emptyNode);
    assert(c1 != BVH::emptyNode);
    if (likely(mask == 0)) {
      assert(stackPtr < stackEnd);
      if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
      else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
    }

#if NEW_SORTING_CODE == 1
    /* pack (child,dist) pairs into vint4 registers and sort them as units */
    vint4 s0((size_t)c0,(size_t)d0);
    vint4 s1((size_t)c1,(size_t)d1);

    r = bscf(mask);
    NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
    vint4 s2((size_t)c2,(size_t)d2);

    /* 3 hits */
    if (likely(mask == 0)) {
      StackItemT<NodeRef>::sort3(s0,s1,s2);
      *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
      cur = toSizeT(s2);
      stackPtr+=2;
      return;
    }

    r = bscf(mask);
    NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
    vint4 s3((size_t)c3,(size_t)d3);

    /* 4 hits */
    if (likely(mask == 0)) {
      StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
      *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
      cur = toSizeT(s3);
      stackPtr+=3;
      return;
    }
    *(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3;

    /*! fallback case if more than 4 children are hit */
    StackItemT<NodeRef>* stackFirst = stackPtr;
    stackPtr+=4;
    while (1)
    {
      assert(stackPtr < stackEnd);
      r = bscf(mask);
      NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r];
      const vint4 s((size_t)c,(size_t)d);
      *(vint4*)stackPtr++ = s;
      assert(c != BVH::emptyNode);
      if (unlikely(mask == 0)) break;
    }
    sort(stackFirst,stackPtr);
    cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#else
    /*! Here starts the slow path for 3 or 4 hit children. We push
     *  all nodes onto the stack to sort them there. */
    assert(stackPtr < stackEnd);
    stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
    assert(stackPtr < stackEnd);
    stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;

    /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
    assert(stackPtr < stackEnd);
    r = bscf(mask);
    NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
    assert(c != BVH::emptyNode);
    if (likely(mask == 0)) {
      sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
      cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
      return;
    }

    /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
    assert(stackPtr < stackEnd);
    r = bscf(mask);
    c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
    assert(c != BVH::emptyNode);
    if (likely(mask == 0)) {
      sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
      cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
      return;
    }

    /*! fallback case if more than 4 children are hit */
    StackItemT<NodeRef>* stackFirst = stackPtr-4;
    while (1)
    {
      assert(stackPtr < stackEnd);
      r = bscf(mask);
      c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
      assert(c != BVH::emptyNode);
      if (unlikely(mask == 0)) break;
    }
    sort(stackFirst,stackPtr);
    cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
#endif
#endif
  }

  /* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion).
   * No distance sorting: continues with the first hit child and pushes the rest unsorted. */
  static __forceinline void traverseAnyHit(NodeRef& cur,
                                           size_t mask,
                                           const vfloat8& tNear,
                                           NodeRef*& stackPtr,
                                           NodeRef* stackEnd)
  {
    const BaseNode* node = cur.baseNode();

    /*! one child is hit, continue with that child */
    size_t r = bscf(mask);
    cur = node->child(r);
    BVH::prefetch(cur,types);

    /* simpler in sequence traversal order */
    assert(cur != BVH::emptyNode);
    if (likely(mask == 0)) return;
    assert(stackPtr < stackEnd);
    *stackPtr = cur; stackPtr++;

    for (; ;)
    {
      r = bscf(mask);
      cur = node->child(r); BVH::prefetch(cur,types);
      assert(cur != BVH::emptyNode);
      if (likely(mask == 0)) return;
      assert(stackPtr < stackEnd);
      *stackPtr = cur; stackPtr++;
    }
  }
};
}
}

View file

@ -0,0 +1,31 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "bvh.h"
namespace embree
{
namespace isa
{
/*! Precomputed byte offsets selecting the near/far bounding planes of a
 *  node for a given ray direction. A node stores, per axis, the N lower
 *  values followed by the N upper values; each plane therefore occupies
 *  sizeof(float)*N bytes. */
struct NearFarPrecalculations
{
  size_t nearX, nearY, nearZ;
  size_t farX, farY, farZ;

  __forceinline NearFarPrecalculations() {}

  __forceinline NearFarPrecalculations(const Vec3fa& dir, size_t N)
  {
    const size_t stride = sizeof(float)*N;

    /* a negative direction component makes the upper plane the near one */
    nearX = dir.x < 0.0f ? stride   : 0;
    nearY = dir.y < 0.0f ? 3*stride : 2*stride;
    nearZ = dir.z < 0.0f ? 5*stride : 4*stride;

    /* the far plane is always the partner plane of the same axis */
    farX = nearX ^ stride;
    farY = nearY ^ stride;
    farZ = nearZ ^ stride;
  }
};
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,257 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "node_intersector.h"
namespace embree
{
namespace isa
{
//////////////////////////////////////////////////////////////////////////////////////
// Frustum structure used in hybrid and stream traversal
//////////////////////////////////////////////////////////////////////////////////////
/*
Optimized frustum test. We calculate t=(p-org)/dir in ray/box
intersection. We assume the rays are split by octant, thus
dir intervals are either positive or negative in each
dimension.
Case 1: dir.min >= 0 && dir.max >= 0:
t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
Case 2: dir.min < 0 && dir.max < 0:
t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
*/
template<bool robust>
struct Frustum;
/* Fast variant */
template<>
struct Frustum<false>
{
  __forceinline Frustum() {}

  /*! initializes the frustum as a conservative bound over the active (valid)
   *  rays of a packet, by component-wise min/max reduction of origins,
   *  reciprocal directions and t-ranges */
  template<int K>
  __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  {
    /* inactive lanes are neutralized with +/-inf so they cannot win the reduction */
    const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
                                 reduce_min(select(valid, org.y, pos_inf)),
                                 reduce_min(select(valid, org.z, pos_inf)));
    const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
                                 reduce_max(select(valid, org.y, neg_inf)),
                                 reduce_max(select(valid, org.z, neg_inf)));
    const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
                                  reduce_min(select(valid, rdir.y, pos_inf)),
                                  reduce_min(select(valid, rdir.z, pos_inf)));
    const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
                                  reduce_max(select(valid, rdir.y, neg_inf)),
                                  reduce_max(select(valid, rdir.z, neg_inf)));
    const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
    const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));

    init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
  }

  /*! initializes the frustum from already-reduced bounds; assumes the rays
   *  were split by octant so each rdir component interval has one sign
   *  (see the case analysis in the comment above this class) */
  __forceinline void init(const Vec3fa& reduced_min_org,
                          const Vec3fa& reduced_max_org,
                          const Vec3fa& reduced_min_rdir,
                          const Vec3fa& reduced_max_rdir,
                          float reduced_min_dist,
                          float reduced_max_dist,
                          int N)
  {
    /* swap min/max per axis so min_rdir/max_rdir match the t_min/t_max formulas of both sign cases */
    const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
    min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
    max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);

#if defined (__aarch64__)
    /* pre-negated products so the hot loop can use fused multiply-add */
    neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
    neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
#else
    min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
    max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
#endif
    min_dist = reduced_min_dist;
    max_dist = reduced_max_dist;

    nf = NearFarPrecalculations(min_rdir, N);
  }

  /*! shrinks the far distance after rays in the packet got shortened */
  template<int K>
  __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
  {
    max_dist = reduce_max(ray_tfar);
  }

  NearFarPrecalculations nf;  //!< near/far plane offsets for min_rdir

  Vec3fa min_rdir;            //!< reduced reciprocal direction (near side)
  Vec3fa max_rdir;            //!< reduced reciprocal direction (far side)

#if defined (__aarch64__)
  Vec3fa neg_min_org_rdir;    //!< -(min_rdir * org) precomputed for madd
  Vec3fa neg_max_org_rdir;    //!< -(max_rdir * org) precomputed for madd
#else
  Vec3fa min_org_rdir;        //!< min_rdir * org precomputed for msub
  Vec3fa max_org_rdir;        //!< max_rdir * org precomputed for msub
#endif
  float min_dist;             //!< smallest tnear over the packet
  float max_dist;             //!< largest tfar over the packet
};
typedef Frustum<false> FrustumFast;
/* Robust variant */
template<>
struct Frustum<true>
{
  __forceinline Frustum() {}

  /*! initializes the frustum as a conservative bound over the active (valid)
   *  rays of a packet, by component-wise min/max reduction of origins,
   *  reciprocal directions and t-ranges */
  template<int K>
  __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  {
    /* inactive lanes are neutralized with +/-inf so they cannot win the reduction */
    const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
                                 reduce_min(select(valid, org.y, pos_inf)),
                                 reduce_min(select(valid, org.z, pos_inf)));
    const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
                                 reduce_max(select(valid, org.y, neg_inf)),
                                 reduce_max(select(valid, org.z, neg_inf)));
    const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
                                  reduce_min(select(valid, rdir.y, pos_inf)),
                                  reduce_min(select(valid, rdir.z, pos_inf)));
    const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
                                  reduce_max(select(valid, rdir.y, neg_inf)),
                                  reduce_max(select(valid, rdir.z, neg_inf)));
    const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
    const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));

    init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
  }

  /*! initializes the frustum from already-reduced bounds; unlike the fast
   *  variant it keeps origin and reciprocal direction separate so the
   *  intersection test can use the numerically robust (p-org)*rdir form */
  __forceinline void init(const Vec3fa& reduced_min_org,
                          const Vec3fa& reduced_max_org,
                          const Vec3fa& reduced_min_rdir,
                          const Vec3fa& reduced_max_rdir,
                          float reduced_min_dist,
                          float reduced_max_dist,
                          int N)
  {
    /* swap min/max per axis so min_rdir/max_rdir match the t_min/t_max formulas of both sign cases */
    const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
    min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
    max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);

    min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
    max_org = select(pos_rdir, reduced_min_org, reduced_max_org);

    min_dist = reduced_min_dist;
    max_dist = reduced_max_dist;

    nf = NearFarPrecalculations(min_rdir, N);
  }

  /*! shrinks the far distance after rays in the packet got shortened */
  template<int K>
  __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
  {
    max_dist = reduce_max(ray_tfar);
  }

  NearFarPrecalculations nf; //!< near/far plane offsets for min_rdir

  Vec3fa min_rdir;           //!< reduced reciprocal direction (near side)
  Vec3fa max_rdir;           //!< reduced reciprocal direction (far side)

  Vec3fa min_org;            //!< reduced origin paired with min_rdir
  Vec3fa max_org;            //!< reduced origin paired with max_rdir

  float min_dist;            //!< smallest tnear over the packet
  float max_dist;            //!< largest tfar over the packet
};
typedef Frustum<true> FrustumRobust;
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects all N child boxes of an AABB node with the fast frustum.
 *  Returns a bitmask of the children whose [fmin,fmax] slab interval is
 *  non-empty; dist receives the per-child entry distance fmin. */
template<int N>
__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
                                          const FrustumFast& frustum, vfloat<N>& dist)
{
  /* select near/far planes per axis via the precomputed byte offsets */
  const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
  const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
  const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
  const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
  const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
  const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);

  /* slab distances p*rdir - org*rdir via fused multiply-add/subtract */
#if defined (__aarch64__)
  const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
  const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
  const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
  const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
  const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
  const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
#else
  const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
  const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
  const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
  const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
  const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
  const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
#endif
  const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
  dist = fmin;
  const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
  const vbool<N> vmask_node_hit = fmin <= fmax;
  size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
  return m_node;
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects all N child boxes of an AABB node with the robust frustum.
 *  Computes the slabs as (p-org)*rdir and widens the interval comparison
 *  by a few ulps so rounding error cannot cull a box the ray touches.
 *  Returns a hit bitmask; dist receives the per-child entry distance. */
template<int N>
__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
                                          const FrustumRobust& frustum, vfloat<N>& dist)
{
  /* select near/far planes per axis via the precomputed byte offsets */
  const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
  const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
  const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
  const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
  const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
  const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);

  const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
  const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
  const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
  const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
  const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
  const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);

  /* conservative rounding factors for the interval test */
  const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
  const float round_up   = 1.0f+2.0f*float(ulp);
  const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
  dist = fmin;
  const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
  const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax);
  size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
  return m_node;
}
}
}

View file

@ -0,0 +1,844 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "node_intersector.h"
namespace embree
{
namespace isa
{
//////////////////////////////////////////////////////////////////////////////////////
// Ray packet structure used in hybrid traversal
//////////////////////////////////////////////////////////////////////////////////////
/*! Precomputed traversal state for a packet of K rays; the 'robust'
    parameter selects the fast or the numerically conservative variant. */
template<int K, bool robust>
struct TravRayK;
/* Fast variant */
template<int K>
struct TravRayK<K, false>
{
__forceinline TravRayK() {}
/*! construct from ray origins and directions; N is the BVH width used to
    precompute the near-plane byte offsets (0 disables that precomputation) */
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
{
init(ray_org, ray_dir, N);
}
/*! construct including the per-ray [tnear,tfar] clip interval */
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
{
init(ray_org, ray_dir, N);
tnear = ray_tnear;
tfar = ray_tfar;
}
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
{
org = ray_org;
dir = ray_dir;
rdir = rcp_safe(ray_dir);
/* precompute org*rdir (negated on aarch64) so node tests can use a single
   fused multiply-add per plane */
#if defined(__aarch64__)
neg_org_rdir = -(org * rdir);
#elif defined(__AVX2__)
org_rdir = org * rdir;
#endif
if (N)
{
/* per-axis byte offsets into an AABBNode's bounds arrays selecting the
   near plane (lower or upper) based on the ray direction sign */
const int size = sizeof(float)*N;
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
}
}
Vec3vf<K> org;   //!< ray origins
Vec3vf<K> dir;   //!< ray directions
Vec3vf<K> rdir;  //!< reciprocal ray directions (safe against zero components)
#if defined(__aarch64__)
Vec3vf<K> neg_org_rdir;  //!< -(org*rdir), folded into madd in the node tests
#elif defined(__AVX2__)
Vec3vf<K> org_rdir;      //!< org*rdir, folded into msub in the node tests
#endif
Vec3vi<K> nearXYZ;  //!< per-axis near-plane byte offsets
vfloat<K> tnear;    //!< ray interval start
vfloat<K> tfar;     //!< ray interval end
};
template<int K>
using TravRayKFast = TravRayK<K, false>;
/* Robust variant: no precomputed org*rdir (node tests must avoid fused
   multiply-add so the ulp-based error bounds hold) */
template<int K>
struct TravRayK<K, true>
{
__forceinline TravRayK() {}
/*! construct from ray origins and directions; N is the BVH width used to
    precompute the near-plane byte offsets (0 disables that precomputation) */
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
{
init(ray_org, ray_dir, N);
}
/*! construct including the per-ray [tnear,tfar] clip interval */
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
{
init(ray_org, ray_dir, N);
tnear = ray_tnear;
tfar = ray_tfar;
}
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
{
org = ray_org;
dir = ray_dir;
/* zero_fix replaces zero direction components before the divide */
rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
if (N)
{
/* per-axis byte offsets into an AABBNode's bounds arrays selecting the
   near plane (lower or upper) based on the ray direction sign */
const int size = sizeof(float)*N;
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
}
}
Vec3vf<K> org;      //!< ray origins
Vec3vf<K> dir;      //!< ray directions
Vec3vf<K> rdir;     //!< reciprocal ray directions
Vec3vi<K> nearXYZ;  //!< per-axis near-plane byte offsets
vfloat<K> tnear;    //!< ray interval start
vfloat<K> tfar;     //!< ray interval end
};
template<int K>
using TravRayKRobust = TravRayK<K, true>;
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects K rays with child box i of a regular AABBNode (fast variant).
    Returns the hit mask; the per-ray entry distance is written to 'dist'. */
template<int N, int K>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
const TravRayKFast<K>& ray, vfloat<K>& dist)
{
/* slab distances per axis; FMA-capable targets fold the precomputed
   org*rdir term into a single madd/msub per plane */
#if defined(__aarch64__)
const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x)
const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
#else
const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
#endif
#if defined(__AVX512F__) // SKX
if (K == 16)
{
/* use mixed float/int min/max */
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
dist = lnearP;
return lhit;
}
else
#endif
{
/* hit when latest entry <= earliest exit, clipped to [tnear,tfar] */
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
#if defined(__AVX512F__) // SKX
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
#else
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
#endif
dist = lnearP;
return lhit;
}
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Robust slab test of child box i of an AABBNode against K rays. Plane
    distances use plain sub/mul (no FMA) and the resulting interval is
    widened by a 3-ulp error bound so rounding cannot miss a hit. */
template<int N, int K>
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
const TravRayKRobust<K>& ray, vfloat<K>& dist)
{
// FIXME: use per instruction rounding for AVX512
const vfloat<K> tLoX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> tHiX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> tLoY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> tHiY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> tLoZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
const vfloat<K> tHiZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
/* conservative scale factors of 3 ulp in each direction */
const float scale_down = 1.0f-3.0f*float(ulp);
const float scale_up   = 1.0f+3.0f*float(ulp);
const vfloat<K> tNear = scale_down*max(max(min(tLoX, tHiX), min(tLoY, tHiY)), min(tLoZ, tHiZ));
const vfloat<K> tFar  = scale_up *min(min(max(tLoX, tHiX), max(tLoY, tHiY)), max(tLoZ, tHiZ));
dist = tNear;
return max(tNear, ray.tnear) <= min(tFar, ray.tfar);
}
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNodeMB intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects K rays with child box i of a motion-blur AABBNodeMB (fast
    variant). The child bounds are linearly extrapolated from the stored
    base bounds and deltas to each ray's time before the slab test. */
template<int N, int K>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
/* interpolate the bounds: bound(t) = base + t*delta */
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
/* slab distances; FMA targets fold the precomputed org*rdir term */
#if defined(__aarch64__)
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
#else
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
#endif
#if defined(__AVX512F__) // SKX
if (K == 16)
{
/* use mixed float/int min/max */
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
dist = lnearP;
return lhit;
}
else
#endif
{
/* hit when latest entry <= earliest exit, clipped to [tnear,tfar] */
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
#if defined(__AVX512F__) // SKX
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
#else
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
#endif
dist = lnearP;
return lhit;
}
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNodeMB intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Robust variant of the AABBNodeMB slab test: interpolates the bounds to
    each ray's time, then widens the [entry,exit] interval by a 3-ulp
    bound so rounding cannot miss a hit. */
template<int N, int K>
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
/* interpolate the bounds: bound(t) = base + t*delta */
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
/* robust variant: separate sub/mul so the ulp error bound below holds */
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
const float round_up = 1.0f+3.0f*float(ulp);
const float round_down = 1.0f-3.0f*float(ulp);
#if defined(__AVX512F__) // SKX
if (K == 16)
{
const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
dist = lnearP;
return lhit;
}
else
#endif
{
const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
dist = lnearP;
return lhit;
}
}
//////////////////////////////////////////////////////////////////////////////////////
// Fast AABBNodeMB4D intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects K rays with child box i of a motion-blur node, additionally
    applying the child's valid time range when the node is an AABBNodeMB4D
    (4D = space + time). Fast (FMA) variant. */
template<int N, int K>
__forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
/* interpolate the bounds: bound(t) = base + t*delta */
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
/* slab distances; FMA targets fold the precomputed org*rdir term */
#if defined(__aarch64__)
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
#else
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
#endif
const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
/* for 4D nodes additionally require the ray time inside [lower_t,upper_t) */
if (unlikely(ref.isAABBNodeMB4D())) {
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
}
dist = lnearP;
return lhit;
}
//////////////////////////////////////////////////////////////////////////////////////
// Robust AABBNodeMB4D intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Robust variant of the 4D motion-blur node test: interpolates the child
    bounds to each ray's time, widens the slab interval by a 3-ulp bound,
    and for AABBNodeMB4D nodes also checks the child's valid time range. */
template<int N, int K>
__forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
/* linearly interpolated child bounds at the ray times */
const vfloat<K> loX = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
const vfloat<K> loY = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
const vfloat<K> loZ = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
const vfloat<K> hiX = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
const vfloat<K> hiY = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
const vfloat<K> hiZ = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
/* robust slab distances: plain sub/mul, no FMA */
const vfloat<K> tLoX = (loX - ray.org.x) * ray.rdir.x;
const vfloat<K> tHiX = (hiX - ray.org.x) * ray.rdir.x;
const vfloat<K> tLoY = (loY - ray.org.y) * ray.rdir.y;
const vfloat<K> tHiY = (hiY - ray.org.y) * ray.rdir.y;
const vfloat<K> tLoZ = (loZ - ray.org.z) * ray.rdir.z;
const vfloat<K> tHiZ = (hiZ - ray.org.z) * ray.rdir.z;
/* conservative 3-ulp widening of the interval */
const float scale_down = 1.0f-3.0f*float(ulp);
const float scale_up   = 1.0f+3.0f*float(ulp);
const vfloat<K> tNear = scale_down*maxi(maxi(mini(tLoX, tHiX), mini(tLoY, tHiY)), mini(tLoZ, tHiZ));
const vfloat<K> tFar  = scale_up *mini(mini(maxi(tLoX, tHiX), maxi(tLoY, tHiY)), maxi(tLoZ, tHiZ));
vbool<K> hit = maxi(tNear, ray.tnear) <= mini(tFar, ray.tfar);
/* 4D nodes additionally bound the valid time range of the child */
if (unlikely(ref.isAABBNodeMB4D())) {
const typename BVHN<N>::AABBNodeMB4D* node4d = (const typename BVHN<N>::AABBNodeMB4D*) node;
hit &= (vfloat<K>(node4d->lower_t[i]) <= time) & (time < vfloat<K>(node4d->upper_t[i]));
}
dist = tNear;
return hit;
}
//////////////////////////////////////////////////////////////////////////////////////
// Fast OBBNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects K rays with oriented-bounding-box child i of an OBBNode. The
    rays are transformed into the child's local space, where the box spans
    the unit cube, so the slab planes sit at 0 and 1 on every axis. */
template<int N, int K, bool robust>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
const TravRayK<K,robust>& ray, vfloat<K>& dist)
{
/* gather the affine world-to-box transform of child i */
const AffineSpace3vf<K> space(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
/* move the rays into unit-box space */
const Vec3vf<K> ldir = xfmVector(space, ray.dir);
const Vec3vf<K> neg_rdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(ldir); // FIXME: negate instead of mul with -1?
const Vec3vf<K> lorg = xfmPoint(space, ray.org);
/* slab distances against the unit box */
const vfloat<K> t0X = lorg.x * neg_rdir.x; // (Vec3fa(zero) - org) * rdir;
const vfloat<K> t0Y = lorg.y * neg_rdir.y;
const vfloat<K> t0Z = lorg.z * neg_rdir.z;
const vfloat<K> t1X = t0X - neg_rdir.x; // (Vec3fa(one) - org) * rdir;
const vfloat<K> t1Y = t0Y - neg_rdir.y;
const vfloat<K> t1Z = t0Z - neg_rdir.z;
vfloat<K> tNear = maxi(mini(t0X, t1X), mini(t0Y, t1Y), mini(t0Z, t1Z));
vfloat<K> tFar  = mini(maxi(t0X, t1X), maxi(t0Y, t1Y), maxi(t0Z, t1Z));
if (robust) {
/* widen the interval by 3 ulp to keep the test conservative */
tNear = tNear*vfloat<K>(1.0f-3.0f*float(ulp));
tFar  = tFar *vfloat<K>(1.0f+3.0f*float(ulp));
}
dist = tNear;
return maxi(tNear, ray.tnear) <= mini(tFar, ray.tfar);
}
//////////////////////////////////////////////////////////////////////////////////////
// Fast OBBNodeMB intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects K rays with oriented child i of a motion-blur OBBNodeMB. In
    the child's local space the bounds at t=0 are the unit box and the
    bounds at t=1 are stored in b1; they are lerped to each ray's time. */
template<int N, int K, bool robust>
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
/* gather the affine world-to-box transform of child i at time zero */
const AffineSpace3vf<K> space(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
/* interpolate the local-space bounds to the ray times */
const Vec3vf<K> bounds0_lo = zero;
const Vec3vf<K> bounds0_hi = one;
const Vec3vf<K> bounds1_lo(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
const Vec3vf<K> bounds1_hi(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
const Vec3vf<K> lo = lerp(bounds0_lo, bounds1_lo, time);
const Vec3vf<K> hi = lerp(bounds0_hi, bounds1_hi, time);
/* move the rays into the local space of the child */
const Vec3vf<K> ldir  = xfmVector(space, ray.dir);
const Vec3vf<K> lrdir = rcp_safe(ldir);
const Vec3vf<K> lorg  = xfmPoint(space, ray.org);
/* slab test against the interpolated local-space box */
const vfloat<K> t0X = (lo.x - lorg.x) * lrdir.x;
const vfloat<K> t0Y = (lo.y - lorg.y) * lrdir.y;
const vfloat<K> t0Z = (lo.z - lorg.z) * lrdir.z;
const vfloat<K> t1X = (hi.x - lorg.x) * lrdir.x;
const vfloat<K> t1Y = (hi.y - lorg.y) * lrdir.y;
const vfloat<K> t1Z = (hi.z - lorg.z) * lrdir.z;
vfloat<K> tNear = maxi(mini(t0X, t1X), mini(t0Y, t1Y), mini(t0Z, t1Z));
vfloat<K> tFar  = mini(maxi(t0X, t1X), maxi(t0Y, t1Y), maxi(t0Z, t1Z));
if (robust) {
/* widen the interval by 3 ulp to keep the test conservative */
tNear = tNear*vfloat<K>(1.0f-3.0f*float(ulp));
tFar  = tFar *vfloat<K>(1.0f+3.0f*float(ulp));
}
dist = tNear;
return maxi(tNear, ray.tnear) <= mini(tFar, ray.tfar);
}
//////////////////////////////////////////////////////////////////////////////////////
// QuantizedBaseNode intersection
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects K rays with child box i of a QuantizedBaseNode (fast
    variant). The quantized bounds are first dequantized to full-precision
    vectors, then the standard slab test is applied. */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
const TravRayK<K,false>& ray, vfloat<K>& dist)
{
assert(movemask(node->validMask()) & ((size_t)1 << i));
/* dequantize the bounds of all N children; child i is selected below */
const vfloat<N> lower_x = node->dequantizeLowerX();
const vfloat<N> upper_x = node->dequantizeUpperX();
const vfloat<N> lower_y = node->dequantizeLowerY();
const vfloat<N> upper_y = node->dequantizeUpperY();
const vfloat<N> lower_z = node->dequantizeLowerZ();
const vfloat<N> upper_z = node->dequantizeUpperZ();
/* slab distances; FMA targets fold the precomputed org*rdir term */
#if defined(__aarch64__)
const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
#else
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
#endif
#if defined(__AVX512F__) // SKX
if (K == 16)
{
/* use mixed float/int min/max */
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
dist = lnearP;
return lhit;
}
else
#endif
{
/* hit when latest entry <= earliest exit, clipped to [tnear,tfar] */
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
#if defined(__AVX512F__) // SKX
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
#else
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
#endif
dist = lnearP;
return lhit;
}
}
/*! Robust variant of the quantized-node slab test: dequantizes the bounds,
    uses plain sub/mul (no FMA), and widens the interval by 3 ulp so
    rounding cannot miss a hit. */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
const TravRayK<K,true>& ray, vfloat<K>& dist)
{
assert(movemask(node->validMask()) & ((size_t)1 << i));
/* dequantize the bounds of all N children; child i is selected below */
const vfloat<N> loX = node->dequantizeLowerX();
const vfloat<N> hiX = node->dequantizeUpperX();
const vfloat<N> loY = node->dequantizeLowerY();
const vfloat<N> hiY = node->dequantizeUpperY();
const vfloat<N> loZ = node->dequantizeLowerZ();
const vfloat<N> hiZ = node->dequantizeUpperZ();
const vfloat<K> t0X = (loX[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> t0Y = (loY[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> t0Z = (loZ[i] - ray.org.z) * ray.rdir.z;
const vfloat<K> t1X = (hiX[i] - ray.org.x) * ray.rdir.x;
const vfloat<K> t1Y = (hiY[i] - ray.org.y) * ray.rdir.y;
const vfloat<K> t1Z = (hiZ[i] - ray.org.z) * ray.rdir.z;
/* conservative 3-ulp widening of the interval */
const float scale_up   = 1.0f+3.0f*float(ulp);
const float scale_down = 1.0f-3.0f*float(ulp);
const vfloat<K> tNear = scale_down*max(min(t0X, t1X), min(t0Y, t1Y), min(t0Z, t1Z));
const vfloat<K> tFar  = scale_up *min(max(t0X, t1X), max(t0Y, t1Y), max(t0Z, t1Z));
dist = tNear;
return max(tNear, ray.tnear) <= min(tFar, ray.tfar);
}
/*! Intersects K rays with child box i of a motion-blur QuantizedBaseNodeMB
    (fast variant). The bounds are dequantized and interpolated to each
    ray's time by the node's dequantize helpers. */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
assert(movemask(node->validMask()) & ((size_t)1 << i));
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
/* slab distances; FMA targets fold the precomputed org*rdir term */
#if defined(__aarch64__)
const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
#elif defined(__AVX2__)
const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
#else
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
#endif
/* hit when latest entry <= earliest exit, clipped to [tnear,tfar] */
const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
dist = lnearP;
return lhit;
}
/*! Robust variant of the motion-blur quantized-node test: dequantizes and
    time-interpolates the bounds, uses plain sub/mul, and widens the slab
    interval by 3 ulp so rounding cannot miss a hit. */
template<int N, int K>
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
{
assert(movemask(node->validMask()) & ((size_t)1 << i));
const vfloat<K> loX = node->template dequantizeLowerX<K>(i,time);
const vfloat<K> hiX = node->template dequantizeUpperX<K>(i,time);
const vfloat<K> loY = node->template dequantizeLowerY<K>(i,time);
const vfloat<K> hiY = node->template dequantizeUpperY<K>(i,time);
const vfloat<K> loZ = node->template dequantizeLowerZ<K>(i,time);
const vfloat<K> hiZ = node->template dequantizeUpperZ<K>(i,time);
const vfloat<K> t0X = (loX - ray.org.x) * ray.rdir.x;
const vfloat<K> t0Y = (loY - ray.org.y) * ray.rdir.y;
const vfloat<K> t0Z = (loZ - ray.org.z) * ray.rdir.z;
const vfloat<K> t1X = (hiX - ray.org.x) * ray.rdir.x;
const vfloat<K> t1Y = (hiY - ray.org.y) * ray.rdir.y;
const vfloat<K> t1Z = (hiZ - ray.org.z) * ray.rdir.z;
/* conservative 3-ulp widening of the interval */
const float scale_up   = 1.0f+3.0f*float(ulp);
const float scale_down = 1.0f-3.0f*float(ulp);
const vfloat<K> tNear = scale_down*max(min(t0X, t1X), min(t0Y, t1Y), min(t0Z, t1Z));
const vfloat<K> tFar  = scale_up *min(max(t0X, t1X), max(t0Y, t1Y), max(t0Z, t1Z));
dist = tNear;
return max(tNear, ray.tnear) <= min(tFar, ray.tfar);
}
//////////////////////////////////////////////////////////////////////////////////////
// Node intersectors used in hybrid traversal
//////////////////////////////////////////////////////////////////////////////////////
/*! Intersects N nodes with K rays */
template<int N, int K, int types, bool robust>
struct BVHNNodeIntersectorK;
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
{
/* vmask is both an input and an output parameter! Its initial value should be the parent node
hit mask, which is used for correctly computing the current hit mask. The parent hit mask
is actually required only for motion blur node intersections (because different rays may
have different times), so for regular nodes vmask is simply overwritten. */
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
{
vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
return true;
}
};
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
{
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
{
vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
return true;
}
};
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
{
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
{
vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
return true;
}
};
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
{
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
{
vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
return true;
}
};
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
{
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
{
if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
return true;
}
};
/*! Robust node/K-ray intersector for BVHs mixing aligned and unaligned (OBB) nodes (BVH_AN1_UN1). */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
{
  /*! Aligned nodes use the robust intersector; the OBB fallback uses the regular
   *  intersectNodeK (no robust OBB variant is used here). Always returns true. */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
    else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
    return true;
  }
};
/*! Fast node/K-ray intersector for motion-blur BVHs mixing aligned and OBB nodes (BVH_AN2_UN2). */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
{
  /*! Dispatches on node type and intersects child i at the given per-ray times;
   *  vmask is overwritten with the hit mask. Always returns true. */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
    else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    return true;
  }
};
/*! Robust node/K-ray intersector for motion-blur BVHs mixing aligned and OBB nodes (BVH_AN2_UN2). */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
{
  /*! Aligned MB nodes use the robust intersector; the OBB MB fallback uses the regular
   *  intersectNodeK. vmask is overwritten with the hit mask. Always returns true. */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
    else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    return true;
  }
};
/*! Fast node/K-ray intersector for motion-blur BVHs with 4D (time-split) nodes (BVH_AN2_AN4D). */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
{
  /*! Note the &=: the incoming parent hit mask is combined rather than overwritten,
   *  which is required for motion-blur nodes since rays may have different times
   *  (see the comment on the BVH_AN1 specialization above). Always returns true. */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
    return true;
  }
};
/*! Robust node/K-ray intersector for motion-blur BVHs with 4D (time-split) nodes (BVH_AN2_AN4D). */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
{
  /*! Combines (&=) the incoming parent hit mask with the node's hit mask, as required
   *  for motion-blur nodes where rays may have different times. Always returns true. */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
    return true;
  }
};
/*! Fast node/K-ray intersector for motion-blur BVHs with 4D nodes and OBB nodes (BVH_AN2_AN4D_UN2). */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
{
  /*! Both paths combine (&=) with the incoming parent hit mask, as required for
   *  motion-blur nodes where rays may have different times. Always returns true. */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
      vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
    } else /*if (unlikely(node.isOBBNodeMB()))*/ {
      assert(node.isOBBNodeMB());
      vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    }
    return true;
  }
};
/*! Robust node/K-ray intersector for motion-blur BVHs with 4D nodes and OBB nodes (BVH_AN2_AN4D_UN2). */
template<int N, int K>
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
{
  /*! Aligned MB/4D nodes use the robust intersector; the OBB MB fallback uses the regular
   *  intersectNodeK. Both paths combine (&=) with the parent hit mask. Always returns true. */
  static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
                                      const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  {
    if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
      vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
    } else /*if (unlikely(node.isOBBNodeMB()))*/ {
      assert(node.isOBBNodeMB());
      vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
    }
    return true;
  }
};
/*! Intersects N nodes with K rays */
template<int N, int K, bool robust>
struct BVHNQuantizedBaseNodeIntersectorK;

/*! Fast K-ray intersectors for quantized nodes (static and motion-blur variants). */
template<int N, int K>
struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
{
  /*! Intersects child i of a quantized node with K rays; returns the hit mask. */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
                                           const TravRayK<K,false>& ray, vfloat<K>& dist)
  {
    return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
  }

  /*! Intersects child i of a motion-blur quantized node at the given per-ray times. */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                           const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
  {
    return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
  }
};
/*! Robust-ray variants for quantized nodes. Note both specializations forward to the same
 *  intersectQuantizedNodeK helpers; only the traversal-ray type differs. */
template<int N, int K>
struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
{
  /*! Intersects child i of a quantized node with K robust rays; returns the hit mask. */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
                                           const TravRayK<K,true>& ray, vfloat<K>& dist)
  {
    return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
  }

  /*! Intersects child i of a motion-blur quantized node at the given per-ray times. */
  static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
                                           const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
  {
    return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
  }
};
}
}

View file

@ -0,0 +1,474 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "ray.h"
#include "point_query.h"
#include "context.h"
namespace embree
{
class Scene;
/*! Base class for the acceleration structure data. */
class AccelData : public RefCount
{
  ALIGNED_CLASS_(16);
public:
  /*! concrete type tag of the acceleration structure (used instead of RTTI) */
  enum Type { TY_UNKNOWN = 0, TY_ACCELN = 1, TY_ACCEL_INSTANCE = 2, TY_BVH4 = 3, TY_BVH8 = 4, TY_GPU = 5 };

public:
  AccelData (const Type type)
    : bounds(empty), type(type) {}

  /*! notifies the acceleration structure about the deletion of some geometry (default: ignored) */
  virtual void deleteGeometry(size_t geomID) {};

  /*! clears the acceleration structure data */
  virtual void clear() = 0;

  /*! returns a single box covering the linear (time-dependent) bounds */
  __forceinline BBox3fa getBounds() const {
    return bounds.bounds();
  }

  /*! returns bounds interpolated at time t */
  __forceinline BBox3fa getBounds(float t) const {
    return bounds.interpolate(t);
  }

  /*! returns linear bounds */
  __forceinline LBBox3fa getLinearBounds() const {
    return bounds;
  }

  /*! checks if acceleration structure is empty; empty bounds carry a +inf lower bound */
  __forceinline bool isEmpty() const {
    return bounds.bounds0.lower.x == float(pos_inf);
  }

public:
  LBBox3fa bounds; // linear bounds over the build time range
  Type type;       // concrete type tag
};
/*! Base class for all intersectable and buildable acceleration structures. */
class Accel : public AccelData
{
ALIGNED_CLASS_(16);
public:
struct Intersectors;
/*! Type of collide function */
typedef void (*CollideFunc)(void* bvh0, void* bvh1, RTCCollideFunc callback, void* userPtr);
/*! Type of point query function */
typedef bool(*PointQueryFunc)(Intersectors* This, /*!< this pointer to accel */
PointQuery* query, /*!< point query for lookup */
PointQueryContext* context); /*!< point query context */
/*! Type of intersect function pointer for single rays. */
typedef void (*IntersectFunc)(Intersectors* This, /*!< this pointer to accel */
RTCRayHit& ray, /*!< ray to intersect */
RayQueryContext* context);
/*! Type of intersect function pointer for ray packets of size 4. */
typedef void (*IntersectFunc4)(const void* valid, /*!< pointer to valid mask */
Intersectors* This, /*!< this pointer to accel */
RTCRayHit4& ray, /*!< ray packet to intersect */
RayQueryContext* context);
/*! Type of intersect function pointer for ray packets of size 8. */
typedef void (*IntersectFunc8)(const void* valid, /*!< pointer to valid mask */
Intersectors* This, /*!< this pointer to accel */
RTCRayHit8& ray, /*!< ray packet to intersect */
RayQueryContext* context);
/*! Type of intersect function pointer for ray packets of size 16. */
typedef void (*IntersectFunc16)(const void* valid, /*!< pointer to valid mask */
Intersectors* This, /*!< this pointer to accel */
RTCRayHit16& ray, /*!< ray packet to intersect */
RayQueryContext* context);
/*! Type of occlusion function pointer for single rays. */
typedef void (*OccludedFunc) (Intersectors* This, /*!< this pointer to accel */
RTCRay& ray, /*!< ray to test occlusion */
RayQueryContext* context);
/*! Type of occlusion function pointer for ray packets of size 4. */
typedef void (*OccludedFunc4) (const void* valid, /*!< pointer to valid mask */
Intersectors* This, /*!< this pointer to accel */
RTCRay4& ray, /*!< ray packet to test occlusion. */
RayQueryContext* context);
/*! Type of occlusion function pointer for ray packets of size 8. */
typedef void (*OccludedFunc8) (const void* valid, /*!< pointer to valid mask */
Intersectors* This, /*!< this pointer to accel */
RTCRay8& ray, /*!< ray packet to test occlusion. */
RayQueryContext* context);
/*! Type of occlusion function pointer for ray packets of size 16. */
typedef void (*OccludedFunc16) (const void* valid, /*!< pointer to valid mask */
Intersectors* This, /*!< this pointer to accel */
RTCRay16& ray, /*!< ray packet to test occlusion. */
RayQueryContext* context);
typedef void (*ErrorFunc) ();
struct Collider
{
Collider (ErrorFunc error = nullptr)
: collide((CollideFunc)error), name(nullptr) {}
Collider (CollideFunc collide, const char* name)
: collide(collide), name(name) {}
operator bool() const { return name; }
public:
CollideFunc collide;
const char* name;
};
struct Intersector1
{
Intersector1 (ErrorFunc error = nullptr)
: intersect((IntersectFunc)error), occluded((OccludedFunc)error), name(nullptr) {}
Intersector1 (IntersectFunc intersect, OccludedFunc occluded, const char* name)
: intersect(intersect), occluded(occluded), pointQuery(nullptr), name(name) {}
Intersector1 (IntersectFunc intersect, OccludedFunc occluded, PointQueryFunc pointQuery, const char* name)
: intersect(intersect), occluded(occluded), pointQuery(pointQuery), name(name) {}
operator bool() const { return name; }
public:
static const char* type;
IntersectFunc intersect;
OccludedFunc occluded;
PointQueryFunc pointQuery;
const char* name;
};
struct Intersector4
{
Intersector4 (ErrorFunc error = nullptr)
: intersect((IntersectFunc4)error), occluded((OccludedFunc4)error), name(nullptr) {}
Intersector4 (IntersectFunc4 intersect, OccludedFunc4 occluded, const char* name)
: intersect(intersect), occluded(occluded), name(name) {}
operator bool() const { return name; }
public:
static const char* type;
IntersectFunc4 intersect;
OccludedFunc4 occluded;
const char* name;
};
struct Intersector8
{
Intersector8 (ErrorFunc error = nullptr)
: intersect((IntersectFunc8)error), occluded((OccludedFunc8)error), name(nullptr) {}
Intersector8 (IntersectFunc8 intersect, OccludedFunc8 occluded, const char* name)
: intersect(intersect), occluded(occluded), name(name) {}
operator bool() const { return name; }
public:
static const char* type;
IntersectFunc8 intersect;
OccludedFunc8 occluded;
const char* name;
};
struct Intersector16
{
Intersector16 (ErrorFunc error = nullptr)
: intersect((IntersectFunc16)error), occluded((OccludedFunc16)error), name(nullptr) {}
Intersector16 (IntersectFunc16 intersect, OccludedFunc16 occluded, const char* name)
: intersect(intersect), occluded(occluded), name(name) {}
operator bool() const { return name; }
public:
static const char* type;
IntersectFunc16 intersect;
OccludedFunc16 occluded;
const char* name;
};
struct Intersectors
{
Intersectors()
: ptr(nullptr), leafIntersector(nullptr), collider(nullptr), intersector1(nullptr), intersector4(nullptr), intersector8(nullptr), intersector16(nullptr) {}
Intersectors (ErrorFunc error)
: ptr(nullptr), leafIntersector(nullptr), collider(error), intersector1(error), intersector4(error), intersector8(error), intersector16(error) {}
void print(size_t ident)
{
if (collider.name) {
for (size_t i=0; i<ident; i++) std::cout << " ";
std::cout << "collider = " << collider.name << std::endl;
}
if (intersector1.name) {
for (size_t i=0; i<ident; i++) std::cout << " ";
std::cout << "intersector1 = " << intersector1.name << std::endl;
}
if (intersector4.name) {
for (size_t i=0; i<ident; i++) std::cout << " ";
std::cout << "intersector4 = " << intersector4.name << std::endl;
}
if (intersector8.name) {
for (size_t i=0; i<ident; i++) std::cout << " ";
std::cout << "intersector8 = " << intersector8.name << std::endl;
}
if (intersector16.name) {
for (size_t i=0; i<ident; i++) std::cout << " ";
std::cout << "intersector16 = " << intersector16.name << std::endl;
}
}
void select(bool filter)
{
if (intersector4_filter) {
if (filter) intersector4 = intersector4_filter;
else intersector4 = intersector4_nofilter;
}
if (intersector8_filter) {
if (filter) intersector8 = intersector8_filter;
else intersector8 = intersector8_nofilter;
}
if (intersector16_filter) {
if (filter) intersector16 = intersector16_filter;
else intersector16 = intersector16_nofilter;
}
}
__forceinline bool pointQuery (PointQuery* query, PointQueryContext* context) {
assert(intersector1.pointQuery);
return intersector1.pointQuery(this,query,context);
}
/*! collides two scenes */
__forceinline void collide (Accel* scene0, Accel* scene1, RTCCollideFunc callback, void* userPtr) {
assert(collider.collide);
collider.collide(scene0->intersectors.ptr,scene1->intersectors.ptr,callback,userPtr);
}
/*! Intersects a single ray with the scene. */
__forceinline void intersect (RTCRayHit& ray, RayQueryContext* context) {
assert(intersector1.intersect);
intersector1.intersect(this,ray,context);
}
/*! Intersects a packet of 4 rays with the scene. */
__forceinline void intersect4 (const void* valid, RTCRayHit4& ray, RayQueryContext* context) {
assert(intersector4.intersect);
intersector4.intersect(valid,this,ray,context);
}
/*! Intersects a packet of 8 rays with the scene. */
__forceinline void intersect8 (const void* valid, RTCRayHit8& ray, RayQueryContext* context) {
assert(intersector8.intersect);
intersector8.intersect(valid,this,ray,context);
}
/*! Intersects a packet of 16 rays with the scene. */
__forceinline void intersect16 (const void* valid, RTCRayHit16& ray, RayQueryContext* context) {
assert(intersector16.intersect);
intersector16.intersect(valid,this,ray,context);
}
/*! Intersects a packet of 4 rays with the scene. */
__forceinline void intersect (const void* valid, RTCRayHit4& ray, RayQueryContext* context) {
assert(intersector4.intersect);
intersector4.intersect(valid,this,ray,context);
}
/*! Intersects a packet of 8 rays with the scene. */
__forceinline void intersect (const void* valid, RTCRayHit8& ray, RayQueryContext* context) {
assert(intersector8.intersect);
intersector8.intersect(valid,this,ray,context);
}
/*! Intersects a packet of 16 rays with the scene. */
__forceinline void intersect (const void* valid, RTCRayHit16& ray, RayQueryContext* context) {
assert(intersector16.intersect);
intersector16.intersect(valid,this,ray,context);
}
#if defined(__SSE__) || defined(__ARM_NEON)
__forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, RayQueryContext* context) {
const vint<4> mask = valid.mask32();
intersect4(&mask,(RTCRayHit4&)ray,context);
}
#endif
#if defined(__AVX__)
__forceinline void intersect(const vbool8& valid, RayHitK<8>& ray, RayQueryContext* context) {
const vint<8> mask = valid.mask32();
intersect8(&mask,(RTCRayHit8&)ray,context);
}
#endif
#if defined(__AVX512F__)
__forceinline void intersect(const vbool16& valid, RayHitK<16>& ray, RayQueryContext* context) {
const vint<16> mask = valid.mask32();
intersect16(&mask,(RTCRayHit16&)ray,context);
}
#endif
/*! Tests if single ray is occluded by the scene. */
__forceinline void occluded (RTCRay& ray, RayQueryContext* context) {
assert(intersector1.occluded);
intersector1.occluded(this,ray,context);
}
/*! Tests if a packet of 4 rays is occluded by the scene. */
__forceinline void occluded4 (const void* valid, RTCRay4& ray, RayQueryContext* context) {
assert(intersector4.occluded);
intersector4.occluded(valid,this,ray,context);
}
/*! Tests if a packet of 8 rays is occluded by the scene. */
__forceinline void occluded8 (const void* valid, RTCRay8& ray, RayQueryContext* context) {
assert(intersector8.occluded);
intersector8.occluded(valid,this,ray,context);
}
/*! Tests if a packet of 16 rays is occluded by the scene. */
__forceinline void occluded16 (const void* valid, RTCRay16& ray, RayQueryContext* context) {
assert(intersector16.occluded);
intersector16.occluded(valid,this,ray,context);
}
/*! Tests if a packet of 4 rays is occluded by the scene. */
__forceinline void occluded (const void* valid, RTCRay4& ray, RayQueryContext* context) {
assert(intersector4.occluded);
intersector4.occluded(valid,this,ray,context);
}
/*! Tests if a packet of 8 rays is occluded by the scene. */
__forceinline void occluded (const void* valid, RTCRay8& ray, RayQueryContext* context) {
assert(intersector8.occluded);
intersector8.occluded(valid,this,ray,context);
}
/*! Tests if a packet of 16 rays is occluded by the scene. */
__forceinline void occluded (const void* valid, RTCRay16& ray, RayQueryContext* context) {
assert(intersector16.occluded);
intersector16.occluded(valid,this,ray,context);
}
#if defined(__SSE__) || defined(__ARM_NEON)
__forceinline void occluded(const vbool4& valid, RayK<4>& ray, RayQueryContext* context) {
const vint<4> mask = valid.mask32();
occluded4(&mask,(RTCRay4&)ray,context);
}
#endif
#if defined(__AVX__)
__forceinline void occluded(const vbool8& valid, RayK<8>& ray, RayQueryContext* context) {
const vint<8> mask = valid.mask32();
occluded8(&mask,(RTCRay8&)ray,context);
}
#endif
#if defined(__AVX512F__)
__forceinline void occluded(const vbool16& valid, RayK<16>& ray, RayQueryContext* context) {
const vint<16> mask = valid.mask32();
occluded16(&mask,(RTCRay16&)ray,context);
}
#endif
/*! Tests if single ray is occluded by the scene. */
__forceinline void intersect(RTCRay& ray, RayQueryContext* context) {
occluded(ray, context);
}
/*! Tests if a packet of K rays is occluded by the scene. */
template<int K>
__forceinline void intersect(const vbool<K>& valid, RayK<K>& ray, RayQueryContext* context) {
occluded(valid, ray, context);
}
public:
AccelData* ptr;
void* leafIntersector;
Collider collider;
Intersector1 intersector1;
Intersector4 intersector4;
Intersector4 intersector4_filter;
Intersector4 intersector4_nofilter;
Intersector8 intersector8;
Intersector8 intersector8_filter;
Intersector8 intersector8_nofilter;
Intersector16 intersector16;
Intersector16 intersector16_filter;
Intersector16 intersector16_nofilter;
};
public:
/*! Construction */
Accel (const AccelData::Type type)
: AccelData(type) {}
/*! Construction */
Accel (const AccelData::Type type, const Intersectors& intersectors)
: AccelData(type), intersectors(intersectors) {}
/*! Virtual destructor */
virtual ~Accel() {}
/*! makes the acceleration structure immutable */
virtual void immutable () {}
/*! build acceleration structure */
virtual void build () = 0;
public:
Intersectors intersectors;
};
/* Helper macros emitting a factory function per ISA that wraps a collider/intersector
 * implementation into the corresponding Accel function table, with a name of the
 * form "<isa>::<symbol>" for diagnostics. */
#define DEFINE_COLLIDER(symbol,collider)                                \
  Accel::Collider symbol() {                                            \
    return Accel::Collider((Accel::CollideFunc)collider::collide,       \
                           TOSTRING(isa) "::" TOSTRING(symbol));        \
  }

#define DEFINE_INTERSECTOR1(symbol,intersector)                               \
  Accel::Intersector1 symbol() {                                              \
    return Accel::Intersector1((Accel::IntersectFunc )intersector::intersect, \
                               (Accel::OccludedFunc )intersector::occluded,   \
                               (Accel::PointQueryFunc)intersector::pointQuery,\
                               TOSTRING(isa) "::" TOSTRING(symbol));          \
  }

#define DEFINE_INTERSECTOR4(symbol,intersector)                               \
  Accel::Intersector4 symbol() {                                              \
    return Accel::Intersector4((Accel::IntersectFunc4)intersector::intersect, \
                               (Accel::OccludedFunc4)intersector::occluded,   \
                               TOSTRING(isa) "::" TOSTRING(symbol));          \
  }

#define DEFINE_INTERSECTOR8(symbol,intersector)                               \
  Accel::Intersector8 symbol() {                                              \
    return Accel::Intersector8((Accel::IntersectFunc8)intersector::intersect, \
                               (Accel::OccludedFunc8)intersector::occluded,   \
                               TOSTRING(isa) "::" TOSTRING(symbol));          \
  }

#define DEFINE_INTERSECTOR16(symbol,intersector)                                \
  Accel::Intersector16 symbol() {                                               \
    return Accel::Intersector16((Accel::IntersectFunc16)intersector::intersect, \
                                (Accel::OccludedFunc16)intersector::occluded,   \
                                TOSTRING(isa) "::" TOSTRING(symbol));           \
  }
}

View file

@ -0,0 +1,41 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "accel.h"
#include "builder.h"
namespace embree
{
/*! Pairs a stand-alone acceleration structure with the builder that (re)builds it,
 *  exposing both through the Accel interface. Takes ownership of both pointers. */
class AccelInstance : public Accel
{
public:
  AccelInstance (AccelData* accel, Builder* builder, Intersectors& intersectors)
    : Accel(AccelData::TY_ACCEL_INSTANCE,intersectors), accel(accel), builder(builder) {}

  /*! makes the structure immutable by destroying the builder (no further rebuilds) */
  void immutable () {
    builder.reset(nullptr);
  }

public:
  /*! rebuilds the wrapped accel (if a builder is still present) and caches its bounds */
  void build () {
    if (builder) builder->build();
    bounds = accel->bounds;
  }

  /*! forwards geometry deletion to both the accel and the builder */
  void deleteGeometry(size_t geomID) {
    if (accel ) accel->deleteGeometry(geomID);
    if (builder) builder->deleteGeometry(geomID);
  }

  void clear() {
    if (accel) accel->clear();
    if (builder) builder->clear();
  }

private:
  std::unique_ptr<AccelData> accel;   // owned acceleration structure
  std::unique_ptr<Builder> builder;   // owned builder; null after immutable()
};
}

View file

@ -0,0 +1,214 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "acceln.h"
#include "ray.h"
#include "../../include/embree4/rtcore_ray.h"
#include "../../common/algorithms/parallel_for.h"
namespace embree
{
/*! constructs an empty multi-accel container */
AccelN::AccelN()
  : Accel(AccelData::TY_ACCELN), accels() {}
/*! destroys all owned sub-acceleration-structures (raw pointers owned by this container) */
AccelN::~AccelN()
{
  for (size_t i=0; i<accels.size(); i++)
    delete accels[i];
}
/*! adds a sub-acceleration-structure; takes ownership (deleted in the destructor) */
void AccelN::accels_add(Accel* accel)
{
  assert(accel);
  accels.push_back(accel);
}
/*! deletes all owned sub-structures and resets the container to the empty state */
void AccelN::accels_init()
{
  for (size_t i=0; i<accels.size(); i++)
    delete accels[i];
  accels.clear();
}
/*! Forwards a point query to every non-empty sub-structure.
 *  Returns true if any of them changed the query. */
bool AccelN::pointQuery (Accel::Intersectors* This_in, PointQuery* query, PointQueryContext* context)
{
  AccelN* self = (AccelN*)This_in->ptr;
  bool anyChanged = false;
  for (Accel* accel : self->accels) {
    if (accel->isEmpty()) continue;
    anyChanged |= accel->intersectors.pointQuery(query,context);
  }
  return anyChanged;
}
/*! Intersects a single ray against every non-empty sub-structure in order. */
void AccelN::intersect (Accel::Intersectors* This_in, RTCRayHit& ray, RayQueryContext* context)
{
  AccelN* self = (AccelN*)This_in->ptr;
  for (Accel* accel : self->accels) {
    if (accel->isEmpty()) continue;
    accel->intersectors.intersect(ray,context);
  }
}
void AccelN::intersect4 (const void* valid, Accel::Intersectors* This_in, RTCRayHit4& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++)
if (!This->accels[i]->isEmpty())
This->accels[i]->intersectors.intersect4(valid,ray,context);
}
void AccelN::intersect8 (const void* valid, Accel::Intersectors* This_in, RTCRayHit8& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++)
if (!This->accels[i]->isEmpty())
This->accels[i]->intersectors.intersect8(valid,ray,context);
}
void AccelN::intersect16 (const void* valid, Accel::Intersectors* This_in, RTCRayHit16& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++)
if (!This->accels[i]->isEmpty())
This->accels[i]->intersectors.intersect16(valid,ray,context);
}
/*! Occlusion-tests a single ray against the non-empty sub-structures, stopping
 *  as soon as one of them marks the ray occluded (tfar < 0). */
void AccelN::occluded (Accel::Intersectors* This_in, RTCRay& ray, RayQueryContext* context)
{
  AccelN* self = (AccelN*)This_in->ptr;
  for (Accel* accel : self->accels) {
    if (accel->isEmpty()) continue;
    accel->intersectors.occluded(ray,context);
    if (ray.tfar < 0.0f) break; // ray already occluded — no need to test further structures
  }
}
/*! Occlusion-tests a 4-wide ray packet against the non-empty sub-structures,
 *  with a SIMD early-out once all valid rays are occluded. */
void AccelN::occluded4 (const void* valid, Accel::Intersectors* This_in, RTCRay4& ray, RayQueryContext* context)
{
  AccelN* This = (AccelN*)This_in->ptr;
  for (size_t i=0; i<This->accels.size(); i++) {
    if (This->accels[i]->isEmpty()) continue;
    This->accels[i]->intersectors.occluded4(valid,ray,context);
#if defined(__SSE2__) || defined(__ARM_NEON)
    /* tfar >= 0 marks rays that are still unoccluded; stop once no valid ray remains */
    vbool4 valid0 = asBool(((vint4*)valid)[0]);
    vbool4 hit0   = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
    if (unlikely(none(valid0 & hit0))) break;
#endif
  }
}
/*! Occlusion-tests an 8-wide ray packet against the non-empty sub-structures,
 *  with a SIMD early-out (evaluated as two 4-wide halves) once all valid rays are occluded. */
void AccelN::occluded8 (const void* valid, Accel::Intersectors* This_in, RTCRay8& ray, RayQueryContext* context)
{
  AccelN* This = (AccelN*)This_in->ptr;
  for (size_t i=0; i<This->accels.size(); i++) {
    if (This->accels[i]->isEmpty()) continue;
    This->accels[i]->intersectors.occluded8(valid,ray,context);
#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
    /* tfar >= 0 marks rays that are still unoccluded; stop once no valid ray remains */
    vbool4 valid0 = asBool(((vint4*)valid)[0]);
    vbool4 hit0   = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
    vbool4 valid1 = asBool(((vint4*)valid)[1]);
    vbool4 hit1   = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
    if (unlikely((none((valid0 & hit0) | (valid1 & hit1))))) break;
#endif
  }
}
/*! Occlusion-tests a 16-wide ray packet against the non-empty sub-structures,
 *  with a SIMD early-out (evaluated as four 4-wide quarters) once all valid rays are occluded. */
void AccelN::occluded16 (const void* valid, Accel::Intersectors* This_in, RTCRay16& ray, RayQueryContext* context)
{
  AccelN* This = (AccelN*)This_in->ptr;
  for (size_t i=0; i<This->accels.size(); i++) {
    if (This->accels[i]->isEmpty()) continue;
    This->accels[i]->intersectors.occluded16(valid,ray,context);
#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
    /* tfar >= 0 marks rays that are still unoccluded; stop once no valid ray remains */
    vbool4 valid0 = asBool(((vint4*)valid)[0]);
    vbool4 hit0   = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
    vbool4 valid1 = asBool(((vint4*)valid)[1]);
    vbool4 hit1   = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
    vbool4 valid2 = asBool(((vint4*)valid)[2]);
    vbool4 hit2   = ((vfloat4*)ray.tfar)[2] >= vfloat4(zero);
    vbool4 valid3 = asBool(((vint4*)valid)[3]);
    vbool4 hit3   = ((vfloat4*)ray.tfar)[3] >= vfloat4(zero);
    if (unlikely((none((valid0 & hit0) | (valid1 & hit1) | (valid2 & hit2) | (valid3 & hit3))))) break;
#endif
  }
}
void AccelN::accels_print(size_t ident)
{
for (size_t i=0; i<accels.size(); i++)
{
for (size_t j=0; j<ident; j++) std::cout << " ";
std::cout << "accels[" << i << "]" << std::endl;
accels[i]->intersectors.print(ident+2);
}
}
void AccelN::accels_immutable()
{
for (size_t i=0; i<accels.size(); i++)
accels[i]->immutable();
}
/*! Builds all sub-structures in parallel and wires up this container's own
 *  intersector tables (or forwards a single sub-structure's tables directly). */
void AccelN::accels_build ()
{
  /* reduce memory consumption */
  accels.shrink_to_fit();

  /* build all acceleration structures in parallel */
  parallel_for (accels.size(), [&] (size_t i) {
    accels[i]->build();
  });

  /* determine which packet widths are supported by ALL sub-structures
     (a table is valid iff its name is set) */
  bool valid1 = true;
  bool valid4 = true;
  bool valid8 = true;
  bool valid16 = true;
  for (size_t i=0; i<accels.size(); i++) {
    valid1 &= (bool) accels[i]->intersectors.intersector1;
    valid4 &= (bool) accels[i]->intersectors.intersector4;
    valid8 &= (bool) accels[i]->intersectors.intersector8;
    valid16 &= (bool) accels[i]->intersectors.intersector16;
  }

  /* with a single sub-structure, forward its type/bounds/tables directly (skips the AccelN dispatch layer) */
  if (accels.size() == 1) {
    type = accels[0]->type; // FIXME: should just assign entire Accel
    bounds = accels[0]->bounds;
    intersectors = accels[0]->intersectors;
  }
  /* otherwise install AccelN's own forwarding functions; widths not supported by
     every sub-structure get a null name (invalid table) */
  else
  {
    type = AccelData::TY_ACCELN;
    intersectors.ptr = this;
    intersectors.intersector1  = Intersector1(&intersect,&occluded,&pointQuery,valid1 ? "AccelN::intersector1": nullptr);
    intersectors.intersector4  = Intersector4(&intersect4,&occluded4,valid4 ? "AccelN::intersector4" : nullptr);
    intersectors.intersector8  = Intersector8(&intersect8,&occluded8,valid8 ? "AccelN::intersector8" : nullptr);
    intersectors.intersector16 = Intersector16(&intersect16,&occluded16,valid16 ? "AccelN::intersector16": nullptr);

    /*! calculate bounds */
    bounds = empty;
    for (size_t i=0; i<accels.size(); i++)
      bounds.extend(accels[i]->bounds);
  }
}
void AccelN::accels_select(bool filter)
{
for (size_t i=0; i<accels.size(); i++)
accels[i]->intersectors.select(filter);
}
void AccelN::accels_deleteGeometry(size_t geomID)
{
for (size_t i=0; i<accels.size(); i++)
accels[i]->deleteGeometry(geomID);
}
void AccelN::accels_clear()
{
for (size_t i=0; i<accels.size(); i++) {
accels[i]->clear();
}
}
}

View file

@ -0,0 +1,47 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "accel.h"
namespace embree
{
/*! merges N acceleration structures together, by processing them in order */
class AccelN : public Accel
{
public:
  AccelN ();
  ~AccelN(); // deletes all owned sub-structures

public:
  /*! adds a sub-structure (ownership transferred) / resets the container */
  void accels_add(Accel* accel);
  void accels_init();

public:
  /*! static dispatch entries installed into this accel's Intersectors tables;
   *  'This->ptr' carries the AccelN instance */
  static bool pointQuery (Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);

public:
  static void intersect (Accel::Intersectors* This, RTCRayHit& ray, RayQueryContext* context);
  static void intersect4 (const void* valid, Accel::Intersectors* This, RTCRayHit4& ray, RayQueryContext* context);
  static void intersect8 (const void* valid, Accel::Intersectors* This, RTCRayHit8& ray, RayQueryContext* context);
  static void intersect16 (const void* valid, Accel::Intersectors* This, RTCRayHit16& ray, RayQueryContext* context);

public:
  static void occluded (Accel::Intersectors* This, RTCRay& ray, RayQueryContext* context);
  static void occluded4 (const void* valid, Accel::Intersectors* This, RTCRay4& ray, RayQueryContext* context);
  static void occluded8 (const void* valid, Accel::Intersectors* This, RTCRay8& ray, RayQueryContext* context);
  static void occluded16 (const void* valid, Accel::Intersectors* This, RTCRay16& ray, RayQueryContext* context);

public:
  void accels_print(size_t ident);
  void accels_immutable();
  void accels_build ();
  void accels_select(bool filter);
  void accels_deleteGeometry(size_t geomID);
  void accels_clear ();

public:
  std::vector<Accel*> accels; // owned sub-acceleration-structures, queried in order
};
}

View file

@ -0,0 +1,17 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "accelset.h"
#include "scene.h"
namespace embree
{
  /*! constructs a user-geometry set; item/time-step counts are forwarded to Geometry,
   *  the bounds callback starts out unset and must be registered before bounds queries */
  AccelSet::AccelSet (Device* device, Geometry::GType gtype, size_t numItems, size_t numTimeSteps)
    : Geometry(device,gtype,(unsigned int)numItems,(unsigned int)numTimeSteps), boundsFunc(nullptr) {}

  /*! fallback table: both callbacks point at the error function, name stays null (table invalid) */
  AccelSet::IntersectorN::IntersectorN (ErrorFunc error)
    : intersect((IntersectFuncN)error), occluded((OccludedFuncN)error), name(nullptr) {}

  /*! regular table: user-supplied callbacks plus a diagnostic name */
  AccelSet::IntersectorN::IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name)
    : intersect(intersect), occluded(occluded), name(name) {}
}

View file

@ -0,0 +1,347 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "builder.h"
#include "geometry.h"
#include "ray.h"
#include "hit.h"
namespace embree
{
struct IntersectFunctionNArguments;
struct OccludedFunctionNArguments;
/*! internal extension of the public RTCIntersectFunctionNArguments: additionally carries
 *  the geometry being intersected, a scene to forward rays to, and the per-query args */
struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments
{
  Geometry* geometry;          // geometry the callback is invoked for
  RTCScene forward_scene;      // scene to forward the ray to (set to nullptr by the caller below)
  RTCIntersectArguments* args; // per-query intersect arguments
};
/*! internal extension of the public RTCOccludedFunctionNArguments, mirroring
 *  IntersectFunctionNArguments for occlusion queries */
struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments
{
  Geometry* geometry;          // geometry the callback is invoked for
  RTCScene forward_scene;      // scene to forward the ray to
  RTCIntersectArguments* args; // per-query arguments
};
/*! Base class for set of acceleration structures. */
class AccelSet : public Geometry
{
public:
typedef RTCIntersectFunctionN IntersectFuncN;
typedef RTCOccludedFunctionN OccludedFuncN;
typedef void (*ErrorFunc) ();
/*! user-geometry callback table; valid iff name != nullptr (see operator bool) */
struct IntersectorN
{
  IntersectorN (ErrorFunc error = nullptr) ;
  IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name);

  operator bool() const { return name; }

public:
  static const char* type;
  IntersectFuncN intersect;
  OccludedFuncN occluded;
  const char* name;
};
public:
/*! construction */
AccelSet (Device* device, Geometry::GType gtype, size_t items, size_t numTimeSteps);
/*! makes the acceleration structure immutable */
virtual void immutable () {}
/*! build accel */
virtual void build () = 0;
/*! check if the i'th primitive is valid between the specified time range;
    a primitive is valid when its bounds at every time step in the (inclusive)
    range are non-empty and finite */
__forceinline bool valid(size_t i, const range<size_t>& itime_range) const
{
  for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
    if (!isvalid_non_empty(bounds(i,itime))) return false;
  return true;
}
/*! Calculates the bounds of an item at time step itime by invoking the
    user-registered bounds callback; the callback writes directly into 'box'. */
__forceinline BBox3fa bounds(size_t i, size_t itime = 0) const
{
  BBox3fa box;
  assert(i < size());
  RTCBoundsFunctionArguments args;
  args.geometryUserPtr = userPtr;
  args.primID = (unsigned int)i;
  args.timeStep = (unsigned int)itime;
  args.bounds_o = (RTCBounds*)&box;
  boundsFunc(&args);
  return box;
}
/*! calculates the linear bounds of the i'th item at the itime'th time segment
    by querying the user bounds callback at both endpoints (itime and itime+1) */
__forceinline LBBox3fa linearBounds(size_t i, size_t itime) const
{
  BBox3fa box[2];
  assert(i < size());
  RTCBoundsFunctionArguments args;
  args.geometryUserPtr = userPtr;
  args.primID = (unsigned int)i;
  args.timeStep = (unsigned int)(itime+0);
  args.bounds_o = (RTCBounds*)&box[0];
  boundsFunc(&args);
  args.timeStep = (unsigned int)(itime+1);
  args.bounds_o = (RTCBounds*)&box[1];
  boundsFunc(&args);
  return LBBox3fa(box[0],box[1]);
}
/*! calculates the build bounds of the i'th item; optionally writes them to *bbox
    and returns whether they are non-empty and finite */
__forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
{
  const BBox3fa b = bounds(i);
  if (bbox) *bbox = b;
  return isvalid_non_empty(b);
}
/*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
__forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
{
  const LBBox3fa bounds = linearBounds(i,itime);
  bbox = bounds.bounds0; // use bounding box of first timestep to build BVH
  return isvalid_non_empty(bounds);
}
/*! calculates the linear bounds of the i'th primitive for the specified time range,
 *  sampling the per-time-step bounds callback across the geometry's time segments */
__forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
}
/*! calculates the linear bounds of the i'th primitive for the specified time range;
 *  returns false (leaving 'bbox' untouched) if the primitive is invalid in that range */
__forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const {
if (!valid(i, timeSegmentRange(time_range))) return false;
bbox = linearBounds(i, time_range);
return true;
}
/* gets version info of topology; for user geometry the primitive count
   acts as the topology version */
unsigned int getTopologyVersion() const {
return numPrimitives;
}
/* returns true if topology changed, i.e. the primitive count differs from
   the previously recorded version */
bool topologyChanged(unsigned int otherVersion) const {
return numPrimitives != otherVersion;
}
public:
/*! Intersects a single ray with the user geometry by invoking the registered
 *  intersect callback; returns true if the callback left the ray active (mask != 0). */
__forceinline bool intersect (RayHit& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
assert(primID < size());
int mask = -1; // single active lane; callback may clear it
IntersectFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.rayhit = (RTCRayHitN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = context->args;
IntersectFuncN intersectFunc = nullptr;
intersectFunc = intersectorN.intersect;
// an intersect callback passed through the query arguments overrides the geometry's
if (context->getIntersectFunction())
intersectFunc = context->getIntersectFunction();
assert(intersectFunc);
intersectFunc(&args);
return mask != 0;
}
/*! Tests if a single ray is occluded by the user geometry via the registered
 *  occluded callback; returns true if the callback left the ray active (mask != 0). */
__forceinline bool occluded (Ray& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
assert(primID < size());
int mask = -1; // single active lane; callback may clear it
OccludedFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.ray = (RTCRayN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = context->args;
OccludedFuncN occludedFunc = nullptr;
occludedFunc = intersectorN.occluded;
// an occluded callback passed through the query arguments overrides the geometry's
if (context->getOccludedFunction())
occludedFunc = context->getOccludedFunction();
assert(occludedFunc);
occludedFunc(&args);
return mask != 0;
}
/*! Intersects a single ray with the user geometry (SYCL variant): callback
 *  selection is gated by the compile-time feature mask, the callback may be
 *  skipped entirely, and any instance-forwarding scene the callback sets is
 *  returned through 'forward_scene'. */
__forceinline bool intersect (RayHit& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context, RTCScene& forward_scene)
{
assert(primID < size());
int mask = -1;
IntersectFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.rayhit = (RTCRayHitN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = nullptr; // NOTE(review): unlike the non-SYCL path, query arguments are not forwarded — confirm intended
typedef void (*RTCIntersectFunctionSYCL)(const void* args);
RTCIntersectFunctionSYCL intersectFunc = nullptr;
#if EMBREE_SYCL_GEOMETRY_CALLBACK
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY)
intersectFunc = (RTCIntersectFunctionSYCL) intersectorN.intersect;
#endif
// argument-level callback takes precedence over the geometry-level one
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS)
if (context->getIntersectFunction())
intersectFunc = (RTCIntersectFunctionSYCL) context->getIntersectFunction();
if (intersectFunc)
intersectFunc(&args);
forward_scene = args.forward_scene;
return mask != 0;
}
/*! Tests if a single ray is occluded by the user geometry (SYCL variant):
 *  callback selection is gated by the compile-time feature mask, and any
 *  instance-forwarding scene the callback sets is returned through 'forward_scene'. */
__forceinline bool occluded (Ray& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context, RTCScene& forward_scene)
{
assert(primID < size());
int mask = -1;
OccludedFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.ray = (RTCRayN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = nullptr; // NOTE(review): unlike the non-SYCL path, query arguments are not forwarded — confirm intended
typedef void (*RTCOccludedFunctionSYCL)(const void* args);
RTCOccludedFunctionSYCL occludedFunc = nullptr;
#if EMBREE_SYCL_GEOMETRY_CALLBACK
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY)
occludedFunc = (RTCOccludedFunctionSYCL) intersectorN.occluded;
#endif
// argument-level callback takes precedence over the geometry-level one
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS)
if (context->getOccludedFunction())
occludedFunc = (RTCOccludedFunctionSYCL) context->getOccludedFunction();
if (occludedFunc)
occludedFunc(&args);
forward_scene = args.forward_scene;
return mask != 0;
}
/*! Intersects a packet of K rays with the user geometry; the per-lane validity
 *  mask is materialized as 32-bit ints so the C callback can read/clear lanes. */
template<int K>
__forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
assert(primID < size());
vint<K> mask = valid.mask32();
IntersectFunctionNArguments args;
args.valid = (int*)&mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.rayhit = (RTCRayHitN*)&ray;
args.N = K;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = context->args;
IntersectFuncN intersectFunc = nullptr;
intersectFunc = intersectorN.intersect;
// an intersect callback passed through the query arguments overrides the geometry's
if (context->getIntersectFunction())
intersectFunc = context->getIntersectFunction();
assert(intersectFunc);
intersectFunc(&args);
}
/*! Tests if a packet of K rays is occluded by the user geometry; the per-lane
 *  validity mask is materialized as 32-bit ints so the C callback can read/clear lanes. */
template<int K>
__forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
assert(primID < size());
vint<K> mask = valid.mask32();
OccludedFunctionNArguments args;
args.valid = (int*)&mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.ray = (RTCRayN*)&ray;
args.N = K;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = context->args;
OccludedFuncN occludedFunc = nullptr;
occludedFunc = intersectorN.occluded;
// an occluded callback passed through the query arguments overrides the geometry's
if (context->getOccludedFunction())
occludedFunc = context->getOccludedFunction();
assert(occludedFunc);
occludedFunc(&args);
}
public:
RTCBoundsFunction boundsFunc; //!< user-registered bounds callback invoked by bounds()/linearBounds()
IntersectorN intersectorN; //!< user-registered intersect/occluded callbacks plus a debug name
};
/*! defines a factory function 'symbol' that returns an IntersectorN wrapping the
 *  intersect/occluded entry points of 'intersector', tagged with the current ISA name */
#define DEFINE_SET_INTERSECTORN(symbol,intersector) \
AccelSet::IntersectorN symbol() { \
return AccelSet::IntersectorN(intersector::intersect, \
intersector::occluded, \
TOSTRING(isa) "::" TOSTRING(symbol)); \
}
}

View file

@ -0,0 +1,82 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "alloc.h"
#include "../../common/sys/thread.h"
#if defined(APPLE) && defined(__aarch64__)
#include "../../common/sys/barrier.h"
#endif
namespace embree
{
// per-thread cached allocator state (one ThreadLocal2 per OS thread)
__thread FastAllocator::ThreadLocal2* FastAllocator::thread_local_allocator2 = nullptr;
// guards the global registry of all thread-local allocators below
MutexSys FastAllocator::s_thread_local_allocators_lock;
std::vector<std::unique_ptr<FastAllocator::ThreadLocal2>> FastAllocator::s_thread_local_allocators;
/*! regression test hammering FastAllocator from many threads; each iteration the
 *  main thread resets the allocator, all workers allocate and verify 1000 blocks,
 *  and two barrier waits per iteration keep reset and allocation phases disjoint */
struct fast_allocator_regression_test : public RegressionTest
{
BarrierSys barrier;
std::atomic<size_t> numFailed; // count of corrupted allocations observed by workers
std::unique_ptr<FastAllocator> alloc;
fast_allocator_regression_test()
: RegressionTest("fast_allocator_regression_test"), numFailed(0)
{
registerRegressionTest(this);
}
/* worker body: must perform exactly two barrier waits per outer iteration to
   stay in lock-step with run() below */
static void thread_alloc(fast_allocator_regression_test* This)
{
FastAllocator::CachedAllocator threadalloc = This->alloc->getCachedAllocator();
size_t* ptrs[1000];
for (size_t j=0; j<1000; j++)
{
This->barrier.wait(); // wait for main thread to finish alloc->reset()
for (size_t i=0; i<1000; i++) {
ptrs[i] = (size_t*) threadalloc.malloc0(sizeof(size_t)+(i%32)); // varying sizes exercise block packing
*ptrs[i] = size_t(threadalloc.talloc0) + i; // write a thread-unique pattern
}
for (size_t i=0; i<1000; i++) {
if (*ptrs[i] != size_t(threadalloc.talloc0) + i) // detect overlapping allocations
This->numFailed++;
}
This->barrier.wait(); // signal main thread that this round is done
}
}
bool run ()
{
alloc = make_unique(new FastAllocator(nullptr,false));
numFailed.store(0);
size_t numThreads = getNumberOfLogicalThreads();
barrier.init(numThreads+1); // workers plus the main thread
/* create threads */
std::vector<thread_t> threads;
for (size_t i=0; i<numThreads; i++)
threads.push_back(createThread((thread_func)thread_alloc,this));
/* run test */
for (size_t i=0; i<1000; i++)
{
alloc->reset(); // safe: all workers are parked at the first barrier
barrier.wait();
barrier.wait();
}
/* destroy threads */
for (size_t i=0; i<numThreads; i++)
join(threads[i]);
alloc = nullptr; // release the allocator before reporting
return numFailed == 0;
}
};
fast_allocator_regression_test fast_allocator_regression; // self-registers via constructor
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,280 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "device.h"
namespace embree
{
/*! Implements an API data buffer object. This class may or may not own the data. */
class Buffer : public RefCount
{
public:
/*! Buffer construction */
//Buffer()
//: device(nullptr), ptr(nullptr), numBytes(0), shared(false) {}
/*! Buffer construction; if ptr_in is non-null the buffer wraps application
 *  memory (shared, not freed here), otherwise it allocates its own storage */
Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr)
: device(device), numBytes(numBytes_in)
{
device->refInc(); // buffer keeps its device alive
if (ptr_in)
{
shared = true;
ptr = (char*)ptr_in;
}
else
{
shared = false;
alloc();
}
}
/*! Buffer destruction */
~Buffer() {
free();
device->refDec();
}
/*! this class is not copyable */
private:
Buffer(const Buffer& other) DELETED; // do not implement
Buffer& operator =(const Buffer& other) DELETED; // do not implement
public:
/* inits and allocates the buffer */
void create(Device* device_in, size_t numBytes_in)
{
init(device_in, numBytes_in);
alloc();
}
/* inits the buffer (releases any previous storage, allocates nothing yet) */
void init(Device* device_in, size_t numBytes_in)
{
free();
device = device_in;
ptr = nullptr;
numBytes = numBytes_in;
shared = false;
}
/*! sets shared buffer; numBytes_in == (size_t)-1 keeps the current size */
void set(Device* device_in, void* ptr_in, size_t numBytes_in)
{
free();
device = device_in;
ptr = (char*)ptr_in;
if (numBytes_in != (size_t)-1)
numBytes = numBytes_in;
shared = true;
}
/*! allocated buffer; size is rounded up to a multiple of 16 bytes and the
 *  allocation is reported to the device's memory monitor */
void alloc()
{
device->memoryMonitor(this->bytes(), false);
size_t b = (this->bytes()+15) & ssize_t(-16);
ptr = (char*)device->malloc(b,16);
}
/*! frees the buffer (no-op for shared/application-owned memory) */
void free()
{
if (shared) return;
// NOTE(review): the memory monitor is decremented even when ptr is null
// (e.g. init() without a following alloc()) — matches upstream Embree, confirm intended
device->free(ptr);
device->memoryMonitor(-ssize_t(this->bytes()), true);
ptr = nullptr;
}
/*! gets buffer pointer */
void* data()
{
/* report error if buffer is not existing */
if (!device)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
/* return buffer */
return ptr;
}
/*! returns pointer to first element */
__forceinline char* getPtr() const {
return ptr;
}
/*! returns the number of bytes of the buffer */
__forceinline size_t bytes() const {
return numBytes;
}
/*! returns true of the buffer is not empty */
__forceinline operator bool() const {
return ptr;
}
public:
Device* device; //!< device to report memory usage to
char* ptr; //!< pointer to buffer data
size_t numBytes; //!< number of bytes in the buffer
bool shared; //!< set if memory is shared with application
};
/*! An untyped contiguous range of a buffer. This class does not own the buffer content. */
class RawBufferView
{
public:
/*! Buffer construction */
RawBufferView()
: ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
public:
/*! sets the buffer view to 'num_in' elements of 'stride_in' bytes starting at
 *  byte 'offset_in' of the parent buffer; bumps the modification counter */
void set(const Ref<Buffer>& buffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in)
{
// NOTE(review): the right-hand side multiplies numBytes by stride, so this only
// rejects wildly out-of-range views; matches upstream Embree — verify before changing
if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");
ptr_ofs = buffer_in->ptr + offset_in;
stride = stride_in;
num = num_in;
format = format_in;
modCounter++;
modified = true;
buffer = buffer_in; // keep the parent buffer alive
}
/*! returns pointer to the first element */
__forceinline char* getPtr() const {
return ptr_ofs;
}
/*! returns pointer to the i'th element */
__forceinline char* getPtr(size_t i) const
{
assert(i<num);
return ptr_ofs + i*stride;
}
/*! returns the number of elements of the buffer */
__forceinline size_t size() const {
return num;
}
/*! returns the number of bytes of the buffer */
__forceinline size_t bytes() const {
return num*stride;
}
/*! returns the buffer stride */
__forceinline unsigned getStride() const
{
assert(stride <= unsigned(inf));
return unsigned(stride);
}
/*! return the buffer format */
__forceinline RTCFormat getFormat() const {
return format;
}
/*! mark buffer as modified (bumps the version counter and the local flag) */
__forceinline void setModified() {
modCounter++;
modified = true;
}
/*! returns true if this buffer changed since the given version was observed */
__forceinline bool isModified(unsigned int otherModCounter) const {
return modCounter > otherModCounter;
}
/*! returns the local modified flag */
__forceinline bool isLocalModified() const {
return modified;
}
/*! clear local modified flag */
__forceinline void clearLocalModified() {
modified = false;
}
/*! returns true of the buffer is not empty */
__forceinline operator bool() const {
return ptr_ofs;
}
/*! checks padding to 16 byte check, fails hard; reads 12 bytes past the last
 *  element so SIMD loads of the final element are known to be safe */
__forceinline void checkPadding16() const
{
if (ptr_ofs && num)
volatile int MAYBE_UNUSED w = *((int*)getPtr(size()-1)+3); // FIXME: is failing hard avoidable?
}
public:
char* ptr_ofs; //!< base pointer plus offset
size_t stride; //!< stride of the buffer in bytes
size_t num; //!< number of elements in the buffer
RTCFormat format; //!< format of the buffer
unsigned int modCounter; //!< version ID of this buffer
bool modified; //!< local modified data
int userData; //!< special data
Ref<Buffer> buffer; //!< reference to the parent buffer
};
/*! A typed contiguous range of a buffer. This class does not own the buffer content. */
template<typename T>
class BufferView : public RawBufferView
{
public:
typedef T value_type;
/*! access to the ith element of the buffer; elements are 'stride' bytes apart,
 *  which may be larger than sizeof(T) for interleaved layouts */
__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
};
/*! Vec3fa specialization: elements are returned by value via unaligned loads
 *  (a 16-byte load reads one float past the 12-byte element, which is why
 *  buffers must be padded — see RawBufferView::checkPadding16). */
template<>
class BufferView<Vec3fa> : public RawBufferView
{
public:
typedef Vec3fa value_type;
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
/*! access to the ith element of the buffer */
__forceinline const Vec3fa operator [](size_t i) const
{
assert(i<num);
return Vec3fa::loadu(ptr_ofs + i*stride);
}
/*! writes the i'th element */
__forceinline void store(size_t i, const Vec3fa& v)
{
assert(i<num);
Vec3fa::storeu(ptr_ofs + i*stride, v);
}
#else
/*! access to the ith element of the buffer (SSE path) */
__forceinline const Vec3fa operator [](size_t i) const
{
assert(i<num);
return Vec3fa(vfloat4::loadu((float*)(ptr_ofs + i*stride)));
}
/*! writes the i'th element */
__forceinline void store(size_t i, const Vec3fa& v)
{
assert(i<num);
vfloat4::storeu((float*)(ptr_ofs + i*stride), (vfloat4)v);
}
#endif
};
}

View file

@ -0,0 +1,60 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "accel.h"
namespace embree
{
/*! flag OR'ed into build modes to request a high-quality (spatial-split) build */
#define MODE_HIGH_QUALITY (1<<8)
/*! virtual interface for all hierarchy builders */
class Builder : public RefCount {
public:
static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024; //!< below this primitive count builds run single threaded
/*! initiates the hierarchy builder */
virtual void build() = 0;
/*! notifies the builder about the deletion of some geometry (no-op by default) */
virtual void deleteGeometry(size_t geomID) {};
/*! clears internal builder state */
virtual void clear() = 0;
};
/*! virtual interface for progress monitor class; operator() is invoked with the
 *  number of primitives processed since the previous call */
struct BuildProgressMonitor {
virtual void operator() (size_t dn) const = 0;
};
/*! build the progress monitor interface from a closure */
template<typename Closure>
struct ProgressMonitorClosure : BuildProgressMonitor
{
public:
ProgressMonitorClosure (const Closure& closure) : closure(closure) {}
void operator() (size_t dn) const { closure(dn); } // forward progress delta to the stored closure
private:
const Closure closure; // stored by value so the adapter can outlive the call site
};
/*! helper that deduces the closure type; note the returned adapter holds a copy
 *  of the closure, so it is safe to use beyond the calling expression */
template<typename Closure> __forceinline const ProgressMonitorClosure<Closure> BuildProgressMonitorFromClosure(const Closure& closure) {
return ProgressMonitorClosure<Closure>(closure);
}
/* forward declarations of the geometry types the factory functions below build for */
struct LineSegments;
struct TriangleMesh;
struct QuadMesh;
struct UserGeometry;
class Scene;
/* factory function signatures: each creates an acceleration structure plus its builder */
typedef void (*createLineSegmentsAccelTy)(Scene* scene, LineSegments* mesh, AccelData*& accel, Builder*& builder);
typedef void (*createTriangleMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
typedef void (*createQuadMeshAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
typedef void (*createUserGeometryAccelTy)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
}

View file

@ -0,0 +1,173 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "rtcore.h"
#include "point_query.h"
namespace embree
{
class Scene;
/*! internal per-query context bundling the scene, the user context, and the
 *  intersect/occluded argument struct; occluded arguments are stored through a
 *  cast because RTCIntersectArguments and RTCOccludedArguments share a layout */
struct RayQueryContext
{
public:
__forceinline RayQueryContext(Scene* scene, RTCRayQueryContext* user_context, RTCIntersectArguments* args)
: scene(scene), user(user_context), args(args) {}
__forceinline RayQueryContext(Scene* scene, RTCRayQueryContext* user_context, RTCOccludedArguments* args)
: scene(scene), user(user_context), args((RTCIntersectArguments*)args) {}
__forceinline bool hasContextFilter() const {
return args->filter != nullptr;
}
RTCFilterFunctionN getFilter() const {
return args->filter;
}
RTCIntersectFunctionN getIntersectFunction() const {
return args->intersect;
}
RTCOccludedFunctionN getOccludedFunction() const {
// for occluded queries the constructor stored RTCOccludedArguments through a cast;
// its 'occluded' callback occupies the same slot as 'intersect', so cast it back here
return (RTCOccludedFunctionN) args->intersect;
}
__forceinline bool isCoherent() const {
return embree::isCoherent(args->flags);
}
__forceinline bool isIncoherent() const {
return embree::isIncoherent(args->flags);
}
__forceinline bool enforceArgumentFilterFunction() const {
return args->flags & RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER;
}
#if RTC_MIN_WIDTH
__forceinline float getMinWidthDistanceFactor() const {
return args->minWidthDistanceFactor;
}
#endif
public:
Scene* scene = nullptr;
RTCRayQueryContext* user = nullptr;
RTCIntersectArguments* args = nullptr; //!< may actually point at RTCOccludedArguments (see constructors)
};
/*! enlarges the radius component (w) of M control points so curves/points keep a
 *  minimum on-screen width: radius grows with distance to the ray origin, clamped
 *  between the original radius and geom->maxRadiusScale times it; identity when
 *  min-width support is compiled out */
template<int M, typename Geometry>
__forceinline Vec4vf<M> enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3vf<M>& ray_org, const Vec4vf<M>& v)
{
#if RTC_MIN_WIDTH
const vfloat<M> d = length(Vec3vf<M>(v) - ray_org);
const vfloat<M> r = clamp(context->getMinWidthDistanceFactor()*d, v.w, geom->maxRadiusScale*v.w);
return Vec4vf<M>(v.x,v.y,v.z,r);
#else
return v;
#endif
}
/*! scalar variant of enlargeRadiusToMinWidth for a single control point */
template<typename Geometry>
__forceinline Vec3ff enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec3ff& v)
{
#if RTC_MIN_WIDTH
const float d = length(Vec3fa(v) - ray_org);
const float r = clamp(context->getMinWidthDistanceFactor()*d, v.w, geom->maxRadiusScale*v.w);
return Vec3ff(v.x,v.y,v.z,r);
#else
return v;
#endif
}
/*! Vec4f adapter: repacks (x,y,z,w) into Vec3ff and forwards to the scalar variant */
template<typename Geometry>
__forceinline Vec3ff enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec4f& v) {
return enlargeRadiusToMinWidth(context,geom,ray_org,Vec3ff(v.x,v.y,v.z,v.w));
}
/*! shape of the query volume used for point queries */
enum PointQueryType
{
POINT_QUERY_TYPE_UNDEFINED = 0,
POINT_QUERY_TYPE_SPHERE = 1, //!< sphere around the query point
POINT_QUERY_TYPE_AABB = 2, //!< axis-aligned box around the query point
};
/*! user callback invoked for every primitive that may overlap the query volume */
typedef bool (*PointQueryFunction)(struct RTCPointQueryFunctionArguments* args);
/*! internal state of a point query traversal; tracks the world-space query and
 *  its instance-space approximation (query_radius), which is refreshed by update()
 *  whenever the instance stack changes */
struct PointQueryContext
{
public:
__forceinline PointQueryContext(Scene* scene,
PointQuery* query_ws,
PointQueryType query_type,
PointQueryFunction func,
RTCPointQueryContext* userContext,
float similarityScale,
void* userPtr)
: scene(scene)
, tstate(nullptr)
, query_ws(query_ws)
, query_type(query_type)
, func(func)
, userContext(userContext)
, similarityScale(similarityScale)
, userPtr(userPtr)
, primID(RTC_INVALID_GEOMETRY_ID)
, geomID(RTC_INVALID_GEOMETRY_ID)
, query_radius(query_ws->radius)
{
update(); // derive the instance-space radius for the current transform
}
public:
/*! recomputes query_radius for the current instance transform; similarity
 *  transforms scale the sphere radius uniformly, general transforms fall back
 *  to a conservative AABB */
__forceinline void update()
{
if (query_type == POINT_QUERY_TYPE_AABB) {
assert(similarityScale == 0.f); // AABB mode is only used when no uniform scale exists
updateAABB();
}
else{
query_radius = Vec3fa(query_ws->radius * similarityScale);
}
if (userContext->instStackSize == 0) {
assert(similarityScale == 1.f); // outside any instance the query is already world space
}
}
/*! conservative per-axis radius: transforms the world-space query box into
 *  instance space and takes its half extents */
__forceinline void updateAABB()
{
if (likely(query_ws->radius == (float)inf || userContext->instStackSize == 0)) {
query_radius = Vec3fa(query_ws->radius); // unbounded query or no instancing: nothing to transform
return;
}
const AffineSpace3fa m = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]);
BBox3fa bbox(Vec3fa(-query_ws->radius), Vec3fa(query_ws->radius));
bbox = xfmBounds(m, bbox);
query_radius = 0.5f * (bbox.upper - bbox.lower);
}
public:
Scene* scene;
void* tstate; //!< opaque per-traversal state
PointQuery* query_ws; // the original world space point query
PointQueryType query_type;
PointQueryFunction func; //!< user callback
RTCPointQueryContext* userContext; //!< holds the instance transform stack
float similarityScale; //!< uniform scale of the current transform (0 if not a similarity)
void* userPtr;
unsigned int primID; //!< currently visited primitive
unsigned int geomID; //!< currently visited geometry
Vec3fa query_radius; // used if the query is converted to an AABB internally
};
}

View file

@ -0,0 +1,266 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/sys/platform.h"
#include "../../common/sys/sysinfo.h"
#include "../../common/sys/thread.h"
#include "../../common/sys/alloc.h"
#include "../../common/sys/ref.h"
#include "../../common/sys/intrinsics.h"
#include "../../common/sys/atomic.h"
#include "../../common/sys/mutex.h"
#include "../../common/sys/vector.h"
#include "../../common/sys/array.h"
#include "../../common/sys/estring.h"
#include "../../common/sys/regression.h"
#include "../../common/sys/vector.h"
#include "../../common/math/emath.h"
#include "../../common/math/transcendental.h"
#include "../../common/simd/simd.h"
#include "../../common/math/vec2.h"
#include "../../common/math/vec3.h"
#include "../../common/math/vec4.h"
#include "../../common/math/vec2fa.h"
#include "../../common/math/vec3fa.h"
#include "../../common/math/interval.h"
#include "../../common/math/bbox.h"
#include "../../common/math/obbox.h"
#include "../../common/math/lbbox.h"
#include "../../common/math/linearspace2.h"
#include "../../common/math/linearspace3.h"
#include "../../common/math/affinespace.h"
#include "../../common/math/range.h"
#include "../../common/lexers/tokenstream.h"
#define COMMA ,
#include "../config.h"
#include "isa.h"
#include "stat.h"
#include "profile.h"
#include "rtcore.h"
#include "vector.h"
#include "state.h"
#include "instance_stack.h"
#include <vector>
#include <map>
#include <algorithm>
#include <functional>
#include <utility>
#include <sstream>
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// Vec2 shortcuts
////////////////////////////////////////////////////////////////////////////////
template<int N> using Vec2vf = Vec2<vfloat<N>>;
template<int N> using Vec2vd = Vec2<vdouble<N>>;
template<int N> using Vec2vr = Vec2<vreal<N>>;
template<int N> using Vec2vi = Vec2<vint<N>>;
template<int N> using Vec2vl = Vec2<vllong<N>>;
template<int N> using Vec2vb = Vec2<vbool<N>>;
template<int N> using Vec2vbf = Vec2<vboolf<N>>;
template<int N> using Vec2vbd = Vec2<vboold<N>>;
typedef Vec2<vfloat4> Vec2vf4;
typedef Vec2<vdouble4> Vec2vd4;
typedef Vec2<vreal4> Vec2vr4;
typedef Vec2<vint4> Vec2vi4;
typedef Vec2<vllong4> Vec2vl4;
typedef Vec2<vbool4> Vec2vb4;
typedef Vec2<vboolf4> Vec2vbf4;
typedef Vec2<vboold4> Vec2vbd4;
typedef Vec2<vfloat8> Vec2vf8;
typedef Vec2<vdouble8> Vec2vd8;
typedef Vec2<vreal8> Vec2vr8;
typedef Vec2<vint8> Vec2vi8;
typedef Vec2<vllong8> Vec2vl8;
typedef Vec2<vbool8> Vec2vb8;
typedef Vec2<vboolf8> Vec2vbf8;
typedef Vec2<vboold8> Vec2vbd8;
typedef Vec2<vfloat16> Vec2vf16;
typedef Vec2<vdouble16> Vec2vd16;
typedef Vec2<vreal16> Vec2vr16;
typedef Vec2<vint16> Vec2vi16;
typedef Vec2<vllong16> Vec2vl16;
typedef Vec2<vbool16> Vec2vb16;
typedef Vec2<vboolf16> Vec2vbf16;
typedef Vec2<vboold16> Vec2vbd16;
typedef Vec2<vfloatx> Vec2vfx;
typedef Vec2<vdoublex> Vec2vdx;
typedef Vec2<vrealx> Vec2vrx;
typedef Vec2<vintx> Vec2vix;
typedef Vec2<vllongx> Vec2vlx;
typedef Vec2<vboolx> Vec2vbx;
typedef Vec2<vboolfx> Vec2vbfx;
typedef Vec2<vbooldx> Vec2vbdx;
////////////////////////////////////////////////////////////////////////////////
/// Vec3 shortcuts
////////////////////////////////////////////////////////////////////////////////
template<int N> using Vec3vf = Vec3<vfloat<N>>;
template<int N> using Vec3vd = Vec3<vdouble<N>>;
template<int N> using Vec3vr = Vec3<vreal<N>>;
template<int N> using Vec3vi = Vec3<vint<N>>;
template<int N> using Vec3vl = Vec3<vllong<N>>;
template<int N> using Vec3vb = Vec3<vbool<N>>;
template<int N> using Vec3vbf = Vec3<vboolf<N>>;
template<int N> using Vec3vbd = Vec3<vboold<N>>;
typedef Vec3<vfloat4> Vec3vf4;
typedef Vec3<vdouble4> Vec3vd4;
typedef Vec3<vreal4> Vec3vr4;
typedef Vec3<vint4> Vec3vi4;
typedef Vec3<vllong4> Vec3vl4;
typedef Vec3<vbool4> Vec3vb4;
typedef Vec3<vboolf4> Vec3vbf4;
typedef Vec3<vboold4> Vec3vbd4;
typedef Vec3<vfloat8> Vec3vf8;
typedef Vec3<vdouble8> Vec3vd8;
typedef Vec3<vreal8> Vec3vr8;
typedef Vec3<vint8> Vec3vi8;
typedef Vec3<vllong8> Vec3vl8;
typedef Vec3<vbool8> Vec3vb8;
typedef Vec3<vboolf8> Vec3vbf8;
typedef Vec3<vboold8> Vec3vbd8;
typedef Vec3<vfloat16> Vec3vf16;
typedef Vec3<vdouble16> Vec3vd16;
typedef Vec3<vreal16> Vec3vr16;
typedef Vec3<vint16> Vec3vi16;
typedef Vec3<vllong16> Vec3vl16;
typedef Vec3<vbool16> Vec3vb16;
typedef Vec3<vboolf16> Vec3vbf16;
typedef Vec3<vboold16> Vec3vbd16;
typedef Vec3<vfloatx> Vec3vfx;
typedef Vec3<vdoublex> Vec3vdx;
typedef Vec3<vrealx> Vec3vrx;
typedef Vec3<vintx> Vec3vix;
typedef Vec3<vllongx> Vec3vlx;
typedef Vec3<vboolx> Vec3vbx;
typedef Vec3<vboolfx> Vec3vbfx;
typedef Vec3<vbooldx> Vec3vbdx;
////////////////////////////////////////////////////////////////////////////////
/// Vec4 shortcuts
////////////////////////////////////////////////////////////////////////////////
template<int N> using Vec4vf = Vec4<vfloat<N>>;
template<int N> using Vec4vd = Vec4<vdouble<N>>;
template<int N> using Vec4vr = Vec4<vreal<N>>;
template<int N> using Vec4vi = Vec4<vint<N>>;
template<int N> using Vec4vl = Vec4<vllong<N>>;
template<int N> using Vec4vb = Vec4<vbool<N>>;
template<int N> using Vec4vbf = Vec4<vboolf<N>>;
template<int N> using Vec4vbd = Vec4<vboold<N>>;
typedef Vec4<vfloat4> Vec4vf4;
typedef Vec4<vdouble4> Vec4vd4;
typedef Vec4<vreal4> Vec4vr4;
typedef Vec4<vint4> Vec4vi4;
typedef Vec4<vllong4> Vec4vl4;
typedef Vec4<vbool4> Vec4vb4;
typedef Vec4<vboolf4> Vec4vbf4;
typedef Vec4<vboold4> Vec4vbd4;
typedef Vec4<vfloat8> Vec4vf8;
typedef Vec4<vdouble8> Vec4vd8;
typedef Vec4<vreal8> Vec4vr8;
typedef Vec4<vint8> Vec4vi8;
typedef Vec4<vllong8> Vec4vl8;
typedef Vec4<vbool8> Vec4vb8;
typedef Vec4<vboolf8> Vec4vbf8;
typedef Vec4<vboold8> Vec4vbd8;
typedef Vec4<vfloat16> Vec4vf16;
typedef Vec4<vdouble16> Vec4vd16;
typedef Vec4<vreal16> Vec4vr16;
typedef Vec4<vint16> Vec4vi16;
typedef Vec4<vllong16> Vec4vl16;
typedef Vec4<vbool16> Vec4vb16;
typedef Vec4<vboolf16> Vec4vbf16;
typedef Vec4<vboold16> Vec4vbd16;
typedef Vec4<vfloatx> Vec4vfx;
typedef Vec4<vdoublex> Vec4vdx;
typedef Vec4<vrealx> Vec4vrx;
typedef Vec4<vintx> Vec4vix;
typedef Vec4<vllongx> Vec4vlx;
typedef Vec4<vboolx> Vec4vbx;
typedef Vec4<vboolfx> Vec4vbfx;
typedef Vec4<vbooldx> Vec4vbdx;
////////////////////////////////////////////////////////////////////////////////
/// Other shortcuts
////////////////////////////////////////////////////////////////////////////////
template<int N> using BBox3vf = BBox<Vec3vf<N>>;
typedef BBox<Vec3vf4> BBox3vf4;
typedef BBox<Vec3vf8> BBox3vf8;
typedef BBox<Vec3vf16> BBox3vf16;
/* maps a normalized time in [0,1] to its time-segment index and writes the
   fractional position inside that segment to ftime; the index is clamped to
   [0, numTimeSegments-1] so out-of-range times land in the boundary segments */
__forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime)
{
  const float scaled = time * numTimeSegments;
  float segf = floor(scaled);
  const float lastSeg = numTimeSegments - 1.0f;
  if (segf > lastSeg) segf = lastSeg; // clamp above first (matches min)
  if (segf < 0.0f)    segf = 0.0f;    // then clamp below (matches max)
  ftime = scaled - segf;              // fraction relative to the clamped segment start
  return int(segf);
}
/* same as the two-argument overload, but the input time lives in
   [start_time, end_time] and is first normalized into [0,1] */
__forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime)
{
  const float scaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
  float segf = floor(scaled);
  const float lastSeg = numTimeSegments - 1.0f;
  if (segf > lastSeg) segf = lastSeg; // clamp above first (matches min)
  if (segf < 0.0f)    segf = 0.0f;    // then clamp below (matches max)
  ftime = scaled - segf;              // fraction relative to the clamped segment start
  return int(segf);
}
/*! SIMD variant: computes the time segment index and fraction for N times at once */
template<int N>
__forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
{
const vfloat<N> timeScaled = time * numTimeSegments;
const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
ftime = timeScaled - itimef;
return vint<N>(itimef);
}
/*! SIMD variant with explicit time range: normalizes each time into [0,1] before
 *  computing the segment index and fraction */
template<int N>
__forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
{
const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
ftime = timeScaled - itimef;
return vint<N>(itimef);
}
/* calculate overlapping time segment range: returns the half-open range of
   segment indices a normalized [0,1] time range overlaps; ulp-sized rounding
   nudges the scaled bounds so a range ending exactly on a segment boundary does
   not spuriously include the neighboring segment */
__forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments)
{
const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step
const float round_down = 1.0f-2.0f*float(ulp);
const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f);
const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments);
return make_range(itime_lower, itime_upper);
}
/* calculate overlapping time segment range for a sub-range of the geometry's
   global time range: normalizes the sub-range into [0,1] relative to
   time_range, then delegates to the normalized overload above */
__forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments)
{
  const float t0 = time_range.lower;
  const float dt = time_range.size();
  const float lo = (range.lower - t0)/dt;
  const float hi = (range.upper - t0)/dt;
  return getTimeSegmentRange(BBox1f(lo,hi), numTimeSegments);
}
}

View file

@ -0,0 +1,730 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "device.h"
#include "../../common/tasking/taskscheduler.h"
#include "../hash.h"
#include "scene_triangle_mesh.h"
#include "scene_user_geometry.h"
#include "scene_instance.h"
#include "scene_curves.h"
#include "scene_subdiv_mesh.h"
#include "../subdiv/tessellation_cache.h"
#include "acceln.h"
#include "geometry.h"
#include "../geometry/cylinder.h"
#include "../bvh/bvh4_factory.h"
#include "../bvh/bvh8_factory.h"
#include "../../common/sys/alloc.h"
#if defined(EMBREE_SYCL_SUPPORT)
# include "../level_zero/ze_wrapper.h"
#endif
namespace embree
{
/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
ssize_t Device::debug_int0 = 0;
ssize_t Device::debug_int1 = 0;
ssize_t Device::debug_int2 = 0;
ssize_t Device::debug_int3 = 0;
static MutexSys g_mutex; // guards the per-device maps below
static std::map<Device*,size_t> g_cache_size_map; // tessellation cache size requested per device
static std::map<Device*,size_t> g_num_threads_map; // thread count requested per device
/*! pimpl wrapper holding the optional TBB task arena so device.h need not
 *  include TBB headers; empty when task arenas are not used */
struct TaskArena
{
#if USE_TASK_ARENA
std::unique_ptr<tbb::task_arena> arena;
#endif
};
/*! device construction: verifies ISA support, parses the configuration string,
 *  configures caches/FP exceptions, and registers the BVH factories; the
 *  initialization order below is significant (ISA checks must precede state
 *  parsing, state must be verified before factories are created) */
Device::Device (const char* cfg) : arena(new TaskArena())
{
/* check that CPU supports lowest ISA */
if (!hasISA(ISA)) {
throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
}
/* set default frequency level for detected CPU; no default case — frequency_level
   presumably keeps its State default for unlisted models (TODO confirm) */
switch (getCPUModel()) {
case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
}
/* initialize global state: compile-time config first, then the user string overrides it */
#if defined(EMBREE_CONFIG)
State::parseString(EMBREE_CONFIG);
#endif
State::parseString(cfg);
State::verify();
/* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */
if (!checkISASupport()) {
throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
}
/*! do some internal tests */
assert(isa::Cylinder::verify());
/*! enable huge page support if desired */
#if defined(__WIN32__)
if (State::enable_selockmemoryprivilege)
State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
#endif
State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));
/*! set tessellation cache size */
setCacheSize( State::tessellation_cache_size );
/*! enable some floating point exceptions to catch bugs */
if (State::float_exceptions)
{
int exceptions = _MM_MASK_MASK;
//exceptions &= ~_MM_MASK_INVALID;
exceptions &= ~_MM_MASK_DENORM;
exceptions &= ~_MM_MASK_DIV_ZERO;
//exceptions &= ~_MM_MASK_OVERFLOW;
//exceptions &= ~_MM_MASK_UNDERFLOW;
//exceptions &= ~_MM_MASK_INEXACT;
_MM_SET_EXCEPTION_MASK(exceptions);
}
/* print info header */
if (State::verbosity(1))
print();
if (State::verbosity(2))
State::print();
/* register all algorithms */
bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
#if defined(EMBREE_TARGET_SIMD8)
bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
#endif
/* setup tasking system */
initTaskingSystem(numThreads);
}
Device::~Device ()
{
  setCacheSize(0);     // drop this device's tessellation cache reservation
  exitTaskingSystem(); // unregister from the shared task scheduler
}
/* returns a space-separated list of the ISA targets compiled into this build */
std::string getEnabledTargets()
{
  std::string targets;
#if defined(EMBREE_TARGET_SSE2)
  targets.append("SSE2 ");
#endif
#if defined(EMBREE_TARGET_SSE42)
  targets.append("SSE4.2 ");
#endif
#if defined(EMBREE_TARGET_AVX)
  targets.append("AVX ");
#endif
#if defined(EMBREE_TARGET_AVX2)
  targets.append("AVX2 ");
#endif
#if defined(EMBREE_TARGET_AVX512)
  targets.append("AVX512 ");
#endif
  return targets;
}
/* returns a space-separated list of the optional features compiled into this build */
std::string getEmbreeFeatures()
{
  std::string features;
#if defined(EMBREE_RAY_MASK)
  features.append("raymasks ");
#endif
#if defined(EMBREE_BACKFACE_CULLING)
  features.append("backfaceculling ");
#endif
#if defined(EMBREE_BACKFACE_CULLING_CURVES)
  features.append("backfacecullingcurves ");
#endif
#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
  features.append("backfacecullingspheres ");
#endif
#if defined(EMBREE_FILTER_FUNCTION)
  features.append("intersection_filter ");
#endif
#if defined(EMBREE_COMPACT_POLYS)
  features.append("compact_polys ");
#endif
  return features;
}
/* prints the device info header (version, compiler, CPU, ISA, tasking
   backend, MXCSR state) and warns when FTZ/DAZ are not enabled */
void Device::print()
{
  const int cpu_features = getCPUFeatures();
  std::cout << std::endl;
  std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
  std::cout << " Compiler : " << getCompilerName() << std::endl;
  std::cout << " Build : ";
#if defined(DEBUG)
  std::cout << "Debug " << std::endl;
#else
  std::cout << "Release " << std::endl;
#endif
  std::cout << " Platform : " << getPlatformName() << std::endl;
  std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
  std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
  std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
  std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
  /* read Flush-To-Zero and Denormals-Are-Zero bits from the MXCSR register */
  const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
  const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
  std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
  std::cout << " Config" << std::endl;
  std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
  std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
  std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
  std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
  std::cout << " Features: " << getEmbreeFeatures() << std::endl;
  std::cout << " Tasking : ";
#if defined(TASKING_TBB)
  std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
#if TBB_INTERFACE_VERSION >= 12002
  std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
#else
  std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
#endif
#endif
#if defined(TASKING_INTERNAL)
  std::cout << "internal_tasking_system ";
#endif
#if defined(TASKING_PPL)
  std::cout << "PPL ";
#endif
  std::cout << std::endl;

  /* check if FTZ and DAZ flags are set in CSR */
  if (!hasFTZ || !hasDAZ)
  {
#if !defined(_DEBUG)
    if (State::verbosity(1))
#endif
    {
      std::cout << std::endl;
      std::cout << "================================================================================" << std::endl;
      std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
                << " in the MXCSR control and status register. This can have a severe " << std::endl
                << " performance impact. Please enable these modes for each application " << std::endl
                << " thread the following way:" << std::endl
                << std::endl
                << " #include \"xmmintrin.h\"" << std::endl
                << " #include \"pmmintrin.h\"" << std::endl
                << std::endl
                << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
                << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
      std::cout << "================================================================================" << std::endl;
      std::cout << std::endl;
    }
  }
  std::cout << std::endl;
}
/* stores the error in this device's error handler; the first pending
   error is kept, later ones are dropped until the code is read */
void Device::setDeviceErrorCode(RTCError error)
{
  RTCError* stored_error = errorHandler.error();
  if (*stored_error == RTC_ERROR_NONE)
    *stored_error = error;
}
/* returns the pending device error code and resets it to RTC_ERROR_NONE */
RTCError Device::getDeviceErrorCode()
{
  RTCError* stored_error = errorHandler.error();
  RTCError error = *stored_error;
  *stored_error = RTC_ERROR_NONE;
  return error;
}
/* stores the error in the global handler's error slot (used by
   process_error when no device exists); first pending error wins */
void Device::setThreadErrorCode(RTCError error)
{
  RTCError* stored_error = g_errorHandler.error();
  if (*stored_error == RTC_ERROR_NONE)
    *stored_error = error;
}
/* returns the pending global/thread error code and resets it to RTC_ERROR_NONE */
RTCError Device::getThreadErrorCode()
{
  RTCError* stored_error = g_errorHandler.error();
  RTCError error = *stored_error;
  *stored_error = RTC_ERROR_NONE;
  return error;
}
/* Central error reporting, do not call directly: records the error on
   the device (or on the global handler when device construction itself
   failed), prints it in verbose mode, and invokes the user callback. */
void Device::process_error(Device* device, RTCError error, const char* str)
{
  /* store global error code when device construction failed */
  if (!device) {
    setThreadErrorCode(error);
    return;
  }

  /* print error when in verbose mode */
  if (device->verbosity(1))
  {
    const char* message = nullptr;
    switch (error) {
    case RTC_ERROR_NONE             : message = "No error"; break;
    case RTC_ERROR_UNKNOWN          : message = "Unknown error"; break;
    case RTC_ERROR_INVALID_ARGUMENT : message = "Invalid argument"; break;
    case RTC_ERROR_INVALID_OPERATION: message = "Invalid operation"; break;
    case RTC_ERROR_OUT_OF_MEMORY    : message = "Out of memory"; break;
    case RTC_ERROR_UNSUPPORTED_CPU  : message = "Unsupported CPU"; break;
    default                         : message = "Invalid error code"; break;
    }
    std::cerr << "Embree: " << message;
    if (str) std::cerr << ", (" << str << ")";
    std::cerr << std::endl;
  }

  /* call user specified error callback */
  if (device->error_function)
    device->error_function(device->error_function_userptr,error,str);

  /* record error code */
  device->setDeviceErrorCode(error);
}
/* forwards an allocation (+bytes) or deallocation (-bytes) event to the
   user memory monitor callback; throws only for allocations the monitor
   rejects, never on the deallocation path (which may run in destructors) */
void Device::memoryMonitor(ssize_t bytes, bool post)
{
  if (bytes == 0 || !State::memory_monitor_function)
    return;

  const bool approved = State::memory_monitor_function(State::memory_monitor_userptr,bytes,post);
  if (!approved && bytes > 0) // only throw exception when we allocate memory to never throw inside a destructor
    throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
}
/* maximum thread count requested over all registered devices;
   size_t max when no device registered a limit */
size_t getMaxNumThreads()
{
  size_t result = 0;
  for (const auto& entry : g_num_threads_map)
    result = max(result, entry.second);
  if (result == 0)
    return std::numeric_limits<size_t>::max();
  return result;
}
/* maximum tessellation cache size requested over all registered devices */
size_t getMaxCacheSize()
{
  size_t result = 0;
  for (const auto& entry : g_cache_size_map)
    result = max(result, entry.second);
  return result;
}
/* registers this device's desired tessellation cache size (0 removes
   the reservation) and resizes the shared cache to the maximum
   requested over all devices; no-op without subdivision support */
void Device::setCacheSize(size_t bytes)
{
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
  Lock<MutexSys> lock(g_mutex);
  if (bytes == 0) g_cache_size_map.erase(this);
  else g_cache_size_map[this] = bytes;

  size_t maxCacheSize = getMaxCacheSize();
  resizeTessellationCache(maxCacheSize);
#endif
}
/* registers this device's thread count (0 = use all threads) and
   (re)creates the shared task scheduler sized for the maximum request
   over all devices; also creates the TBB arena used by execute() */
void Device::initTaskingSystem(size_t numThreads)
{
  Lock<MutexSys> lock(g_mutex);
  if (numThreads == 0)
    g_num_threads_map[this] = std::numeric_limits<size_t>::max();
  else
    g_num_threads_map[this] = numThreads;

  /* create task scheduler */
  size_t maxNumThreads = getMaxNumThreads();
  TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
#if USE_TASK_ARENA
  const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
  // numUserThreads presumably comes from the State configuration — TODO confirm
  const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
  arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
#endif
}
/* unregisters this device: destroys the shared scheduler when this was
   the last device, otherwise re-creates it for the remaining maximum
   thread request; always releases this device's TBB arena */
void Device::exitTaskingSystem()
{
  Lock<MutexSys> lock(g_mutex);
  g_num_threads_map.erase(this);

  /* terminate tasking system */
  if (g_num_threads_map.size() == 0) {
    TaskScheduler::destroy();
  }

  /* or configure new number of threads */
  else {
    size_t maxNumThreads = getMaxNumThreads();
    TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
  }
#if USE_TASK_ARENA
  arena->arena.reset();
#endif
}
/* runs func inside the device's TBB task arena when join is set (and a
   task arena is compiled in), otherwise invokes it directly */
void Device::execute(bool join, const std::function<void()>& func)
{
#if USE_TASK_ARENA
  if (join) {
    arena->arena->execute(func);
  }
  else
#endif
  {
    func();
  }
}
void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
{
/* hidden internal properties */
switch ((size_t)prop)
{
case 1000000: debug_int0 = val; return;
case 1000001: debug_int1 = val; return;
case 1000002: debug_int2 = val; return;
case 1000003: debug_int3 = val; return;
}
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
}
/* Returns a documented device property, or serves the hidden ranges:
   [2000000,3000000) returns the name pointer of an internal regression
   test, [3000000,4000000) runs one and returns its result. */
ssize_t Device::getProperty(const RTCDeviceProperty prop)
{
  size_t iprop = (size_t)prop;

  /* get name of internal regression test */
  if (iprop >= 2000000 && iprop < 3000000)
  {
    RegressionTest* test = getRegressionTest(iprop-2000000);
    if (test) return (ssize_t) test->name.c_str();
    else return 0;
  }

  /* run internal regression test */
  if (iprop >= 3000000 && iprop < 4000000)
  {
    RegressionTest* test = getRegressionTest(iprop-3000000);
    if (test) return test->run();
    else return 0;
  }

  /* documented properties; feature flags reflect compile-time options,
     packet-width support additionally checks the runtime ISA */
  switch (prop)
  {
  case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
  case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
  case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
  case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;

#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
  case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);
#else
  case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;
#endif

#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
  case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);
#else
  case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;
#endif

#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
  case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512);
#else
  case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
#endif

#if defined(EMBREE_RAY_MASK)
  case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;
#endif

#if defined(EMBREE_BACKFACE_CULLING)
  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;
#else
  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;
#endif

#if defined(EMBREE_BACKFACE_CULLING_CURVES)
  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;
#else
  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
#endif

#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
#else
  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
#endif

#if defined(EMBREE_COMPACT_POLYS)
  case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
#else
  case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0;
#endif

#if defined(EMBREE_FILTER_FUNCTION)
  case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;
#endif

#if defined(EMBREE_IGNORE_INVALID_RAYS)
  case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;
#else
  case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;
#endif

  /* tasking system id: 0 = internal, 1 = TBB, 2 = PPL */
#if defined(TASKING_INTERNAL)
  case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
#endif

#if defined(TASKING_TBB)
  case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
#endif

#if defined(TASKING_PPL)
  case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
#endif

#if defined(EMBREE_GEOMETRY_TRIANGLE)
  case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
  case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;
#endif

#if defined(EMBREE_GEOMETRY_CURVE)
  case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;
#endif

#if defined(EMBREE_GEOMETRY_SUBDIVISION)
  case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;
#endif

#if defined(EMBREE_GEOMETRY_USER)
  case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;
#endif

#if defined(EMBREE_GEOMETRY_POINT)
  case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;
#endif

  /* join-based commit requires TBB with a sufficiently new interface */
#if defined(TASKING_PPL)
  case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
  case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
#else
  case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;
#endif

#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
  case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;
#else
  case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
#endif

  default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
  };
}
/* aligned host memory allocation used for internal buffers */
void* Device::malloc(size_t size, size_t align) {
  return alignedMalloc(size,align);
}
/* releases memory obtained from Device::malloc */
void Device::free(void* ptr) {
  alignedFree(ptr);
}
#if defined(EMBREE_SYCL_SUPPORT)
/* GPU device construction: picks the first device of the SYCL context,
   checks for the Level Zero RTAS build extension (or falls back to the
   internal builder), queries device limits, and initializes the RTAS
   dispatch globals. */
DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
  : Device(cfg), gpu_context(sycl_context)
{
  /* initialize ZeWrapper */
  if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
    throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");

  /* take first device as default device */
  auto devices = gpu_context.get_devices();
  if (devices.size() == 0)
    throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
  gpu_device = devices[0];

  /* check if RTAS build extension is available */
  sycl::platform platform = gpu_device.get_platform();
  ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);

  /* standard two-call pattern: first call queries the extension count,
     second call fills the resized vector */
  uint32_t count = 0;
  std::vector<ze_driver_extension_properties_t> extensions;
  ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  if (result != ZE_RESULT_SUCCESS)
    throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");

  extensions.resize(count);
  result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  if (result != ZE_RESULT_SUCCESS)
    throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");

#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
  bool ze_rtas_builder = false;
  for (uint32_t i=0; i<extensions.size(); i++)
  {
    if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
      ze_rtas_builder = true;
  }
  if (!ze_rtas_builder)
    throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");

  result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
  if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
    throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
  if (result != ZE_RESULT_SUCCESS)
    throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
#else
  /* without the L0 builder option always use the internal RTAS builder */
  ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
#endif

  if (State::verbosity(1))
  {
    if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
      std::cout << " Internal RTAS Builder" << std::endl;
    else
      std::cout << " Level Zero RTAS Builder" << std::endl;
  }

  /* check if extension library can get loaded */
  ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
  result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
  if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
    throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
  if (result == ZE_RESULT_SUCCESS)
    ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);

  /* cache device limits for later queries */
  gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
  gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();

  if (State::verbosity(1))
  {
    sycl::platform platform = gpu_context.get_platform();
    std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
    std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
    std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
    std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
    std::cout << std::endl;
  }

  dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
}
/* releases the RTAS dispatch globals allocated at construction */
DeviceGPU::~DeviceGPU()
{
  rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);
}
/* routes subsequent embree allocations through USM for this context/device */
void DeviceGPU::enter() {
  enableUSMAllocEmbree(&gpu_context,&gpu_device);
}
/* restores the default (non-USM) allocation path */
void DeviceGPU::leave() {
  disableUSMAllocEmbree();
}
/* USM shared allocation (device read-only) replacing the CPU-aligned allocator */
void* DeviceGPU::malloc(size_t size, size_t align) {
  return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
}
/* releases memory obtained from DeviceGPU::malloc */
void DeviceGPU::free(void* ptr) {
  alignedSYCLFree(&gpu_context,ptr);
}
/* overrides the default (first) device selected at construction */
void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
  gpu_device = sycl_device_in;
}
#endif
/* RAII: takes a reference on the device and enters it for the scope */
DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
  : device((Device*)hdevice)
{
  assert(device);
  device->refInc();
  device->enter();
}
/* RAII: enters the device owning the given scene */
DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
  : device(((Scene*)hscene)->device)
{
  assert(device);
  device->refInc();
  device->enter();
}
/* RAII: enters the device owning the given geometry */
DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
  : device(((Geometry*)hgeometry)->device)
{
  assert(device);
  device->refInc();
  device->enter();
}
/* RAII: enters the device owning the given buffer */
DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
  : device(((Buffer*)hbuffer)->device)
{
  assert(device);
  device->refInc();
  device->enter();
}
/* leaves the device and drops the reference taken at construction */
DeviceEnterLeave::~DeviceEnterLeave() {
  device->leave();
  device->refDec();
}
}

View file

@ -0,0 +1,194 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "state.h"
#include "accel.h"
namespace embree
{
class BVH4Factory;
class BVH8Factory;
struct TaskArena;
/*! Central per-device object: carries the parsed configuration (State),
    the BVH factories, error handling, and the connection to the shared
    tasking system. */
class Device : public State, public MemoryMonitorInterface
{
  ALIGNED_CLASS_(16);
public:

  /*! allocator that performs unified shared memory allocations */
  template<typename T, size_t alignment>
  struct allocator
  {
    typedef T value_type;
    typedef T* pointer;
    typedef const T* const_pointer;
    typedef T& reference;
    typedef const T& const_reference;
    typedef std::size_t size_type;
    typedef std::ptrdiff_t difference_type;

    allocator() {}

    allocator(Device* device)
      : device(device) {}

    /* allocation is routed through the device so DeviceGPU can use USM */
    __forceinline pointer allocate( size_type n ) {
      assert(device);
      return (pointer) device->malloc(n*sizeof(T),alignment);
    }

    __forceinline void deallocate( pointer p, size_type n ) {
      if (device) device->free(p);
    }

    __forceinline void construct( pointer p, const_reference val ) {
      new (p) T(val);
    }

    __forceinline void destroy( pointer p ) {
      p->~T();
    }

    Device* device = nullptr; // non-owning; a default-constructed allocator cannot allocate
  };

  /*! vector class that performs aligned allocations from Device object */
  template<typename T>
  using vector = vector_t<T,allocator<T,std::alignment_of<T>::value>>;

  template<typename T, size_t alignment>
  using avector = vector_t<T,allocator<T,alignment>>;

public:

  /*! Device construction */
  Device (const char* cfg);

  /*! Device destruction */
  virtual ~Device ();

  /*! prints info about the device */
  void print();

  /*! sets the error code */
  void setDeviceErrorCode(RTCError error);

  /*! returns and clears the error code */
  RTCError getDeviceErrorCode();

  /*! sets the error code */
  static void setThreadErrorCode(RTCError error);

  /*! returns and clears the error code */
  static RTCError getThreadErrorCode();

  /*! processes error codes, do not call directly */
  static void process_error(Device* device, RTCError error, const char* str);

  /*! invokes the memory monitor callback */
  void memoryMonitor(ssize_t bytes, bool post);

  /*! sets the size of the software cache. */
  void setCacheSize(size_t bytes);

  /*! sets a property */
  void setProperty(const RTCDeviceProperty prop, ssize_t val);

  /*! gets a property */
  ssize_t getProperty(const RTCDeviceProperty prop);

  /*! enter device by setting up some global state */
  virtual void enter() {}

  /*! leave device by setting up some global state */
  virtual void leave() {}

  /*! buffer allocation */
  virtual void* malloc(size_t size, size_t align);

  /*! buffer deallocation */
  virtual void free(void* ptr);

private:

  /*! initializes the tasking system */
  void initTaskingSystem(size_t numThreads);

  /*! shuts down the tasking system */
  void exitTaskingSystem();

  /*! TBB arena used by execute(); owned for the device lifetime */
  std::unique_ptr<TaskArena> arena;

public:
  // use tasking system arena to execute func
  void execute(bool join, const std::function<void()>& func);

  /*! some variables that can be set via rtcSetParameter1i for debugging purposes */
public:
  static ssize_t debug_int0;
  static ssize_t debug_int1;
  static ssize_t debug_int2;
  static ssize_t debug_int3;

public:
  std::unique_ptr<BVH4Factory> bvh4_factory;
#if defined(EMBREE_TARGET_SIMD8)
  std::unique_ptr<BVH8Factory> bvh8_factory;
#endif
};
#if defined(EMBREE_SYCL_SUPPORT)
/*! Device specialization for SYCL GPUs: allocations go through USM and
    the RTAS builder is initialized at construction. */
class DeviceGPU : public Device
{
public:
  DeviceGPU(sycl::context sycl_context, const char* cfg);
  ~DeviceGPU();

  virtual void enter() override;
  virtual void leave() override;
  virtual void* malloc(size_t size, size_t align) override;
  virtual void free(void* ptr) override;

  /* set SYCL device */
  void setSYCLDevice(const sycl::device sycl_device);

private:
  sycl::context gpu_context;
  sycl::device gpu_device;

  /* device limits cached at construction */
  unsigned int gpu_maxWorkGroupSize;
  unsigned int gpu_maxComputeUnits;

public:
  void* dispatchGlobalsPtr = nullptr; // RTAS dispatch globals, released in the destructor

public:
  inline sycl::device &getGPUDevice() { return gpu_device; }
  inline sycl::context &getGPUContext() { return gpu_context; }

  inline unsigned int getGPUMaxWorkGroupSize() { return gpu_maxWorkGroupSize; }

  void init_rthw_level_zero();
  void init_rthw_opencl();
};
#endif
/*! RAII guard: holds a device reference and enters/leaves the device
    around an API call (constructible from any embree object handle) */
struct DeviceEnterLeave
{
  DeviceEnterLeave (RTCDevice hdevice);
  DeviceEnterLeave (RTCScene hscene);
  DeviceEnterLeave (RTCGeometry hgeometry);
  DeviceEnterLeave (RTCBuffer hbuffer);
  ~DeviceEnterLeave();
private:
  Device* device; // non-owning beyond the refInc/refDec pair
};
}

View file

@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "geometry.h"
#include "scene.h"
namespace embree
{
/* printable names indexed by Geometry::GType; empty strings pad unused
   enum slots so the indices stay aligned with the GType numbering */
const char* Geometry::gtype_names[Geometry::GTY_END] =
{
  "flat_linear_curve",
  "round_linear_curve",
  "oriented_linear_curve",
  "",
  "flat_bezier_curve",
  "round_bezier_curve",
  "oriented_bezier_curve",
  "",
  "flat_bspline_curve",
  "round_bspline_curve",
  "oriented_bspline_curve",
  "",
  "flat_hermite_curve",
  "round_hermite_curve",
  "oriented_hermite_curve",
  "",
  "flat_catmull_rom_curve",
  "round_catmull_rom_curve",
  "oriented_catmull_rom_curve",
  "",
  "triangles",
  "quads",
  "grid",
  "subdivs",
  "",
  "sphere",
  "disc",
  "oriented_disc",
  "",
  "usergeom",
  "instance_cheap",
  "instance_expensive",
};
/* Geometry construction: stores type and counts, starts in MODIFIED
   state, and takes a reference on the owning device;
   fnumTimeSegments caches numTimeSteps-1 as float for interpolation */
Geometry::Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps)
  : device(device), userPtr(nullptr),
    numPrimitives(numPrimitives), numTimeSteps(unsigned(numTimeSteps)), fnumTimeSegments(float(numTimeSteps-1)), time_range(0.0f,1.0f),
    mask(1),
    gtype(gtype),
    gsubtype(GTY_SUBTYPE_DEFAULT),
    quality(RTC_BUILD_QUALITY_MEDIUM),
    state((unsigned)State::MODIFIED),
    enabled(true),
    argumentFilterEnabled(false),
    intersectionFilterN(nullptr), occlusionFilterN(nullptr), pointQueryFunc(nullptr)
{
  device->refInc();
}
/* releases the device reference taken at construction */
Geometry::~Geometry()
{
  device->refDec();
}
/* updates the primitive count; a real change marks the geometry modified */
void Geometry::setNumPrimitives(unsigned int numPrimitives_in)
{
  if (numPrimitives_in != numPrimitives) {
    numPrimitives = numPrimitives_in;
    Geometry::update();
  }
}
/* updates the motion-blur time step count (and the cached float segment
   count); a real change marks the geometry modified */
void Geometry::setNumTimeSteps (unsigned int numTimeSteps_in)
{
  if (numTimeSteps_in == numTimeSteps)
    return;

  fnumTimeSegments = float(numTimeSteps_in-1);
  numTimeSteps = numTimeSteps_in;
  Geometry::update();
}
/* sets the motion-blur time range and marks the geometry modified */
void Geometry::setTimeRange (const BBox1f range)
{
  time_range = range;
  Geometry::update();
}
/* returns the motion-blur time range */
BBox1f Geometry::getTimeRange () const
{
  return time_range;
}
/* marks the geometry as modified and bumps the modification counter */
void Geometry::update()
{
  ++modCounter_; // FIXME: required?
  state = (unsigned)State::MODIFIED;
}
/* marks the geometry as committed and bumps the modification counter */
void Geometry::commit()
{
  ++modCounter_;
  state = (unsigned)State::COMMITTED;
}
/* pre-commit check: rejects scene commits of geometries left in MODIFIED state */
void Geometry::preCommit()
{
  if (State::MODIFIED == (State)state)
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"geometry not committed");
}
/* post-commit hook; no default action, subclasses may override */
void Geometry::postCommit()
{
}
/* enables the geometry; enabling an already enabled geometry is a no-op */
void Geometry::enable ()
{
  if (!isEnabled()) {
    enabled = true;
    ++modCounter_;
  }
}
/* disables the geometry; disabling an already disabled geometry is a no-op */
void Geometry::disable ()
{
  if (!isDisabled()) {
    enabled = false;
    ++modCounter_;
  }
}
/* stores the opaque user pointer passed back to callbacks */
void Geometry::setUserData (void* ptr)
{
  userPtr = ptr;
}
/* installs the intersection filter callback; only geometry types that
   support filtering accept one */
void Geometry::setIntersectionFilterFunctionN (RTCFilterFunctionN filter)
{
  if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
  intersectionFilterN = filter;
}
/* installs the occlusion filter callback; only geometry types that
   support filtering accept one */
void Geometry::setOcclusionFilterFunctionN (RTCFilterFunctionN filter)
{
  if (!(getTypeMask() & (MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH)))
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
  occlusionFilterN = filter;
}
/* installs the per-geometry point query callback */
void Geometry::setPointQueryFunction (RTCPointQueryFunction func)
{
  pointQueryFunc = func;
}
/* Interpolates vertex attributes for N (primID,u,v) queries at once by
   forwarding each valid entry to the single-query interpolate().
   Per-query results are produced into aligned scratch buffers and then
   scattered into the caller's structure-of-arrays output (out[j*N+i]). */
void Geometry::interpolateN(const RTCInterpolateNArguments* const args)
{
  /* unpack the batched arguments */
  const void* valid_i = args->valid;
  const unsigned* primIDs = args->primIDs;
  const float* u = args->u;
  const float* v = args->v;
  unsigned int N = args->N;
  RTCBufferType bufferType = args->bufferType;
  unsigned int bufferSlot = args->bufferSlot;
  float* P = args->P;
  float* dPdu = args->dPdu;
  float* dPdv = args->dPdv;
  float* ddPdudu = args->ddPdudu;
  float* ddPdvdv = args->ddPdvdv;
  float* ddPdudv = args->ddPdudv;
  unsigned int valueCount = args->valueCount;

  /* the scratch buffers below are fixed at 256 floats per derivative */
  if (valueCount > 256) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximally 256 floating point values can be interpolated per vertex");
  const int* valid = (const int*) valid_i;

  __aligned(64) float P_tmp[256];
  __aligned(64) float dPdu_tmp[256];
  __aligned(64) float dPdv_tmp[256];
  __aligned(64) float ddPdudu_tmp[256];
  __aligned(64) float ddPdvdv_tmp[256];
  __aligned(64) float ddPdudv_tmp[256];

  /* only request the outputs the caller asked for; first-order and
     second-order derivatives are enabled as groups */
  float* Pt = P ? P_tmp : nullptr;
  float* dPdut = nullptr, *dPdvt = nullptr;
  if (dPdu) { dPdut = dPdu_tmp; dPdvt = dPdv_tmp; }
  float* ddPdudut = nullptr, *ddPdvdvt = nullptr, *ddPdudvt = nullptr;
  if (ddPdudu) { ddPdudut = ddPdudu_tmp; ddPdvdvt = ddPdvdv_tmp; ddPdudvt = ddPdudv_tmp; }

  for (unsigned int i=0; i<N; i++)
  {
    /* a null valid array means all entries are active */
    if (valid && !valid[i]) continue;

    RTCInterpolateArguments iargs;
    iargs.primID = primIDs[i];
    iargs.u = u[i];
    iargs.v = v[i];
    iargs.bufferType = bufferType;
    iargs.bufferSlot = bufferSlot;
    iargs.P = Pt;
    iargs.dPdu = dPdut;
    iargs.dPdv = dPdvt;
    iargs.ddPdudu = ddPdudut;
    iargs.ddPdvdv = ddPdvdvt;
    iargs.ddPdudv = ddPdudvt;
    iargs.valueCount = valueCount;
    interpolate(&iargs);

    /* scatter scratch results into the SoA output arrays */
    if (likely(P)) {
      for (unsigned int j=0; j<valueCount; j++)
        P[j*N+i] = Pt[j];
    }
    if (likely(dPdu))
    {
      for (unsigned int j=0; j<valueCount; j++) {
        dPdu[j*N+i] = dPdut[j];
        dPdv[j*N+i] = dPdvt[j];
      }
    }
    if (likely(ddPdudu))
    {
      for (unsigned int j=0; j<valueCount; j++) {
        ddPdudu[j*N+i] = ddPdudut[j];
        ddPdvdv[j*N+i] = ddPdvdvt[j];
        ddPdudv[j*N+i] = ddPdudvt[j];
      }
    }
  }
}
/* Invokes the context callback and the per-geometry point query
   callback for one primitive. When a callback reports an update and the
   query is inside an instance stack, the query shape is refreshed
   (AABB recomputed, or the radius rescaled by similarityScale).
   Returns whether any callback updated the query. */
bool Geometry::pointQuery(PointQuery* query, PointQueryContext* context)
{
  assert(context->primID < size());

  RTCPointQueryFunctionArguments args;
  args.query = (RTCPointQuery*)context->query_ws; // world-space query is what callbacks observe
  args.userPtr = context->userPtr;
  args.primID = context->primID;
  args.geomID = context->geomID;
  args.context = context->userContext;
  args.similarityScale = context->similarityScale;

  bool update = false;
  if(context->func) update |= context->func(&args);
  if(pointQueryFunc) update |= pointQueryFunc(&args);

  if (update && context->userContext->instStackSize > 0)
  {
    // update point query
    if (context->query_type == POINT_QUERY_TYPE_AABB) {
      context->updateAABB();
    } else {
      /* sphere query: map the (possibly shrunk) world-space radius into
         instance space via the similarity scale */
      assert(context->similarityScale > 0.f);
      query->radius = context->query_ws->radius * context->similarityScale;
    }
  }
  return update;
}
}

View file

@ -0,0 +1,663 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "device.h"
#include "buffer.h"
#include "../common/point_query.h"
#include "../builders/priminfo.h"
#include "../builders/priminfo_mb.h"
namespace embree
{
class Scene;
class Geometry;
struct GeometryCounts
{
/* all per-type counters (static and motion-blurred) start at zero */
__forceinline GeometryCounts()
  : numFilterFunctions(0),
    numTriangles(0), numMBTriangles(0),
    numQuads(0), numMBQuads(0),
    numBezierCurves(0), numMBBezierCurves(0),
    numLineSegments(0), numMBLineSegments(0),
    numSubdivPatches(0), numMBSubdivPatches(0),
    numUserGeometries(0), numMBUserGeometries(0),
    numInstancesCheap(0), numMBInstancesCheap(0),
    numInstancesExpensive(0), numMBInstancesExpensive(0),
    numInstanceArrays(0), numMBInstanceArrays(0),
    numGrids(0), numMBGrids(0),
    numSubGrids(0), numMBSubGrids(0),
    numPoints(0), numMBPoints(0) {}
/* total primitive count over all geometry types, static plus
   motion-blurred (numSubGrids is not included — presumably derived from
   numGrids; verify against the users of size()) */
__forceinline size_t size() const {
  return numTriangles + numQuads + numBezierCurves + numLineSegments + numSubdivPatches + numUserGeometries + numInstancesCheap + numInstancesExpensive + numInstanceArrays + numGrids + numPoints
    + numMBTriangles + numMBQuads + numMBBezierCurves + numMBLineSegments + numMBSubdivPatches + numMBUserGeometries + numMBInstancesCheap + numMBInstancesExpensive + numMBInstanceArrays + numMBGrids + numMBPoints;
}
/* Builds a bitmask of the enabled geometry categories: one bit per
   category (bit 0 triangles ... bit 9 points), computed separately for
   static and motion-blurred geometry, combined as (static<<8) + mb.
   NOTE(review): mb bits 8-9 (grids/points) overlap static<<8 bits 0-1,
   so '+' can carry between the halves — confirm this is intended. */
__forceinline unsigned int enabledGeometryTypesMask() const
{
  unsigned int mask = 0;
  if (numTriangles) mask |= 1 << 0;
  if (numQuads) mask |= 1 << 1;
  if (numBezierCurves+numLineSegments) mask |= 1 << 2;
  if (numSubdivPatches) mask |= 1 << 3;
  if (numUserGeometries) mask |= 1 << 4;
  if (numInstancesCheap) mask |= 1 << 5;
  if (numInstancesExpensive) mask |= 1 << 6;
  if (numInstanceArrays) mask |= 1 << 7;
  if (numGrids) mask |= 1 << 8;
  if (numPoints) mask |= 1 << 9;

  unsigned int maskMB = 0;
  if (numMBTriangles) maskMB |= 1 << 0;
  if (numMBQuads) maskMB |= 1 << 1;
  if (numMBBezierCurves+numMBLineSegments) maskMB |= 1 << 2;
  if (numMBSubdivPatches) maskMB |= 1 << 3;
  if (numMBUserGeometries) maskMB |= 1 << 4;
  if (numMBInstancesCheap) maskMB |= 1 << 5;
  if (numMBInstancesExpensive) maskMB |= 1 << 6;
  if (numMBInstanceArrays) maskMB |= 1 << 7;
  if (numMBGrids) maskMB |= 1 << 8;
  if (numMBPoints) maskMB |= 1 << 9;

  return (mask<<8) + maskMB;
}
__forceinline GeometryCounts operator+ (GeometryCounts const & rhs) const
{
GeometryCounts ret;
ret.numFilterFunctions = numFilterFunctions + rhs.numFilterFunctions;
ret.numTriangles = numTriangles + rhs.numTriangles;
ret.numMBTriangles = numMBTriangles + rhs.numMBTriangles;
ret.numQuads = numQuads + rhs.numQuads;
ret.numMBQuads = numMBQuads + rhs.numMBQuads;
ret.numBezierCurves = numBezierCurves + rhs.numBezierCurves;
ret.numMBBezierCurves = numMBBezierCurves + rhs.numMBBezierCurves;
ret.numLineSegments = numLineSegments + rhs.numLineSegments;
ret.numMBLineSegments = numMBLineSegments + rhs.numMBLineSegments;
ret.numSubdivPatches = numSubdivPatches + rhs.numSubdivPatches;
ret.numMBSubdivPatches = numMBSubdivPatches + rhs.numMBSubdivPatches;
ret.numUserGeometries = numUserGeometries + rhs.numUserGeometries;
ret.numMBUserGeometries = numMBUserGeometries + rhs.numMBUserGeometries;
ret.numInstancesCheap = numInstancesCheap + rhs.numInstancesCheap;
ret.numMBInstancesCheap = numMBInstancesCheap + rhs.numMBInstancesCheap;
ret.numInstancesExpensive = numInstancesExpensive + rhs.numInstancesExpensive;
ret.numMBInstancesExpensive = numMBInstancesExpensive + rhs.numMBInstancesExpensive;
ret.numInstanceArrays = numInstanceArrays + rhs.numInstanceArrays;
ret.numMBInstanceArrays = numMBInstanceArrays + rhs.numMBInstanceArrays;
ret.numGrids = numGrids + rhs.numGrids;
ret.numMBGrids = numMBGrids + rhs.numMBGrids;
ret.numSubGrids = numSubGrids + rhs.numSubGrids;
ret.numMBSubGrids = numMBSubGrids + rhs.numMBSubGrids;
ret.numPoints = numPoints + rhs.numPoints;
ret.numMBPoints = numMBPoints + rhs.numMBPoints;
return ret;
}
size_t numFilterFunctions; //!< number of geometries with filter functions enabled
size_t numTriangles; //!< number of enabled triangles
size_t numMBTriangles; //!< number of enabled motion blurred triangles
size_t numQuads; //!< number of enabled quads
size_t numMBQuads; //!< number of enabled motion blurred quads
size_t numBezierCurves; //!< number of enabled curves
size_t numMBBezierCurves; //!< number of enabled motion blurred curves
size_t numLineSegments; //!< number of enabled line segments
size_t numMBLineSegments; //!< number of enabled line motion blurred segments
size_t numSubdivPatches; //!< number of enabled subdivision patches
size_t numMBSubdivPatches; //!< number of enabled motion blurred subdivision patches
size_t numUserGeometries; //!< number of enabled user geometries
size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries
size_t numInstancesCheap; //!< number of enabled cheap instances
size_t numMBInstancesCheap; //!< number of enabled motion blurred cheap instances
size_t numInstancesExpensive; //!< number of enabled expensive instances
size_t numMBInstancesExpensive; //!< number of enabled motion blurred expensive instances
size_t numInstanceArrays; //!< number of enabled instance arrays
size_t numMBInstanceArrays; //!< number of enabled motion blurred instance arrays
size_t numGrids; //!< number of enabled grid geometries
size_t numMBGrids; //!< number of enabled motion blurred grid geometries
size_t numSubGrids; //!< number of enabled grid geometries
size_t numMBSubGrids; //!< number of enabled motion blurred grid geometries
size_t numPoints; //!< number of enabled points
size_t numMBPoints; //!< number of enabled motion blurred points
};
/*! Base class all geometries are derived from */
class Geometry : public RefCount
{
ALIGNED_CLASS_USM_(16);
friend class Scene;
public:
/*! type of geometry */
enum GType
{
GTY_FLAT_LINEAR_CURVE = 0,
GTY_ROUND_LINEAR_CURVE = 1,
GTY_ORIENTED_LINEAR_CURVE = 2,
GTY_CONE_LINEAR_CURVE = 3,
GTY_FLAT_BEZIER_CURVE = 4,
GTY_ROUND_BEZIER_CURVE = 5,
GTY_ORIENTED_BEZIER_CURVE = 6,
GTY_FLAT_BSPLINE_CURVE = 8,
GTY_ROUND_BSPLINE_CURVE = 9,
GTY_ORIENTED_BSPLINE_CURVE = 10,
GTY_FLAT_HERMITE_CURVE = 12,
GTY_ROUND_HERMITE_CURVE = 13,
GTY_ORIENTED_HERMITE_CURVE = 14,
GTY_FLAT_CATMULL_ROM_CURVE = 16,
GTY_ROUND_CATMULL_ROM_CURVE = 17,
GTY_ORIENTED_CATMULL_ROM_CURVE = 18,
GTY_TRIANGLE_MESH = 20,
GTY_QUAD_MESH = 21,
GTY_GRID_MESH = 22,
GTY_SUBDIV_MESH = 23,
GTY_SPHERE_POINT = 25,
GTY_DISC_POINT = 26,
GTY_ORIENTED_DISC_POINT = 27,
GTY_USER_GEOMETRY = 29,
GTY_INSTANCE_CHEAP = 30,
GTY_INSTANCE_EXPENSIVE = 31,
GTY_INSTANCE_ARRAY = 24,
GTY_END = 32,
GTY_BASIS_LINEAR = 0,
GTY_BASIS_BEZIER = 4,
GTY_BASIS_BSPLINE = 8,
GTY_BASIS_HERMITE = 12,
GTY_BASIS_CATMULL_ROM = 16,
GTY_BASIS_MASK = 28,
GTY_SUBTYPE_FLAT_CURVE = 0,
GTY_SUBTYPE_ROUND_CURVE = 1,
GTY_SUBTYPE_ORIENTED_CURVE = 2,
GTY_SUBTYPE_MASK = 3,
};
enum GSubType
{
GTY_SUBTYPE_DEFAULT= 0,
GTY_SUBTYPE_INSTANCE_LINEAR = 0,
GTY_SUBTYPE_INSTANCE_QUATERNION = 1
};
enum GTypeMask
{
MTY_FLAT_LINEAR_CURVE = 1ul << GTY_FLAT_LINEAR_CURVE,
MTY_ROUND_LINEAR_CURVE = 1ul << GTY_ROUND_LINEAR_CURVE,
MTY_CONE_LINEAR_CURVE = 1ul << GTY_CONE_LINEAR_CURVE,
MTY_ORIENTED_LINEAR_CURVE = 1ul << GTY_ORIENTED_LINEAR_CURVE,
MTY_FLAT_BEZIER_CURVE = 1ul << GTY_FLAT_BEZIER_CURVE,
MTY_ROUND_BEZIER_CURVE = 1ul << GTY_ROUND_BEZIER_CURVE,
MTY_ORIENTED_BEZIER_CURVE = 1ul << GTY_ORIENTED_BEZIER_CURVE,
MTY_FLAT_BSPLINE_CURVE = 1ul << GTY_FLAT_BSPLINE_CURVE,
MTY_ROUND_BSPLINE_CURVE = 1ul << GTY_ROUND_BSPLINE_CURVE,
MTY_ORIENTED_BSPLINE_CURVE = 1ul << GTY_ORIENTED_BSPLINE_CURVE,
MTY_FLAT_HERMITE_CURVE = 1ul << GTY_FLAT_HERMITE_CURVE,
MTY_ROUND_HERMITE_CURVE = 1ul << GTY_ROUND_HERMITE_CURVE,
MTY_ORIENTED_HERMITE_CURVE = 1ul << GTY_ORIENTED_HERMITE_CURVE,
MTY_FLAT_CATMULL_ROM_CURVE = 1ul << GTY_FLAT_CATMULL_ROM_CURVE,
MTY_ROUND_CATMULL_ROM_CURVE = 1ul << GTY_ROUND_CATMULL_ROM_CURVE,
MTY_ORIENTED_CATMULL_ROM_CURVE = 1ul << GTY_ORIENTED_CATMULL_ROM_CURVE,
MTY_CURVE2 = MTY_FLAT_LINEAR_CURVE | MTY_ROUND_LINEAR_CURVE | MTY_CONE_LINEAR_CURVE | MTY_ORIENTED_LINEAR_CURVE,
MTY_CURVE4 = MTY_FLAT_BEZIER_CURVE | MTY_ROUND_BEZIER_CURVE | MTY_ORIENTED_BEZIER_CURVE |
MTY_FLAT_BSPLINE_CURVE | MTY_ROUND_BSPLINE_CURVE | MTY_ORIENTED_BSPLINE_CURVE |
MTY_FLAT_HERMITE_CURVE | MTY_ROUND_HERMITE_CURVE | MTY_ORIENTED_HERMITE_CURVE |
MTY_FLAT_CATMULL_ROM_CURVE | MTY_ROUND_CATMULL_ROM_CURVE | MTY_ORIENTED_CATMULL_ROM_CURVE,
MTY_SPHERE_POINT = 1ul << GTY_SPHERE_POINT,
MTY_DISC_POINT = 1ul << GTY_DISC_POINT,
MTY_ORIENTED_DISC_POINT = 1ul << GTY_ORIENTED_DISC_POINT,
MTY_POINTS = MTY_SPHERE_POINT | MTY_DISC_POINT | MTY_ORIENTED_DISC_POINT,
MTY_CURVES = MTY_CURVE2 | MTY_CURVE4 | MTY_POINTS,
MTY_TRIANGLE_MESH = 1ul << GTY_TRIANGLE_MESH,
MTY_QUAD_MESH = 1ul << GTY_QUAD_MESH,
MTY_GRID_MESH = 1ul << GTY_GRID_MESH,
MTY_SUBDIV_MESH = 1ul << GTY_SUBDIV_MESH,
MTY_USER_GEOMETRY = 1ul << GTY_USER_GEOMETRY,
MTY_INSTANCE_CHEAP = 1ul << GTY_INSTANCE_CHEAP,
MTY_INSTANCE_EXPENSIVE = 1ul << GTY_INSTANCE_EXPENSIVE,
MTY_INSTANCE = MTY_INSTANCE_CHEAP | MTY_INSTANCE_EXPENSIVE,
MTY_INSTANCE_ARRAY = 1ul << GTY_INSTANCE_ARRAY,
MTY_ALL = -1
};
static const char* gtype_names[GTY_END];
enum class State : unsigned {
MODIFIED = 0,
COMMITTED = 1,
};
public:
/*! Geometry constructor */
Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps);
/*! Geometry destructor */
virtual ~Geometry();
public:
/*! tests if geometry is enabled */
__forceinline bool isEnabled() const { return enabled; }
/*! tests if geometry is disabled */
__forceinline bool isDisabled() const { return !isEnabled(); }
/* checks if argument version of filter functions are enabled */
__forceinline bool hasArgumentFilterFunctions() const {
return argumentFilterEnabled;
}
/*! tests if that geometry has some filter function set */
__forceinline bool hasGeometryFilterFunctions () const {
return (intersectionFilterN != nullptr) || (occlusionFilterN != nullptr);
}
/*! returns geometry type */
__forceinline GType getType() const { return gtype; }
/*! returns curve type */
__forceinline GType getCurveType() const { return (GType)(gtype & GTY_SUBTYPE_MASK); }
/*! returns curve basis */
__forceinline GType getCurveBasis() const { return (GType)(gtype & GTY_BASIS_MASK); }
/*! returns geometry type mask */
__forceinline GTypeMask getTypeMask() const { return (GTypeMask)(1 << gtype); }
/*! returns true of geometry contains motion blur */
__forceinline bool hasMotionBlur () const {
return numTimeSteps > 1;
}
/*! returns number of primitives */
__forceinline size_t size() const { return numPrimitives; }
/*! sets the number of primitives */
virtual void setNumPrimitives(unsigned int numPrimitives_in);
/*! sets number of time steps */
virtual void setNumTimeSteps (unsigned int numTimeSteps_in);
/*! sets motion blur time range */
void setTimeRange (const BBox1f range);
/*! gets motion blur time range */
BBox1f getTimeRange () const;
/*! sets number of vertex attributes */
virtual void setVertexAttributeCount (unsigned int N) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! sets number of topologies */
virtual void setTopologyCount (unsigned int N) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! sets the build quality */
void setBuildQuality(RTCBuildQuality quality_in)
{
this->quality = quality_in;
Geometry::update();
}
/* calculate time segment itime and fractional time ftime */
__forceinline int timeSegment(float time, float& ftime) const {
return getTimeSegment(time,time_range.lower,time_range.upper,fnumTimeSegments,ftime);
}
template<int N>
__forceinline vint<N> timeSegment(const vfloat<N>& time, vfloat<N>& ftime) const {
return getTimeSegment<N>(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime);
}
/* calculate overlapping time segment range */
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
return getTimeSegmentRange(range,time_range,fnumTimeSegments);
}
/* returns time that corresponds to time step */
__forceinline float timeStep(const int i) const {
assert(i>=0 && i<(int)numTimeSteps);
return time_range.lower + time_range.size()*float(i)/fnumTimeSegments;
}
/*! for all geometries */
public:
/*! Enable geometry. */
virtual void enable();
/*! Update geometry. */
void update();
/*! commit of geometry */
virtual void commit();
/*! Update geometry buffer. */
virtual void updateBuffer(RTCBufferType type, unsigned int slot) {
update(); // update everything for geometries not supporting this call
}
/*! Disable geometry. */
virtual void disable();
/*! Verify the geometry */
virtual bool verify() { return true; }
/*! called before every build */
virtual void preCommit();
/*! called after every build */
virtual void postCommit();
virtual void addElementsToCount (GeometryCounts & counts) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
};
/*! sets constant tessellation rate for the geometry */
virtual void setTessellationRate(float N) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets the maximal curve radius scale allowed by min-width feature. */
virtual void setMaxRadiusScale(float s) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set user data pointer. */
virtual void setUserData(void* ptr);
/*! Get user data pointer. */
__forceinline void* getUserData() const {
return userPtr;
}
/*! interpolates user data to the specified u/v location */
virtual void interpolate(const RTCInterpolateArguments* const args) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! interpolates user data to the specified u/v locations */
virtual void interpolateN(const RTCInterpolateNArguments* const args);
/* point query api */
bool pointQuery(PointQuery* query, PointQueryContext* context);
/*! for subdivision surfaces only */
public:
virtual void setSubdivisionMode (unsigned topologyID, RTCSubdivisionMode mode) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual void setVertexAttributeTopology(unsigned int vertexBufferSlot, unsigned int indexBufferSlot) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set displacement function. */
virtual void setDisplacementFunction (RTCDisplacementFunctionN filter) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getFirstHalfEdge(unsigned int faceID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getFace(unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getNextHalfEdge(unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getPreviousHalfEdge(unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! get fast access to first vertex buffer if applicable */
virtual float * getCompactVertexArray () const {
return nullptr;
}
/*! Returns the modified counter - how many times the geo has been modified */
__forceinline unsigned int getModCounter () const {
return modCounter_;
}
/*! for triangle meshes and bezier curves only */
public:
/*! Sets ray mask. */
virtual void setMask(unsigned mask) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets specified buffer. */
virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Gets specified buffer. */
virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set intersection filter function for ray packets of size N. */
virtual void setIntersectionFilterFunctionN (RTCFilterFunctionN filterN);
/*! Set occlusion filter function for ray packets of size N. */
virtual void setOcclusionFilterFunctionN (RTCFilterFunctionN filterN);
/* Enables argument version of intersection or occlusion filter function. */
virtual void enableFilterFunctionFromArguments (bool enable) {
argumentFilterEnabled = enable;
}
/*! for instances only */
public:
/*! Sets the instanced scene */
virtual void setInstancedScene(const Ref<Scene>& scene) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets the instanced scenes */
virtual void setInstancedScenes(const RTCScene* scenes, size_t numScenes) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets transformation of the instance */
virtual void setTransform(const AffineSpace3fa& transform, unsigned int timeStep) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets transformation of the instance */
virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Returns the transformation of the instance */
virtual AffineSpace3fa getTransform(float time) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Returns the transformation of the instance */
virtual AffineSpace3fa getTransform(size_t instance, float time) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! for user geometries only */
public:
/*! Set bounds function. */
virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set intersect function for ray packets of size N. */
virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set occlusion function for ray packets of size N. */
virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set point query function. */
void setPointQueryFunction(RTCPointQueryFunction func);
/*! returns number of time segments */
__forceinline unsigned numTimeSegments () const {
return numTimeSteps-1;
}
public:
virtual PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefArray not implemented for this geometry");
}
PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArray(prims.data(),r,k,geomID);
}
PrimInfo createPrimRefArray(avector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArray(prims.data(),r,k,geomID);
}
virtual PrimInfo createPrimRefArray(mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArray(prims,r,k,geomID);
}
virtual PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
}
/*! Calculates the PrimRef over the complete time interval */
virtual PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
}
PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArrayMB(prims.data(),t0t1,r,k,geomID);
}
PrimInfo createPrimRefArrayMB(avector<PrimRef>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArrayMB(prims.data(),t0t1,r,k,geomID);
}
virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
}
virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefMBArray(prims,t0t1,r,k,geomID);
}
virtual LinearSpace3fa computeAlignedSpace(const size_t primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
}
virtual LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
}
virtual Vec3fa computeDirection(unsigned int primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
}
virtual Vec3fa computeDirection(unsigned int primID, size_t time) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
}
virtual BBox3fa vbounds(size_t primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
}
virtual BBox3fa vbounds(const LinearSpace3fa& space, size_t primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
}
virtual BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
}
virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
}
virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range, const SubGridBuildData * const sgrids) const {
return vlinearBounds(primID,time_range);
}
virtual LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
}
virtual LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
}
public:
__forceinline bool hasIntersectionFilter() const { return intersectionFilterN != nullptr; }
__forceinline bool hasOcclusionFilter() const { return occlusionFilterN != nullptr; }
public:
Device* device; //!< device this geometry belongs to
void* userPtr; //!< user pointer
unsigned int numPrimitives; //!< number of primitives of this geometry
unsigned int numTimeSteps; //!< number of time steps
float fnumTimeSegments; //!< number of time segments (precalculation)
BBox1f time_range; //!< motion blur time range
unsigned int mask; //!< for masking out geometry
unsigned int modCounter_ = 1; //!< counter for every modification - used to rebuild scenes when geo is modified
struct {
GType gtype : 8; //!< geometry type
GSubType gsubtype : 8; //!< geometry subtype
RTCBuildQuality quality : 3; //!< build quality for geometry
unsigned state : 2;
bool enabled : 1; //!< true if geometry is enabled
bool argumentFilterEnabled : 1; //!< true if argument filter functions are enabled for this geometry
};
RTCFilterFunctionN intersectionFilterN;
RTCFilterFunctionN occlusionFilterN;
RTCPointQueryFunction pointQueryFunc;
};
}

View file

@ -0,0 +1,153 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "ray.h"
#include "instance_stack.h"
namespace embree
{
/* Hit structure for K hits (SIMD packet of K rays) */
template<int K>
struct HitK
{
  /* Default construction does nothing */
  __forceinline HitK() {}

  /* Constructs a hit from per-lane geometry/primitive ids, barycentric u/v
     and geometry normal; the instance id stacks are first cleared and then
     filled from the scalar instance stack of the ray query context. */
  __forceinline HitK(const RTCRayQueryContext* context, const vuint<K>& geomID, const vuint<K>& primID, const vfloat<K>& u, const vfloat<K>& v, const Vec3vf<K>& Ng)
    : Ng(Ng), u(u), v(v), primID(primID), geomID(geomID)
  {
    /* mark every instancing level as unused before copying */
    for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
      instID[l] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
      instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
#endif
    }
    /* broadcast the scalar context stack into all K lanes */
    instance_id_stack::copy_UV<K>(context->instID, instID);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
    instance_id_stack::copy_UV<K>(context->instPrimID, instPrimID);
#endif
  }

  /* Constructs a hit from a packed uv coordinate */
  __forceinline HitK(const RTCRayQueryContext* context, const vuint<K>& geomID, const vuint<K>& primID, const Vec2vf<K>& uv, const Vec3vf<K>& Ng)
    : HitK(context,geomID,primID,uv.x,uv.y,Ng) {}

  /* Returns the size of the hit (number of SIMD lanes) */
  static __forceinline size_t size() { return K; }

public:
  Vec3vf<K> Ng;    // geometry normal
  vfloat<K> u;     // barycentric u coordinate of hit
  vfloat<K> v;     // barycentric v coordinate of hit
  vuint<K> primID; // primitive ID
  vuint<K> geomID; // geometry ID
  vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  vuint<K> instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
#endif
};
/* Specialization for a single hit (scalar ray) */
template<>
struct __aligned(16) HitK<1>
{
  /* Default construction does nothing */
  __forceinline HitK() {}

  /* Constructs a hit; the instance id stack is copied from the ray query
     context (only the valid entries, remaining slots are invalidated). */
  __forceinline HitK(const RTCRayQueryContext* context, unsigned int geomID, unsigned int primID, float u, float v, const Vec3fa& Ng)
    : Ng(Ng.x,Ng.y,Ng.z), u(u), v(v), primID(primID), geomID(geomID)
  {
    instance_id_stack::copy_UU(context, context->instID, instID);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
    instance_id_stack::copy_UU(context, context->instPrimID, instPrimID);
#endif
  }

  /* Constructs a hit from a packed uv coordinate */
  __forceinline HitK(const RTCRayQueryContext* context, unsigned int geomID, unsigned int primID, const Vec2f& uv, const Vec3fa& Ng)
    : HitK<1>(context,geomID,primID,uv.x,uv.y,Ng) {}

  /* Returns the size of the hit (always 1) */
  static __forceinline size_t size() { return 1; }

public:
  Vec3<float> Ng;       // geometry normal
  float u;              // barycentric u coordinate of hit
  float v;              // barycentric v coordinate of hit
  unsigned int primID;  // primitive ID
  unsigned int geomID;  // geometry ID
  unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
#endif
};
/* Shortcuts for the common SIMD widths; Hitx uses the compile-time default width VSIZEX */
typedef HitK<1> Hit;
typedef HitK<4> Hit4;
typedef HitK<8> Hit8;
typedef HitK<16> Hit16;
typedef HitK<VSIZEX> Hitx;
/* Outputs hit to stream (debugging helper); prints all fields including the
   per-level instance id stack(s) */
template<int K>
__forceinline embree_ostream operator<<(embree_ostream cout, const HitK<K>& ray)
{
  cout << "{ " << embree_endl
       << " Ng = " << ray.Ng << embree_endl
       << " u = " << ray.u << embree_endl
       << " v = " << ray.v << embree_endl
       << " primID = " << ray.primID << embree_endl
       << " geomID = " << ray.geomID << embree_endl
       << " instID =";
  for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  {
    cout << " " << ray.instID[l];
  }
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  cout << " instPrimID =";
  for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  {
    cout << " " << ray.instPrimID[l];
  }
#endif
  cout << embree_endl;
  return cout << "}";
}
/*! Copies an accepted hit into the hit portion of a scalar RayHit. */
template<typename Hit>
__forceinline void copyHitToRay(RayHit& ray, const Hit& hit)
{
  ray.Ng = hit.Ng;
  ray.u = hit.u;
  ray.v = hit.v;
  ray.primID = hit.primID;
  ray.geomID = hit.geomID;
  instance_id_stack::copy_UU(hit.instID, ray.instID);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  instance_id_stack::copy_UU(hit.instPrimID, ray.instPrimID);
#endif
}
/*! Copies an accepted packet hit into a RayHit packet; only the lanes
 *  enabled in 'mask' are written, other lanes keep their previous hit. */
template<int K>
__forceinline void copyHitToRay(const vbool<K>& mask, RayHitK<K>& ray, const HitK<K>& hit)
{
  vfloat<K>::storeu(mask,&ray.Ng.x, hit.Ng.x);
  vfloat<K>::storeu(mask,&ray.Ng.y, hit.Ng.y);
  vfloat<K>::storeu(mask,&ray.Ng.z, hit.Ng.z);
  vfloat<K>::storeu(mask,&ray.u, hit.u);
  vfloat<K>::storeu(mask,&ray.v, hit.v);
  vuint<K>::storeu(mask,&ray.primID, hit.primID);
  vuint<K>::storeu(mask,&ray.geomID, hit.geomID);
  instance_id_stack::copy_VV<K>(hit.instID, ray.instID, mask);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  instance_id_stack::copy_VV<K>(hit.instPrimID, ray.instPrimID, mask);
#endif
}
}

View file

@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "rtcore.h"
namespace embree {
namespace instance_id_stack {
static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0,
"RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0.");
/*******************************************************************************
* Instance ID stack manipulation.
* This is used from the instance intersector.
******************************************************************************/
/*
 * Pushes an instance id (and, when instance arrays are enabled, the instance
 * primitive id) onto the context's instance id stack. Returns false and
 * silently drops the instance when the stack is already full.
 */
template<typename Context>
RTC_FORCEINLINE bool push(Context context,
                          unsigned instanceId,
                          unsigned instancePrimId)
{
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  /* multi-level stacks track the next free slot in instStackSize */
  const bool spaceAvailable = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT;
  /* We assert here because instances are silently dropped when the stack is full.
     This might be quite hard to find in production. */
  assert(spaceAvailable);
  if (likely(spaceAvailable)) {
    context->instID[context->instStackSize] = instanceId;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
    context->instPrimID[context->instStackSize] = instancePrimId;
#endif
    context->instStackSize++;
  }
  return spaceAvailable;
#else
  /* single-level stacks: slot 0 is free iff it still holds the invalid id */
  const bool spaceAvailable = (context->instID[0] == RTC_INVALID_GEOMETRY_ID);
  assert(spaceAvailable);
  if (likely(spaceAvailable)) {
    context->instID[0] = instanceId;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
    context->instPrimID[0] = instancePrimId;
#endif
  }
  return spaceAvailable;
#endif
}
/*
 * Pops the last instance pushed to the stack and restores the invalid id
 * in the freed slot. Do not call on an empty stack.
 */
template<typename Context>
RTC_FORCEINLINE void pop(Context context)
{
  assert(context);
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  assert(context->instStackSize > 0);
  --context->instStackSize;
  context->instID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
#endif
#else
  /* single-level stacks have no size counter; the slot itself is the state */
  assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
  context->instID[0] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[0] = RTC_INVALID_GEOMETRY_ID;
#endif
#endif
}
/* Pushes an instance onto a point query context stack together with its
   world-to-instance (w2i) and instance-to-world (i2w) transforms. For nested
   instancing the stored transforms are accumulated so that the top stack
   entry always maps directly between world space and the deepest instance
   space. Always returns true (overflow is only checked by assert). */
RTC_FORCEINLINE bool push(RTCPointQueryContext* context,
                          unsigned int instanceId,
                          unsigned int instancePrimId,
                          AffineSpace3fa const& w2i,
                          AffineSpace3fa const& i2w)
{
  assert(context);
  const size_t stackSize = context->instStackSize;
  assert(stackSize < RTC_MAX_INSTANCE_LEVEL_COUNT);
  context->instID[stackSize] = instanceId;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[stackSize] = instancePrimId;
#endif

  /* the context stores transforms as raw float arrays; store unaligned */
  AffineSpace3fa_store_unaligned(w2i,(AffineSpace3fa*)context->world2inst[stackSize]);
  AffineSpace3fa_store_unaligned(i2w,(AffineSpace3fa*)context->inst2world[stackSize]);

#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  if (unlikely(stackSize > 0))
  {
    /* compose with the parent level so the new top maps world <-> this instance */
    const AffineSpace3fa world2inst = AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->world2inst[stackSize  ])
                                    * AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->world2inst[stackSize-1]);
    const AffineSpace3fa inst2world = AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->inst2world[stackSize-1])
                                    * AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->inst2world[stackSize  ]);
    AffineSpace3fa_store_unaligned(world2inst,(AffineSpace3fa*)context->world2inst[stackSize]);
    AffineSpace3fa_store_unaligned(inst2world,(AffineSpace3fa*)context->inst2world[stackSize]);
  }
#endif
  context->instStackSize++;
  return true;
}
/* Pops the top entry of a point query context stack and invalidates its
   instance id(s). Note: the stored transforms are left in place; they are
   overwritten by the next push at this level. Do not call on an empty stack. */
template<>
RTC_FORCEINLINE void pop(RTCPointQueryContext* context)
{
  assert(context);
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  assert(context->instStackSize > 0);
#else
  assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
#endif
  --context->instStackSize;
  context->instID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[context->instStackSize] = RTC_INVALID_GEOMETRY_ID;
#endif
}
/*
 * Optimized instance id stack copy routines. Each copy_XY() either copies the
 * full stack or stops right after the first invalid entry, depending on
 * RTC_MAX_INSTANCE_LEVEL_COUNT (naming: U = scalar unsigned, V = SIMD vuint).
 */
/*! scalar -> scalar stack copy */
RTC_FORCEINLINE void copy_UU(const unsigned* src, unsigned* tgt)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
  {
    const unsigned id = src[level];
    tgt[level] = id;
    /* deep stacks: stop once the terminating invalid id has been copied */
    if ((RTC_MAX_INSTANCE_LEVEL_COUNT > 4) && (id == RTC_INVALID_GEOMETRY_ID))
      break;
  }
#endif
}
/*! scalar -> scalar stack copy driven by the context's stack depth: valid
 *  entries are copied, the remaining slots are filled with the invalid id */
RTC_FORCEINLINE void copy_UU(const RTCRayQueryContext* context, const unsigned* src, unsigned* tgt)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0];
#else
  const unsigned int depth = context->instStackSize;
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
    tgt[level] = (level < depth) ? src[level] : RTC_INVALID_GEOMETRY_ID;
#endif
}
/*! scalar -> vector stack copy: each scalar id is broadcast into all K lanes */
template <int K>
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
  {
    const unsigned id = src[level];
    tgt[level] = id;
    /* deep stacks: stop once the terminating invalid id has been copied */
    if ((RTC_MAX_INSTANCE_LEVEL_COUNT > 4) && (id == RTC_INVALID_GEOMETRY_ID))
      break;
  }
#endif
}
/*! Writes a scalar instance id stack into lane j of a SIMD stack. Deep
 *  stacks (> 4 levels) stop at the first invalid id. */
template <int K>
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, size_t j)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0][j] = src[0];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    tgt[level][j] = src[level];
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/*! Broadcasts a scalar instance id stack into the masked lanes of a SIMD
 *  stack; unmasked lanes are left untouched. Deep stacks (> 4 levels)
 *  stop at the first invalid id. */
template <int K>
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, const vbool<K>& mask)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  vuint<K>::store(mask, tgt, src[0]);
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    vuint<K>::store(mask, tgt + level, src[level]);
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/*! Extracts lane i of a SIMD instance id stack into a scalar stack. Deep
 *  stacks (> 4 levels) stop at the first invalid id. */
template <int K>
RTC_FORCEINLINE void copy_VU(const vuint<K>* src, unsigned* tgt, size_t i)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0][i];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    const unsigned id = src[level][i];
    tgt[level] = id;
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && id == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/*! Copies lane i of one SIMD instance id stack into lane j of another.
 *  Deep stacks (> 4 levels) stop at the first invalid id. */
template <int K>
RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, size_t i, size_t j)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0][j] = src[0][i];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    const unsigned id = src[level][i];
    tgt[level][j] = id;
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && id == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/*! Copies one SIMD instance id stack into another for the masked lanes
 *  only. For deep stacks (> 4 levels) the loop ends once every active
 *  lane has produced an invalid id. */
template <int K>
RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  vuint<K>::store(mask, tgt, src[0]);
#else
  vbool<K> finished = !mask;  // inactive lanes count as already done
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    vuint<K>::store(mask, tgt + level, src[level]);
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) {
      finished |= src[level] == RTC_INVALID_GEOMETRY_ID;
      if (all(finished)) break;
    }
  }
#endif
}
} // namespace instance_id_stack
} // namespace embree

View file

@ -0,0 +1,246 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/sys/platform.h"
#include "../../common/sys/sysinfo.h"
namespace embree
{
// Helper macros for per-ISA symbol tables.
// DEFINE_SYMBOL2 declares a function-pointer type plus a dispatch member;
// DECLARE_SYMBOL2 additionally declares the per-ISA implementations and the
// _error/_zero fallbacks installed when no suitable ISA is available.
#define DEFINE_SYMBOL2(type,name) \
typedef type (*name##Func)(); \
name##Func name;
#define DECLARE_SYMBOL2(type,name) \
namespace sse2 { extern type name(); } \
namespace sse42 { extern type name(); } \
namespace avx { extern type name(); } \
namespace avx2 { extern type name(); } \
namespace avx512 { extern type name(); } \
void name##_error2() { throw_RTCError(RTC_ERROR_UNKNOWN,"internal error in ISA selection for " TOSTRING(name)); } \
type name##_error() { return type(name##_error2); } \
type name##_zero() { return type(nullptr); }
// DECLARE_ISA_FUNCTION declares the per-ISA variants of a free function, an
// _error fallback reporting an unsupported CPU, and the pointer type.
// NOTE(review): the last line of this macro ends in a line continuation; in
// the original layout a blank line terminates the macro before the next
// #define — confirm the blank line survives any reformatting.
#define DECLARE_ISA_FUNCTION(type,symbol,args) \
namespace sse2 { extern type symbol(args); } \
namespace sse42 { extern type symbol(args); } \
namespace avx { extern type symbol(args); } \
namespace avx2 { extern type symbol(args); } \
namespace avx512 { extern type symbol(args); } \
inline type symbol##_error(args) { throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"function " TOSTRING(symbol) " not supported by your CPU"); } \
typedef type (*symbol##Ty)(args); \
#define DEFINE_ISA_FUNCTION(type,symbol,args) \
typedef type (*symbol##Func)(args); \
symbol##Func symbol;
// ZERO_SYMBOL clears a dispatch slot; INIT_SYMBOL installs the error stub.
#define ZERO_SYMBOL(features,intersector) \
intersector = intersector##_zero;
#define INIT_SYMBOL(features,intersector) \
intersector = decltype(intersector)(intersector##_error);
// ============================================================================
// Runtime ISA selection macros.
//
// Each SELECT_SYMBOL_<ISAs>(features,intersector) statement-macro assigns the
// most capable implementation of 'intersector' that is both compiled in
// (EMBREE_TARGET_*) and supported at runtime (the 'features' mask). The
// *_DEFAULT_* variants start from the default-ISA build, the *_INIT_*
// variants start from the error stub, and the *_ZERO_* variant starts from a
// null pointer; the per-ISA selectors then upgrade the slot in order.
//
// Fix: several macros (SELECT_SYMBOL_DEFAULT_AVX, ..._AVX_AVX2_AVX512,
// ..._SSE42_AVX_AVX2_AVX512, ..._AVX_AVX512, SELECT_SYMBOL_INIT_AVX,
// ..._INIT_AVX_AVX2, ..._INIT_AVX_AVX2_AVX512) were defined two or three
// times with identical bodies. Identical redefinition is legal but pure
// redundancy; each macro is now defined exactly once.
// ============================================================================
#define SELECT_SYMBOL_DEFAULT(features,intersector) \
intersector = isa::intersector;
// A 4-wide SIMD target exists whenever SSE or NEON is available.
#if defined(__SSE__) || defined(__ARM_NEON)
#if !defined(EMBREE_TARGET_SIMD4)
#define EMBREE_TARGET_SIMD4
#endif
#endif
#if defined(EMBREE_TARGET_SSE42)
#define SELECT_SYMBOL_SSE42(features,intersector) \
if ((features & SSE42) == SSE42) intersector = sse42::intersector;
#else
#define SELECT_SYMBOL_SSE42(features,intersector)
#endif
#if defined(EMBREE_TARGET_AVX) || defined(__AVX__)
#if !defined(EMBREE_TARGET_SIMD8)
#define EMBREE_TARGET_SIMD8
#endif
#if defined(__AVX__) // if default ISA is >= AVX we treat AVX target as default target
#define SELECT_SYMBOL_AVX(features,intersector) \
if ((features & ISA) == ISA) intersector = isa::intersector;
#else
#define SELECT_SYMBOL_AVX(features,intersector) \
if ((features & AVX) == AVX) intersector = avx::intersector;
#endif
#else
#define SELECT_SYMBOL_AVX(features,intersector)
#endif
#if defined(EMBREE_TARGET_AVX2)
#if !defined(EMBREE_TARGET_SIMD8)
#define EMBREE_TARGET_SIMD8
#endif
#define SELECT_SYMBOL_AVX2(features,intersector) \
if ((features & AVX2) == AVX2) intersector = avx2::intersector;
#else
#define SELECT_SYMBOL_AVX2(features,intersector)
#endif
#if defined(EMBREE_TARGET_AVX512)
#if !defined(EMBREE_TARGET_SIMD16)
#define EMBREE_TARGET_SIMD16
#endif
#define SELECT_SYMBOL_AVX512(features,intersector) \
if ((features & AVX512) == AVX512) intersector = avx512::intersector;
#else
#define SELECT_SYMBOL_AVX512(features,intersector)
#endif
// ---- start from the default ISA, then upgrade ------------------------------
#define SELECT_SYMBOL_DEFAULT_SSE42(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX_AVX2(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX_AVX512(features,intersector) \
SELECT_SYMBOL_DEFAULT(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
// ---- start from the error stub, then upgrade -------------------------------
#define SELECT_SYMBOL_INIT_AVX(features,intersector) \
INIT_SYMBOL(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector);
#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \
INIT_SYMBOL(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,intersector) \
INIT_SYMBOL(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2(features,intersector) \
INIT_SYMBOL(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_INIT_AVX_AVX512(features,intersector) \
INIT_SYMBOL(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512(features,intersector) \
INIT_SYMBOL(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
// ---- start from a null pointer, then upgrade -------------------------------
#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512(features,intersector) \
ZERO_SYMBOL(features,intersector); \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_INIT_AVX512(features,intersector) \
INIT_SYMBOL(features,intersector); \
SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_SSE42_AVX_AVX2(features,intersector) \
SELECT_SYMBOL_SSE42(features,intersector); \
SELECT_SYMBOL_AVX(features,intersector); \
SELECT_SYMBOL_AVX2(features,intersector);
/* Presumably used to verify that every ISA-specific translation unit got
 * linked in (each ISA namespace declares its own getISA() below). The
 * recursion is marked __noinline so the call chain is not collapsed and the
 * per-translation-unit ISA constant is what is ultimately returned.
 * NOTE(review): do not "simplify" the recursion — its shape appears to be
 * the point; confirm against the dispatch code that uses this struct. */
struct VerifyMultiTargetLinking {
static __noinline int getISA(int depth = 5) {
if (depth == 0) return ISA;
else return getISA(depth-1);
}
};
namespace sse2 { int getISA(); };
namespace sse42 { int getISA(); };
namespace avx { int getISA(); };
namespace avx2 { int getISA(); };
namespace avx512 { int getISA(); };
}

View file

@ -0,0 +1,325 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/math/affinespace.h"
#include "../../common/math/interval.h"
#include <functional>
namespace embree {
#define MOTION_DERIVATIVE_ROOT_EPSILON 1e-4f
static void motion_derivative_coefficients(const float *p, float *coeff);
/*! Precomputed coefficient table for the time derivative of a motion blur
 *  transformation given by two quaternion-decomposed affine spaces. Per
 *  dimension (x,y,z) there are 8 coefficients, each a linear combination of
 *  7 position-dependent terms; MotionDerivative contracts them with a
 *  concrete point pair. */
struct MotionDerivativeCoefficients
{
float theta; //!< angle between the two quaternion rotations (acos of their dot product)
float coeffs[3*8*7]; //!< generated coefficient table (see sympy banner below)
MotionDerivativeCoefficients() {}
// xfm0 and xfm1 are interpreted as quaternion decompositions
MotionDerivativeCoefficients(AffineSpace3ff const& xfm0, AffineSpace3ff const& xfm1)
{
// cosTheta of the two quaternions (quaternion components live in the .w
// slots of the column vectors and translation); clamped to [-1,1] for acos
const float cosTheta = min(1.f, max(-1.f,
xfm0.l.vx.w * xfm1.l.vx.w
+ xfm0.l.vy.w * xfm1.l.vy.w
+ xfm0.l.vz.w * xfm1.l.vz.w
+ xfm0.p.w * xfm1.p.w));
theta = std::acos(cosTheta);
Vec4f qperp(xfm1.p.w, xfm1.l.vx.w, xfm1.l.vy.w, xfm1.l.vz.w);
if (cosTheta < 0.995f) {
// compute perpendicular quaternion: the component of q1 orthogonal to q0,
// normalized; skipped for nearly parallel quaternions (cosTheta >= 0.995)
qperp.x = xfm1.p.w - cosTheta * xfm0.p.w;
qperp.y = xfm1.l.vx.w - cosTheta * xfm0.l.vx.w;
qperp.z = xfm1.l.vy.w - cosTheta * xfm0.l.vy.w;
qperp.w = xfm1.l.vz.w - cosTheta * xfm0.l.vz.w;
qperp = normalize(qperp);
}
// pack the inputs in the exact positional order expected by the generated
// motion_derivative_coefficients() code — do not reorder
const float p[33] = {
theta,
xfm0.l.vx.y, xfm0.l.vx.z, xfm0.l.vy.z, // translation component of xfm0
xfm1.l.vx.y, xfm1.l.vx.z, xfm1.l.vy.z, // translation component of xfm1
xfm0.p.w, xfm0.l.vx.w, xfm0.l.vy.w, xfm0.l.vz.w, // quaternion of xfm0
qperp.x, qperp.y, qperp.z, qperp.w, // perpendicular quaternion
xfm0.l.vx.x, xfm0.l.vy.x, xfm0.l.vz.x, xfm0.p.x, // scale/skew component of xfm0
xfm0.l.vy.y, xfm0.l.vz.y, xfm0.p.y,
xfm0.l.vz.z, xfm0.p.z,
xfm1.l.vx.x, xfm1.l.vy.x, xfm1.l.vz.x, xfm1.p.x, // scale/skew component of xfm1
xfm1.l.vy.y, xfm1.l.vz.y, xfm1.p.y,
xfm1.l.vz.z, xfm1.p.z
};
motion_derivative_coefficients(p, coeffs);
}
};
/*! Evaluates the time derivative of a motion blur transformation along one
 *  dimension and locates the roots of that derivative within a time
 *  interval via interval-arithmetic bisection. */
struct MotionDerivative
{
float twoTheta; //!< 2*theta, the angular frequency of the sin/cos terms below
float c[8]; //!< per-point coefficients contracted from the coefficient table
// Contracts the coefficient table for dimension 'dim' (0..2) with the
// 7 terms {1, p0.x..p1.z} derived from the two endpoint positions.
MotionDerivative(MotionDerivativeCoefficients const& mdc,
int dim, Vec3fa const& p0, Vec3fa const& p1)
: twoTheta(2.f*mdc.theta)
{
const float p[7] = { 1, p0.x, p0.y, p0.z, p1.x, p1.y, p1.z };
for (int i = 0; i < 8; ++i) {
c[i] = 0;
for (int j = 0; j < 7; ++j) {
c[i] += mdc.coeffs[8*7*dim + i*7 + j] * p[j];
}
}
}
// Functor evaluating the derivative plus a constant offset at 'time'.
// T may be a scalar or Interval1f: the same expression then yields a
// conservative range estimate via interval arithmetic.
template<typename T>
struct EvalMotionDerivative
{
MotionDerivative const& md; // coefficients being evaluated
float offset; // constant added to the derivative
EvalMotionDerivative(MotionDerivative const& md, float offset) : md(md), offset(offset) {}
T operator()(T const& time) const {
return md.c[0] + md.c[1] * time
+ (md.c[2] + md.c[3] * time + md.c[4] * time * time) * cos(md.twoTheta * time)
+ (md.c[5] + md.c[6] * time + md.c[7] * time * time) * sin(md.twoTheta * time)
+ offset;
}
};
// Finds up to maxNumRoots roots of the (offset) derivative inside
// 'interval'; writes them to 'roots' and returns how many were found.
unsigned int findRoots(
Interval1f const& interval,
float offset,
float* roots,
unsigned int maxNumRoots)
{
unsigned int numRoots = 0;
EvalMotionDerivative<Interval1f> eval(*this, offset);
findRoots(eval, interval, numRoots, roots, maxNumRoots);
return numRoots;
}
// Recursive bisection: subintervals whose interval evaluation excludes
// zero (or is degenerate) are pruned; once an interval shrinks below the
// subdivision tolerance its midpoint is recorded as a root (deduplicated
// against roots already found).
template<typename Eval>
static void findRoots(
Eval const& eval,
Interval1f const& interval,
unsigned int& numRoots,
float* roots,
unsigned int maxNumRoots)
{
Interval1f range = eval(interval);
// range excludes 0, or is empty/degenerate -> no root in this interval
if (range.lower > 0 || range.upper < 0 || range.lower >= range.upper) return;
const float split = 0.5f * (interval.upper + interval.lower);
if (interval.upper-interval.lower < 1e-7f || abs(split-interval.lower) < 1e-7f || abs(split-interval.upper) < 1e-7f)
{
// check if the root already exists (within the dedup epsilon)
for (unsigned int k = 0; k < numRoots && k < maxNumRoots; ++k) {
if (abs(roots[k]-split) < MOTION_DERIVATIVE_ROOT_EPSILON)
return;
}
if (numRoots < maxNumRoots) {
roots[numRoots++] = split;
}
// defensive check: cannot trigger given the guard above
if (numRoots > maxNumRoots) {
printf("error: more roots than expected\n"); // FIXME: workaround for ICC2019.4 compiler bug under macOS
return;
}
return;
}
findRoots(eval, Interval1f(interval.lower, split), numRoots, roots, maxNumRoots);
findRoots(eval, Interval1f(split, interval.upper), numRoots, roots, maxNumRoots);
}
};
/******************************************************************************
* Code generated with sympy 1.4 *
* See http://www.sympy.org/ for more information. *
* *
* see *
* *
* scripts/generate_motion_derivative_coefficients.py *
* *
* for how this code is generated *
* *
******************************************************************************/
static void motion_derivative_coefficients(const float *p, float *coeff)
{
coeff[0] = -p[1] + p[4] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27] - p[18] + p[27];
coeff[1] = 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - p[14]*p[14]*p[24] - 2*p[15] + p[24];
coeff[2] = 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - p[14]*p[14]*p[25] - 2*p[16] + p[25];
coeff[3] = -2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - p[14]*p[14]*p[26] - 2*p[17] + p[26];
coeff[4] = (-p[9]*p[9] - p[10]*p[10] - p[13]*p[13] - p[14]*p[14] + 1)*p[15];
coeff[5] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] - p[11]*p[14]*p[19] + p[12]*p[13]*p[19] - p[13]*p[13]*p[16] - p[14]*p[14]*p[16] + p[16];
coeff[6] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] + p[11]*p[13]*p[22] - p[11]*p[14]*p[20] + p[12]*p[13]*p[20] + p[12]*p[14]*p[22] - p[13]*p[13]*p[17] - p[14]*p[14]*p[17] + p[17];
coeff[7] = 0;
coeff[8] = -2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24] + 2*p[15] - 2*p[24];
coeff[9] = -2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25] + 2*p[16] - 2*p[25];
coeff[10] = 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26] + 2*p[17] - 2*p[26];
coeff[11] = 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24] - 2*p[15] + 2*p[24];
coeff[12] = 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25] - 2*p[16] + 2*p[25];
coeff[13] = -2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26] - 2*p[17] + 2*p[26];
coeff[14] = 2*p[0]*p[7]*p[11]*p[18] + 2*p[0]*p[7]*p[13]*p[23] - 2*p[0]*p[7]*p[14]*p[21] + 2*p[0]*p[8]*p[12]*p[18] + 2*p[0]*p[8]*p[13]*p[21] + 2*p[0]*p[8]*p[14]*p[23] + 2*p[0]*p[9]*p[11]*p[23] + 2*p[0]*p[9]*p[12]*p[21] - 2*p[0]*p[9]*p[13]*p[18] - 2*p[0]*p[10]*p[11]*p[21] + 2*p[0]*p[10]*p[12]*p[23] - 2*p[0]*p[10]*p[14]*p[18] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] + p[11]*p[13]*p[23] - p[11]*p[13]*p[32] - p[11]*p[14]*p[21] + p[11]*p[14]*p[30] + p[12]*p[13]*p[21] - p[12]*p[13]*p[30] + p[12]*p[14]*p[23] - p[12]*p[14]*p[32] - p[13]*p[13]*p[18] + p[13]*p[13]*p[27] - p[14]*p[14]*p[18] + p[14]*p[14]*p[27];
coeff[15] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + p[14]*p[14]*p[24];
coeff[16] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + p[14]*p[14]*p[25];
coeff[17] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + p[14]*p[14]*p[26];
coeff[18] = (-p[9]*p[9] - p[10]*p[10] + p[13]*p[13] + p[14]*p[14])*p[15];
coeff[19] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] + p[11]*p[14]*p[19] - p[12]*p[13]*p[19] + p[13]*p[13]*p[16] + p[14]*p[14]*p[16];
coeff[20] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] - p[11]*p[13]*p[22] + p[11]*p[14]*p[20] - p[12]*p[13]*p[20] - p[12]*p[14]*p[22] + p[13]*p[13]*p[17] + p[14]*p[14]*p[17];
coeff[21] = 2*(-p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27])*p[0];
coeff[22] = -4*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[7]*p[11]*p[24] - 4*p[0]*p[8]*p[12]*p[15] + 2*p[0]*p[8]*p[12]*p[24] + 4*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[9]*p[13]*p[24] + 4*p[0]*p[10]*p[14]*p[15] - 2*p[0]*p[10]*p[14]*p[24] - 2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24];
coeff[23] = -4*p[0]*p[7]*p[11]*p[16] + 2*p[0]*p[7]*p[11]*p[25] + 4*p[0]*p[7]*p[14]*p[19] - 2*p[0]*p[7]*p[14]*p[28] - 4*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[12]*p[25] - 4*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[8]*p[13]*p[28] - 4*p[0]*p[9]*p[12]*p[19] + 2*p[0]*p[9]*p[12]*p[28] + 4*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[9]*p[13]*p[25] + 4*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[11]*p[28] + 4*p[0]*p[10]*p[14]*p[16] - 2*p[0]*p[10]*p[14]*p[25] - 2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25];
coeff[24] = -4*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[11]*p[26] - 4*p[0]*p[7]*p[13]*p[22] + 2*p[0]*p[7]*p[13]*p[31] + 4*p[0]*p[7]*p[14]*p[20] - 2*p[0]*p[7]*p[14]*p[29] - 4*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[12]*p[26] - 4*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[13]*p[29] - 4*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[8]*p[14]*p[31] - 4*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[11]*p[31] - 4*p[0]*p[9]*p[12]*p[20] + 2*p[0]*p[9]*p[12]*p[29] + 4*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[9]*p[13]*p[26] + 4*p[0]*p[10]*p[11]*p[20] - 2*p[0]*p[10]*p[11]*p[29] - 4*p[0]*p[10]*p[12]*p[22] + 2*p[0]*p[10]*p[12]*p[31] + 4*p[0]*p[10]*p[14]*p[17] - 2*p[0]*p[10]*p[14]*p[26] + 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26];
coeff[25] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24];
coeff[26] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25];
coeff[27] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26];
coeff[28] = 0;
coeff[29] = 2*(p[7]*p[11]*p[15] - p[7]*p[11]*p[24] + p[8]*p[12]*p[15] - p[8]*p[12]*p[24] - p[9]*p[13]*p[15] + p[9]*p[13]*p[24] - p[10]*p[14]*p[15] + p[10]*p[14]*p[24])*p[0];
coeff[30] = 2*(p[7]*p[11]*p[16] - p[7]*p[11]*p[25] - p[7]*p[14]*p[19] + p[7]*p[14]*p[28] + p[8]*p[12]*p[16] - p[8]*p[12]*p[25] + p[8]*p[13]*p[19] - p[8]*p[13]*p[28] + p[9]*p[12]*p[19] - p[9]*p[12]*p[28] - p[9]*p[13]*p[16] + p[9]*p[13]*p[25] - p[10]*p[11]*p[19] + p[10]*p[11]*p[28] - p[10]*p[14]*p[16] + p[10]*p[14]*p[25])*p[0];
coeff[31] = 2*(p[7]*p[11]*p[17] - p[7]*p[11]*p[26] + p[7]*p[13]*p[22] - p[7]*p[13]*p[31] - p[7]*p[14]*p[20] + p[7]*p[14]*p[29] + p[8]*p[12]*p[17] - p[8]*p[12]*p[26] + p[8]*p[13]*p[20] - p[8]*p[13]*p[29] + p[8]*p[14]*p[22] - p[8]*p[14]*p[31] + p[9]*p[11]*p[22] - p[9]*p[11]*p[31] + p[9]*p[12]*p[20] - p[9]*p[12]*p[29] - p[9]*p[13]*p[17] + p[9]*p[13]*p[26] - p[10]*p[11]*p[20] + p[10]*p[11]*p[29] + p[10]*p[12]*p[22] - p[10]*p[12]*p[31] - p[10]*p[14]*p[17] + p[10]*p[14]*p[26])*p[0];
coeff[32] = 2*(-p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + p[10]*p[14]*p[15] - p[10]*p[14]*p[24])*p[0];
coeff[33] = 2*(-p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + p[10]*p[14]*p[16] - p[10]*p[14]*p[25])*p[0];
coeff[34] = 2*(-p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + p[10]*p[14]*p[17] - p[10]*p[14]*p[26])*p[0];
coeff[35] = -2*p[0]*p[7]*p[9]*p[23] + 2*p[0]*p[7]*p[10]*p[21] - 2*p[0]*p[8]*p[9]*p[21] - 2*p[0]*p[8]*p[10]*p[23] + 2*p[0]*p[9]*p[9]*p[18] + 2*p[0]*p[10]*p[10]*p[18] + 2*p[0]*p[11]*p[13]*p[23] - 2*p[0]*p[11]*p[14]*p[21] + 2*p[0]*p[12]*p[13]*p[21] + 2*p[0]*p[12]*p[14]*p[23] - 2*p[0]*p[13]*p[13]*p[18] - 2*p[0]*p[14]*p[14]*p[18] - p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27];
coeff[36] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - p[10]*p[14]*p[24];
coeff[37] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - p[10]*p[14]*p[25];
coeff[38] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - p[10]*p[14]*p[26];
coeff[39] = (p[7]*p[11] + p[8]*p[12] - p[9]*p[13] - p[10]*p[14])*p[15];
coeff[40] = p[7]*p[11]*p[16] - p[7]*p[14]*p[19] + p[8]*p[12]*p[16] + p[8]*p[13]*p[19] + p[9]*p[12]*p[19] - p[9]*p[13]*p[16] - p[10]*p[11]*p[19] - p[10]*p[14]*p[16];
coeff[41] = p[7]*p[11]*p[17] + p[7]*p[13]*p[22] - p[7]*p[14]*p[20] + p[8]*p[12]*p[17] + p[8]*p[13]*p[20] + p[8]*p[14]*p[22] + p[9]*p[11]*p[22] + p[9]*p[12]*p[20] - p[9]*p[13]*p[17] - p[10]*p[11]*p[20] + p[10]*p[12]*p[22] - p[10]*p[14]*p[17];
coeff[42] = 2*(p[7]*p[9]*p[23] - p[7]*p[9]*p[32] - p[7]*p[10]*p[21] + p[7]*p[10]*p[30] + p[8]*p[9]*p[21] - p[8]*p[9]*p[30] + p[8]*p[10]*p[23] - p[8]*p[10]*p[32] - p[9]*p[9]*p[18] + p[9]*p[9]*p[27] - p[10]*p[10]*p[18] + p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27])*p[0];
coeff[43] = -4*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[9]*p[9]*p[24] - 4*p[0]*p[10]*p[10]*p[15] + 2*p[0]*p[10]*p[10]*p[24] + 4*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[13]*p[13]*p[24] + 4*p[0]*p[14]*p[14]*p[15] - 2*p[0]*p[14]*p[14]*p[24] + 2*p[7]*p[11]*p[15] - 2*p[7]*p[11]*p[24] + 2*p[8]*p[12]*p[15] - 2*p[8]*p[12]*p[24] - 2*p[9]*p[13]*p[15] + 2*p[9]*p[13]*p[24] - 2*p[10]*p[14]*p[15] + 2*p[10]*p[14]*p[24];
coeff[44] = -4*p[0]*p[7]*p[10]*p[19] + 2*p[0]*p[7]*p[10]*p[28] + 4*p[0]*p[8]*p[9]*p[19] - 2*p[0]*p[8]*p[9]*p[28] - 4*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[9]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[16] + 2*p[0]*p[10]*p[10]*p[25] + 4*p[0]*p[11]*p[14]*p[19] - 2*p[0]*p[11]*p[14]*p[28] - 4*p[0]*p[12]*p[13]*p[19] + 2*p[0]*p[12]*p[13]*p[28] + 4*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[13]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[16] - 2*p[0]*p[14]*p[14]*p[25] + 2*p[7]*p[11]*p[16] - 2*p[7]*p[11]*p[25] - 2*p[7]*p[14]*p[19] + 2*p[7]*p[14]*p[28] + 2*p[8]*p[12]*p[16] - 2*p[8]*p[12]*p[25] + 2*p[8]*p[13]*p[19] - 2*p[8]*p[13]*p[28] + 2*p[9]*p[12]*p[19] - 2*p[9]*p[12]*p[28] - 2*p[9]*p[13]*p[16] + 2*p[9]*p[13]*p[25] - 2*p[10]*p[11]*p[19] + 2*p[10]*p[11]*p[28] - 2*p[10]*p[14]*p[16] + 2*p[10]*p[14]*p[25];
coeff[45] = 4*p[0]*p[7]*p[9]*p[22] - 2*p[0]*p[7]*p[9]*p[31] - 4*p[0]*p[7]*p[10]*p[20] + 2*p[0]*p[7]*p[10]*p[29] + 4*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[9]*p[29] + 4*p[0]*p[8]*p[10]*p[22] - 2*p[0]*p[8]*p[10]*p[31] - 4*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[9]*p[9]*p[26] - 4*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[10]*p[10]*p[26] - 4*p[0]*p[11]*p[13]*p[22] + 2*p[0]*p[11]*p[13]*p[31] + 4*p[0]*p[11]*p[14]*p[20] - 2*p[0]*p[11]*p[14]*p[29] - 4*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[13]*p[29] - 4*p[0]*p[12]*p[14]*p[22] + 2*p[0]*p[12]*p[14]*p[31] + 4*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[13]*p[13]*p[26] + 4*p[0]*p[14]*p[14]*p[17] - 2*p[0]*p[14]*p[14]*p[26] + 2*p[7]*p[11]*p[17] - 2*p[7]*p[11]*p[26] + 2*p[7]*p[13]*p[22] - 2*p[7]*p[13]*p[31] - 2*p[7]*p[14]*p[20] + 2*p[7]*p[14]*p[29] + 2*p[8]*p[12]*p[17] - 2*p[8]*p[12]*p[26] + 2*p[8]*p[13]*p[20] - 2*p[8]*p[13]*p[29] + 2*p[8]*p[14]*p[22] - 2*p[8]*p[14]*p[31] + 2*p[9]*p[11]*p[22] - 2*p[9]*p[11]*p[31] + 2*p[9]*p[12]*p[20] - 2*p[9]*p[12]*p[29] - 2*p[9]*p[13]*p[17] + 2*p[9]*p[13]*p[26] - 2*p[10]*p[11]*p[20] + 2*p[10]*p[11]*p[29] + 2*p[10]*p[12]*p[22] - 2*p[10]*p[12]*p[31] - 2*p[10]*p[14]*p[17] + 2*p[10]*p[14]*p[26];
coeff[46] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + 2*p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + 2*p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - 2*p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - 2*p[10]*p[14]*p[24];
coeff[47] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + 2*p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - 2*p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + 2*p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + 2*p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + 2*p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - 2*p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - 2*p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - 2*p[10]*p[14]*p[25];
coeff[48] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + 2*p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + 2*p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - 2*p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + 2*p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + 2*p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + 2*p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + 2*p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + 2*p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - 2*p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - 2*p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + 2*p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - 2*p[10]*p[14]*p[26];
coeff[49] = 0;
coeff[50] = 2*(p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - p[14]*p[14]*p[15] + p[14]*p[14]*p[24])*p[0];
coeff[51] = 2*(p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - p[14]*p[14]*p[16] + p[14]*p[14]*p[25])*p[0];
coeff[52] = 2*(-p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - p[14]*p[14]*p[17] + p[14]*p[14]*p[26])*p[0];
coeff[53] = 2*(-p[9]*p[9]*p[15] + p[9]*p[9]*p[24] - p[10]*p[10]*p[15] + p[10]*p[10]*p[24] + p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + p[14]*p[14]*p[15] - p[14]*p[14]*p[24])*p[0];
coeff[54] = 2*(-p[7]*p[10]*p[19] + p[7]*p[10]*p[28] + p[8]*p[9]*p[19] - p[8]*p[9]*p[28] - p[9]*p[9]*p[16] + p[9]*p[9]*p[25] - p[10]*p[10]*p[16] + p[10]*p[10]*p[25] + p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + p[14]*p[14]*p[16] - p[14]*p[14]*p[25])*p[0];
coeff[55] = 2*(p[7]*p[9]*p[22] - p[7]*p[9]*p[31] - p[7]*p[10]*p[20] + p[7]*p[10]*p[29] + p[8]*p[9]*p[20] - p[8]*p[9]*p[29] + p[8]*p[10]*p[22] - p[8]*p[10]*p[31] - p[9]*p[9]*p[17] + p[9]*p[9]*p[26] - p[10]*p[10]*p[17] + p[10]*p[10]*p[26] - p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + p[14]*p[14]*p[17] - p[14]*p[14]*p[26])*p[0];
coeff[56] = -p[2] + p[5] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30] - p[21] + p[30];
coeff[57] = -2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + p[12]*p[13]*p[24];
coeff[58] = -2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - p[14]*p[14]*p[28] - 2*p[19] + p[28];
coeff[59] = 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - p[14]*p[14]*p[29] - 2*p[20] + p[29];
coeff[60] = (p[7]*p[10] + p[8]*p[9] + p[11]*p[14] + p[12]*p[13])*p[15];
coeff[61] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] + p[11]*p[14]*p[16] - p[12]*p[12]*p[19] + p[12]*p[13]*p[16] - p[14]*p[14]*p[19] + p[19];
coeff[62] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] - p[11]*p[12]*p[22] + p[11]*p[14]*p[17] - p[12]*p[12]*p[20] + p[12]*p[13]*p[17] + p[13]*p[14]*p[22] - p[14]*p[14]*p[20] + p[20];
coeff[63] = 0;
coeff[64] = 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
coeff[65] = 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28] + 2*p[19] - 2*p[28];
coeff[66] = -2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29] + 2*p[20] - 2*p[29];
coeff[67] = -2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
coeff[68] = -2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28] - 2*p[19] + 2*p[28];
coeff[69] = 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29] - 2*p[20] + 2*p[29];
coeff[70] = 2*p[0]*p[7]*p[11]*p[21] - 2*p[0]*p[7]*p[12]*p[23] + 2*p[0]*p[7]*p[14]*p[18] - 2*p[0]*p[8]*p[11]*p[23] - 2*p[0]*p[8]*p[12]*p[21] + 2*p[0]*p[8]*p[13]*p[18] + 2*p[0]*p[9]*p[12]*p[18] + 2*p[0]*p[9]*p[13]*p[21] + 2*p[0]*p[9]*p[14]*p[23] + 2*p[0]*p[10]*p[11]*p[18] + 2*p[0]*p[10]*p[13]*p[23] - 2*p[0]*p[10]*p[14]*p[21] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] - p[11]*p[12]*p[23] + p[11]*p[12]*p[32] + p[11]*p[14]*p[18] - p[11]*p[14]*p[27] - p[12]*p[12]*p[21] + p[12]*p[12]*p[30] + p[12]*p[13]*p[18] - p[12]*p[13]*p[27] + p[13]*p[14]*p[23] - p[13]*p[14]*p[32] - p[14]*p[14]*p[21] + p[14]*p[14]*p[30];
coeff[71] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - p[12]*p[13]*p[24];
coeff[72] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + p[14]*p[14]*p[28];
coeff[73] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + p[14]*p[14]*p[29];
coeff[74] = (p[7]*p[10] + p[8]*p[9] - p[11]*p[14] - p[12]*p[13])*p[15];
coeff[75] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] - p[11]*p[14]*p[16] + p[12]*p[12]*p[19] - p[12]*p[13]*p[16] + p[14]*p[14]*p[19];
coeff[76] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] + p[11]*p[12]*p[22] - p[11]*p[14]*p[17] + p[12]*p[12]*p[20] - p[12]*p[13]*p[17] - p[13]*p[14]*p[22] + p[14]*p[14]*p[20];
coeff[77] = 2*(-p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30])*p[0];
coeff[78] = -4*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[7]*p[14]*p[24] - 4*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[8]*p[13]*p[24] - 4*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[9]*p[12]*p[24] - 4*p[0]*p[10]*p[11]*p[15] + 2*p[0]*p[10]*p[11]*p[24] + 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
coeff[79] = -4*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[11]*p[28] - 4*p[0]*p[7]*p[14]*p[16] + 2*p[0]*p[7]*p[14]*p[25] + 4*p[0]*p[8]*p[12]*p[19] - 2*p[0]*p[8]*p[12]*p[28] - 4*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[8]*p[13]*p[25] - 4*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[12]*p[25] - 4*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[9]*p[13]*p[28] - 4*p[0]*p[10]*p[11]*p[16] + 2*p[0]*p[10]*p[11]*p[25] + 4*p[0]*p[10]*p[14]*p[19] - 2*p[0]*p[10]*p[14]*p[28] + 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28];
coeff[80] = -4*p[0]*p[7]*p[11]*p[20] + 2*p[0]*p[7]*p[11]*p[29] + 4*p[0]*p[7]*p[12]*p[22] - 2*p[0]*p[7]*p[12]*p[31] - 4*p[0]*p[7]*p[14]*p[17] + 2*p[0]*p[7]*p[14]*p[26] + 4*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[11]*p[31] + 4*p[0]*p[8]*p[12]*p[20] - 2*p[0]*p[8]*p[12]*p[29] - 4*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[8]*p[13]*p[26] - 4*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[12]*p[26] - 4*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[13]*p[29] - 4*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[9]*p[14]*p[31] - 4*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[11]*p[26] - 4*p[0]*p[10]*p[13]*p[22] + 2*p[0]*p[10]*p[13]*p[31] + 4*p[0]*p[10]*p[14]*p[20] - 2*p[0]*p[10]*p[14]*p[29] - 2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29];
coeff[81] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
coeff[82] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28];
coeff[83] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29];
coeff[84] = 0;
coeff[85] = 2*(p[7]*p[14]*p[15] - p[7]*p[14]*p[24] + p[8]*p[13]*p[15] - p[8]*p[13]*p[24] + p[9]*p[12]*p[15] - p[9]*p[12]*p[24] + p[10]*p[11]*p[15] - p[10]*p[11]*p[24])*p[0];
coeff[86] = 2*(p[7]*p[11]*p[19] - p[7]*p[11]*p[28] + p[7]*p[14]*p[16] - p[7]*p[14]*p[25] - p[8]*p[12]*p[19] + p[8]*p[12]*p[28] + p[8]*p[13]*p[16] - p[8]*p[13]*p[25] + p[9]*p[12]*p[16] - p[9]*p[12]*p[25] + p[9]*p[13]*p[19] - p[9]*p[13]*p[28] + p[10]*p[11]*p[16] - p[10]*p[11]*p[25] - p[10]*p[14]*p[19] + p[10]*p[14]*p[28])*p[0];
coeff[87] = 2*(p[7]*p[11]*p[20] - p[7]*p[11]*p[29] - p[7]*p[12]*p[22] + p[7]*p[12]*p[31] + p[7]*p[14]*p[17] - p[7]*p[14]*p[26] - p[8]*p[11]*p[22] + p[8]*p[11]*p[31] - p[8]*p[12]*p[20] + p[8]*p[12]*p[29] + p[8]*p[13]*p[17] - p[8]*p[13]*p[26] + p[9]*p[12]*p[17] - p[9]*p[12]*p[26] + p[9]*p[13]*p[20] - p[9]*p[13]*p[29] + p[9]*p[14]*p[22] - p[9]*p[14]*p[31] + p[10]*p[11]*p[17] - p[10]*p[11]*p[26] + p[10]*p[13]*p[22] - p[10]*p[13]*p[31] - p[10]*p[14]*p[20] + p[10]*p[14]*p[29])*p[0];
coeff[88] = 2*(-p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - p[10]*p[11]*p[15] + p[10]*p[11]*p[24])*p[0];
coeff[89] = 2*(-p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + p[10]*p[14]*p[19] - p[10]*p[14]*p[28])*p[0];
coeff[90] = 2*(-p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + p[10]*p[14]*p[20] - p[10]*p[14]*p[29])*p[0];
coeff[91] = 2*p[0]*p[7]*p[8]*p[23] - 2*p[0]*p[7]*p[10]*p[18] + 2*p[0]*p[8]*p[8]*p[21] - 2*p[0]*p[8]*p[9]*p[18] - 2*p[0]*p[9]*p[10]*p[23] + 2*p[0]*p[10]*p[10]*p[21] - 2*p[0]*p[11]*p[12]*p[23] + 2*p[0]*p[11]*p[14]*p[18] - 2*p[0]*p[12]*p[12]*p[21] + 2*p[0]*p[12]*p[13]*p[18] + 2*p[0]*p[13]*p[14]*p[23] - 2*p[0]*p[14]*p[14]*p[21] - p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30];
coeff[92] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + p[10]*p[11]*p[24];
coeff[93] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - p[10]*p[14]*p[28];
coeff[94] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - p[10]*p[14]*p[29];
coeff[95] = (p[7]*p[14] + p[8]*p[13] + p[9]*p[12] + p[10]*p[11])*p[15];
coeff[96] = p[7]*p[11]*p[19] + p[7]*p[14]*p[16] - p[8]*p[12]*p[19] + p[8]*p[13]*p[16] + p[9]*p[12]*p[16] + p[9]*p[13]*p[19] + p[10]*p[11]*p[16] - p[10]*p[14]*p[19];
coeff[97] = p[7]*p[11]*p[20] - p[7]*p[12]*p[22] + p[7]*p[14]*p[17] - p[8]*p[11]*p[22] - p[8]*p[12]*p[20] + p[8]*p[13]*p[17] + p[9]*p[12]*p[17] + p[9]*p[13]*p[20] + p[9]*p[14]*p[22] + p[10]*p[11]*p[17] + p[10]*p[13]*p[22] - p[10]*p[14]*p[20];
coeff[98] = 2*(-p[7]*p[8]*p[23] + p[7]*p[8]*p[32] + p[7]*p[10]*p[18] - p[7]*p[10]*p[27] - p[8]*p[8]*p[21] + p[8]*p[8]*p[30] + p[8]*p[9]*p[18] - p[8]*p[9]*p[27] + p[9]*p[10]*p[23] - p[9]*p[10]*p[32] - p[10]*p[10]*p[21] + p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30])*p[0];
coeff[99] = 4*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[7]*p[10]*p[24] + 4*p[0]*p[8]*p[9]*p[15] - 2*p[0]*p[8]*p[9]*p[24] - 4*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[11]*p[14]*p[24] - 4*p[0]*p[12]*p[13]*p[15] + 2*p[0]*p[12]*p[13]*p[24] + 2*p[7]*p[14]*p[15] - 2*p[7]*p[14]*p[24] + 2*p[8]*p[13]*p[15] - 2*p[8]*p[13]*p[24] + 2*p[9]*p[12]*p[15] - 2*p[9]*p[12]*p[24] + 2*p[10]*p[11]*p[15] - 2*p[10]*p[11]*p[24];
coeff[100] = 4*p[0]*p[7]*p[10]*p[16] - 2*p[0]*p[7]*p[10]*p[25] - 4*p[0]*p[8]*p[8]*p[19] + 2*p[0]*p[8]*p[8]*p[28] + 4*p[0]*p[8]*p[9]*p[16] - 2*p[0]*p[8]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[10]*p[10]*p[28] - 4*p[0]*p[11]*p[14]*p[16] + 2*p[0]*p[11]*p[14]*p[25] + 4*p[0]*p[12]*p[12]*p[19] - 2*p[0]*p[12]*p[12]*p[28] - 4*p[0]*p[12]*p[13]*p[16] + 2*p[0]*p[12]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[19] - 2*p[0]*p[14]*p[14]*p[28] + 2*p[7]*p[11]*p[19] - 2*p[7]*p[11]*p[28] + 2*p[7]*p[14]*p[16] - 2*p[7]*p[14]*p[25] - 2*p[8]*p[12]*p[19] + 2*p[8]*p[12]*p[28] + 2*p[8]*p[13]*p[16] - 2*p[8]*p[13]*p[25] + 2*p[9]*p[12]*p[16] - 2*p[9]*p[12]*p[25] + 2*p[9]*p[13]*p[19] - 2*p[9]*p[13]*p[28] + 2*p[10]*p[11]*p[16] - 2*p[10]*p[11]*p[25] - 2*p[10]*p[14]*p[19] + 2*p[10]*p[14]*p[28];
coeff[101] = -4*p[0]*p[7]*p[8]*p[22] + 2*p[0]*p[7]*p[8]*p[31] + 4*p[0]*p[7]*p[10]*p[17] - 2*p[0]*p[7]*p[10]*p[26] - 4*p[0]*p[8]*p[8]*p[20] + 2*p[0]*p[8]*p[8]*p[29] + 4*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[8]*p[9]*p[26] + 4*p[0]*p[9]*p[10]*p[22] - 2*p[0]*p[9]*p[10]*p[31] - 4*p[0]*p[10]*p[10]*p[20] + 2*p[0]*p[10]*p[10]*p[29] + 4*p[0]*p[11]*p[12]*p[22] - 2*p[0]*p[11]*p[12]*p[31] - 4*p[0]*p[11]*p[14]*p[17] + 2*p[0]*p[11]*p[14]*p[26] + 4*p[0]*p[12]*p[12]*p[20] - 2*p[0]*p[12]*p[12]*p[29] - 4*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[12]*p[13]*p[26] - 4*p[0]*p[13]*p[14]*p[22] + 2*p[0]*p[13]*p[14]*p[31] + 4*p[0]*p[14]*p[14]*p[20] - 2*p[0]*p[14]*p[14]*p[29] + 2*p[7]*p[11]*p[20] - 2*p[7]*p[11]*p[29] - 2*p[7]*p[12]*p[22] + 2*p[7]*p[12]*p[31] + 2*p[7]*p[14]*p[17] - 2*p[7]*p[14]*p[26] - 2*p[8]*p[11]*p[22] + 2*p[8]*p[11]*p[31] - 2*p[8]*p[12]*p[20] + 2*p[8]*p[12]*p[29] + 2*p[8]*p[13]*p[17] - 2*p[8]*p[13]*p[26] + 2*p[9]*p[12]*p[17] - 2*p[9]*p[12]*p[26] + 2*p[9]*p[13]*p[20] - 2*p[9]*p[13]*p[29] + 2*p[9]*p[14]*p[22] - 2*p[9]*p[14]*p[31] + 2*p[10]*p[11]*p[17] - 2*p[10]*p[11]*p[26] + 2*p[10]*p[13]*p[22] - 2*p[10]*p[13]*p[31] - 2*p[10]*p[14]*p[20] + 2*p[10]*p[14]*p[29];
coeff[102] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + 2*p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + 2*p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + 2*p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + 2*p[10]*p[11]*p[24];
coeff[103] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + 2*p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + 2*p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - 2*p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + 2*p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + 2*p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + 2*p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + 2*p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - 2*p[10]*p[14]*p[28];
coeff[104] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + 2*p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - 2*p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + 2*p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - 2*p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - 2*p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + 2*p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + 2*p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + 2*p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + 2*p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + 2*p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + 2*p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - 2*p[10]*p[14]*p[29];
coeff[105] = 0;
coeff[106] = 2*(-p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + p[12]*p[13]*p[15] - p[12]*p[13]*p[24])*p[0];
coeff[107] = 2*(-p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - p[14]*p[14]*p[19] + p[14]*p[14]*p[28])*p[0];
coeff[108] = 2*(p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - p[14]*p[14]*p[20] + p[14]*p[14]*p[29])*p[0];
coeff[109] = 2*(p[7]*p[10]*p[15] - p[7]*p[10]*p[24] + p[8]*p[9]*p[15] - p[8]*p[9]*p[24] - p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - p[12]*p[13]*p[15] + p[12]*p[13]*p[24])*p[0];
coeff[110] = 2*(p[7]*p[10]*p[16] - p[7]*p[10]*p[25] - p[8]*p[8]*p[19] + p[8]*p[8]*p[28] + p[8]*p[9]*p[16] - p[8]*p[9]*p[25] - p[10]*p[10]*p[19] + p[10]*p[10]*p[28] - p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + p[14]*p[14]*p[19] - p[14]*p[14]*p[28])*p[0];
coeff[111] = 2*(-p[7]*p[8]*p[22] + p[7]*p[8]*p[31] + p[7]*p[10]*p[17] - p[7]*p[10]*p[26] - p[8]*p[8]*p[20] + p[8]*p[8]*p[29] + p[8]*p[9]*p[17] - p[8]*p[9]*p[26] + p[9]*p[10]*p[22] - p[9]*p[10]*p[31] - p[10]*p[10]*p[20] + p[10]*p[10]*p[29] + p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + p[14]*p[14]*p[20] - p[14]*p[14]*p[29])*p[0];
coeff[112] = -p[3] + p[6] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30] - p[23] + p[32];
coeff[113] = 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + p[12]*p[14]*p[24];
coeff[114] = -2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + p[13]*p[14]*p[28];
coeff[115] = -2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + p[13]*p[14]*p[29] - 2*p[22] + p[31];
coeff[116] = (-p[7]*p[9] + p[8]*p[10] - p[11]*p[13] + p[12]*p[14])*p[15];
coeff[117] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] + p[11]*p[12]*p[19] - p[11]*p[13]*p[16] + p[12]*p[14]*p[16] + p[13]*p[14]*p[19];
coeff[118] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] + p[11]*p[12]*p[20] - p[11]*p[13]*p[17] - p[12]*p[12]*p[22] + p[12]*p[14]*p[17] - p[13]*p[13]*p[22] + p[13]*p[14]*p[20] + p[22];
coeff[119] = 0;
coeff[120] = -2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
coeff[121] = 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
coeff[122] = 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29] + 2*p[22] - 2*p[31];
coeff[123] = 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
coeff[124] = -2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
coeff[125] = -2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29] - 2*p[22] + 2*p[31];
coeff[126] = 2*p[0]*p[7]*p[11]*p[23] + 2*p[0]*p[7]*p[12]*p[21] - 2*p[0]*p[7]*p[13]*p[18] + 2*p[0]*p[8]*p[11]*p[21] - 2*p[0]*p[8]*p[12]*p[23] + 2*p[0]*p[8]*p[14]*p[18] - 2*p[0]*p[9]*p[11]*p[18] - 2*p[0]*p[9]*p[13]*p[23] + 2*p[0]*p[9]*p[14]*p[21] + 2*p[0]*p[10]*p[12]*p[18] + 2*p[0]*p[10]*p[13]*p[21] + 2*p[0]*p[10]*p[14]*p[23] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] + p[11]*p[12]*p[21] - p[11]*p[12]*p[30] - p[11]*p[13]*p[18] + p[11]*p[13]*p[27] - p[12]*p[12]*p[23] + p[12]*p[12]*p[32] + p[12]*p[14]*p[18] - p[12]*p[14]*p[27] - p[13]*p[13]*p[23] + p[13]*p[13]*p[32] + p[13]*p[14]*p[21] - p[13]*p[14]*p[30];
coeff[127] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - p[12]*p[14]*p[24];
coeff[128] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - p[13]*p[14]*p[28];
coeff[129] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - p[13]*p[14]*p[29];
coeff[130] = (-p[7]*p[9] + p[8]*p[10] + p[11]*p[13] - p[12]*p[14])*p[15];
coeff[131] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] - p[11]*p[12]*p[19] + p[11]*p[13]*p[16] - p[12]*p[14]*p[16] - p[13]*p[14]*p[19];
coeff[132] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] - p[11]*p[12]*p[20] + p[11]*p[13]*p[17] + p[12]*p[12]*p[22] - p[12]*p[14]*p[17] + p[13]*p[13]*p[22] - p[13]*p[14]*p[20];
coeff[133] = 2*(-p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32])*p[0];
coeff[134] = 4*p[0]*p[7]*p[13]*p[15] - 2*p[0]*p[7]*p[13]*p[24] - 4*p[0]*p[8]*p[14]*p[15] + 2*p[0]*p[8]*p[14]*p[24] + 4*p[0]*p[9]*p[11]*p[15] - 2*p[0]*p[9]*p[11]*p[24] - 4*p[0]*p[10]*p[12]*p[15] + 2*p[0]*p[10]*p[12]*p[24] - 2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
coeff[135] = -4*p[0]*p[7]*p[12]*p[19] + 2*p[0]*p[7]*p[12]*p[28] + 4*p[0]*p[7]*p[13]*p[16] - 2*p[0]*p[7]*p[13]*p[25] - 4*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[11]*p[28] - 4*p[0]*p[8]*p[14]*p[16] + 2*p[0]*p[8]*p[14]*p[25] + 4*p[0]*p[9]*p[11]*p[16] - 2*p[0]*p[9]*p[11]*p[25] - 4*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[9]*p[14]*p[28] - 4*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[12]*p[25] - 4*p[0]*p[10]*p[13]*p[19] + 2*p[0]*p[10]*p[13]*p[28] + 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
coeff[136] = -4*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[11]*p[31] - 4*p[0]*p[7]*p[12]*p[20] + 2*p[0]*p[7]*p[12]*p[29] + 4*p[0]*p[7]*p[13]*p[17] - 2*p[0]*p[7]*p[13]*p[26] - 4*p[0]*p[8]*p[11]*p[20] + 2*p[0]*p[8]*p[11]*p[29] + 4*p[0]*p[8]*p[12]*p[22] - 2*p[0]*p[8]*p[12]*p[31] - 4*p[0]*p[8]*p[14]*p[17] + 2*p[0]*p[8]*p[14]*p[26] + 4*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[11]*p[26] + 4*p[0]*p[9]*p[13]*p[22] - 2*p[0]*p[9]*p[13]*p[31] - 4*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[9]*p[14]*p[29] - 4*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[12]*p[26] - 4*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[13]*p[29] - 4*p[0]*p[10]*p[14]*p[22] + 2*p[0]*p[10]*p[14]*p[31] + 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29];
coeff[137] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
coeff[138] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
coeff[139] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29];
coeff[140] = 0;
coeff[141] = 2*(-p[7]*p[13]*p[15] + p[7]*p[13]*p[24] + p[8]*p[14]*p[15] - p[8]*p[14]*p[24] - p[9]*p[11]*p[15] + p[9]*p[11]*p[24] + p[10]*p[12]*p[15] - p[10]*p[12]*p[24])*p[0];
coeff[142] = 2*(p[7]*p[12]*p[19] - p[7]*p[12]*p[28] - p[7]*p[13]*p[16] + p[7]*p[13]*p[25] + p[8]*p[11]*p[19] - p[8]*p[11]*p[28] + p[8]*p[14]*p[16] - p[8]*p[14]*p[25] - p[9]*p[11]*p[16] + p[9]*p[11]*p[25] + p[9]*p[14]*p[19] - p[9]*p[14]*p[28] + p[10]*p[12]*p[16] - p[10]*p[12]*p[25] + p[10]*p[13]*p[19] - p[10]*p[13]*p[28])*p[0];
coeff[143] = 2*(p[7]*p[11]*p[22] - p[7]*p[11]*p[31] + p[7]*p[12]*p[20] - p[7]*p[12]*p[29] - p[7]*p[13]*p[17] + p[7]*p[13]*p[26] + p[8]*p[11]*p[20] - p[8]*p[11]*p[29] - p[8]*p[12]*p[22] + p[8]*p[12]*p[31] + p[8]*p[14]*p[17] - p[8]*p[14]*p[26] - p[9]*p[11]*p[17] + p[9]*p[11]*p[26] - p[9]*p[13]*p[22] + p[9]*p[13]*p[31] + p[9]*p[14]*p[20] - p[9]*p[14]*p[29] + p[10]*p[12]*p[17] - p[10]*p[12]*p[26] + p[10]*p[13]*p[20] - p[10]*p[13]*p[29] + p[10]*p[14]*p[22] - p[10]*p[14]*p[31])*p[0];
coeff[144] = 2*(p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - p[10]*p[12]*p[15] + p[10]*p[12]*p[24])*p[0];
coeff[145] = 2*(-p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - p[10]*p[13]*p[19] + p[10]*p[13]*p[28])*p[0];
coeff[146] = 2*(-p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - p[10]*p[14]*p[22] + p[10]*p[14]*p[31])*p[0];
coeff[147] = -2*p[0]*p[7]*p[8]*p[21] + 2*p[0]*p[7]*p[9]*p[18] + 2*p[0]*p[8]*p[8]*p[23] - 2*p[0]*p[8]*p[10]*p[18] + 2*p[0]*p[9]*p[9]*p[23] - 2*p[0]*p[9]*p[10]*p[21] + 2*p[0]*p[11]*p[12]*p[21] - 2*p[0]*p[11]*p[13]*p[18] - 2*p[0]*p[12]*p[12]*p[23] + 2*p[0]*p[12]*p[14]*p[18] - 2*p[0]*p[13]*p[13]*p[23] + 2*p[0]*p[13]*p[14]*p[21] - p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32];
coeff[148] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + p[10]*p[12]*p[24];
coeff[149] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + p[10]*p[13]*p[28];
coeff[150] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + p[10]*p[14]*p[31];
coeff[151] = (-p[7]*p[13] + p[8]*p[14] - p[9]*p[11] + p[10]*p[12])*p[15];
coeff[152] = p[7]*p[12]*p[19] - p[7]*p[13]*p[16] + p[8]*p[11]*p[19] + p[8]*p[14]*p[16] - p[9]*p[11]*p[16] + p[9]*p[14]*p[19] + p[10]*p[12]*p[16] + p[10]*p[13]*p[19];
coeff[153] = p[7]*p[11]*p[22] + p[7]*p[12]*p[20] - p[7]*p[13]*p[17] + p[8]*p[11]*p[20] - p[8]*p[12]*p[22] + p[8]*p[14]*p[17] - p[9]*p[11]*p[17] - p[9]*p[13]*p[22] + p[9]*p[14]*p[20] + p[10]*p[12]*p[17] + p[10]*p[13]*p[20] + p[10]*p[14]*p[22];
coeff[154] = 2*(p[7]*p[8]*p[21] - p[7]*p[8]*p[30] - p[7]*p[9]*p[18] + p[7]*p[9]*p[27] - p[8]*p[8]*p[23] + p[8]*p[8]*p[32] + p[8]*p[10]*p[18] - p[8]*p[10]*p[27] - p[9]*p[9]*p[23] + p[9]*p[9]*p[32] + p[9]*p[10]*p[21] - p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30])*p[0];
coeff[155] = -4*p[0]*p[7]*p[9]*p[15] + 2*p[0]*p[7]*p[9]*p[24] + 4*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[8]*p[10]*p[24] + 4*p[0]*p[11]*p[13]*p[15] - 2*p[0]*p[11]*p[13]*p[24] - 4*p[0]*p[12]*p[14]*p[15] + 2*p[0]*p[12]*p[14]*p[24] - 2*p[7]*p[13]*p[15] + 2*p[7]*p[13]*p[24] + 2*p[8]*p[14]*p[15] - 2*p[8]*p[14]*p[24] - 2*p[9]*p[11]*p[15] + 2*p[9]*p[11]*p[24] + 2*p[10]*p[12]*p[15] - 2*p[10]*p[12]*p[24];
coeff[156] = 4*p[0]*p[7]*p[8]*p[19] - 2*p[0]*p[7]*p[8]*p[28] - 4*p[0]*p[7]*p[9]*p[16] + 2*p[0]*p[7]*p[9]*p[25] + 4*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[8]*p[10]*p[25] + 4*p[0]*p[9]*p[10]*p[19] - 2*p[0]*p[9]*p[10]*p[28] - 4*p[0]*p[11]*p[12]*p[19] + 2*p[0]*p[11]*p[12]*p[28] + 4*p[0]*p[11]*p[13]*p[16] - 2*p[0]*p[11]*p[13]*p[25] - 4*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[12]*p[14]*p[25] - 4*p[0]*p[13]*p[14]*p[19] + 2*p[0]*p[13]*p[14]*p[28] + 2*p[7]*p[12]*p[19] - 2*p[7]*p[12]*p[28] - 2*p[7]*p[13]*p[16] + 2*p[7]*p[13]*p[25] + 2*p[8]*p[11]*p[19] - 2*p[8]*p[11]*p[28] + 2*p[8]*p[14]*p[16] - 2*p[8]*p[14]*p[25] - 2*p[9]*p[11]*p[16] + 2*p[9]*p[11]*p[25] + 2*p[9]*p[14]*p[19] - 2*p[9]*p[14]*p[28] + 2*p[10]*p[12]*p[16] - 2*p[10]*p[12]*p[25] + 2*p[10]*p[13]*p[19] - 2*p[10]*p[13]*p[28];
coeff[157] = 4*p[0]*p[7]*p[8]*p[20] - 2*p[0]*p[7]*p[8]*p[29] - 4*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[7]*p[9]*p[26] - 4*p[0]*p[8]*p[8]*p[22] + 2*p[0]*p[8]*p[8]*p[31] + 4*p[0]*p[8]*p[10]*p[17] - 2*p[0]*p[8]*p[10]*p[26] - 4*p[0]*p[9]*p[9]*p[22] + 2*p[0]*p[9]*p[9]*p[31] + 4*p[0]*p[9]*p[10]*p[20] - 2*p[0]*p[9]*p[10]*p[29] - 4*p[0]*p[11]*p[12]*p[20] + 2*p[0]*p[11]*p[12]*p[29] + 4*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[11]*p[13]*p[26] + 4*p[0]*p[12]*p[12]*p[22] - 2*p[0]*p[12]*p[12]*p[31] - 4*p[0]*p[12]*p[14]*p[17] + 2*p[0]*p[12]*p[14]*p[26] + 4*p[0]*p[13]*p[13]*p[22] - 2*p[0]*p[13]*p[13]*p[31] - 4*p[0]*p[13]*p[14]*p[20] + 2*p[0]*p[13]*p[14]*p[29] + 2*p[7]*p[11]*p[22] - 2*p[7]*p[11]*p[31] + 2*p[7]*p[12]*p[20] - 2*p[7]*p[12]*p[29] - 2*p[7]*p[13]*p[17] + 2*p[7]*p[13]*p[26] + 2*p[8]*p[11]*p[20] - 2*p[8]*p[11]*p[29] - 2*p[8]*p[12]*p[22] + 2*p[8]*p[12]*p[31] + 2*p[8]*p[14]*p[17] - 2*p[8]*p[14]*p[26] - 2*p[9]*p[11]*p[17] + 2*p[9]*p[11]*p[26] - 2*p[9]*p[13]*p[22] + 2*p[9]*p[13]*p[31] + 2*p[9]*p[14]*p[20] - 2*p[9]*p[14]*p[29] + 2*p[10]*p[12]*p[17] - 2*p[10]*p[12]*p[26] + 2*p[10]*p[13]*p[20] - 2*p[10]*p[13]*p[29] + 2*p[10]*p[14]*p[22] - 2*p[10]*p[14]*p[31];
coeff[158] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - 2*p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + 2*p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - 2*p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + 2*p[10]*p[12]*p[24];
coeff[159] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + 2*p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - 2*p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + 2*p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + 2*p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - 2*p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + 2*p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + 2*p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + 2*p[10]*p[13]*p[28];
coeff[160] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + 2*p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + 2*p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - 2*p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + 2*p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - 2*p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + 2*p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - 2*p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - 2*p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + 2*p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + 2*p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + 2*p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + 2*p[10]*p[14]*p[31];
coeff[161] = 0;
coeff[162] = 2*(p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + p[12]*p[14]*p[15] - p[12]*p[14]*p[24])*p[0];
coeff[163] = 2*(-p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + p[13]*p[14]*p[19] - p[13]*p[14]*p[28])*p[0];
coeff[164] = 2*(-p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + p[13]*p[14]*p[20] - p[13]*p[14]*p[29])*p[0];
coeff[165] = 2*(-p[7]*p[9]*p[15] + p[7]*p[9]*p[24] + p[8]*p[10]*p[15] - p[8]*p[10]*p[24] + p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - p[12]*p[14]*p[15] + p[12]*p[14]*p[24])*p[0];
coeff[166] = 2*(p[7]*p[8]*p[19] - p[7]*p[8]*p[28] - p[7]*p[9]*p[16] + p[7]*p[9]*p[25] + p[8]*p[10]*p[16] - p[8]*p[10]*p[25] + p[9]*p[10]*p[19] - p[9]*p[10]*p[28] - p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - p[13]*p[14]*p[19] + p[13]*p[14]*p[28])*p[0];
coeff[167] = 2*(p[7]*p[8]*p[20] - p[7]*p[8]*p[29] - p[7]*p[9]*p[17] + p[7]*p[9]*p[26] - p[8]*p[8]*p[22] + p[8]*p[8]*p[31] + p[8]*p[10]*p[17] - p[8]*p[10]*p[26] - p[9]*p[9]*p[22] + p[9]*p[9]*p[31] + p[9]*p[10]*p[20] - p[9]*p[10]*p[29] - p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - p[13]*p[14]*p[20] + p[13]*p[14]*p[29])*p[0];
}
} // namespace embree

View file

@ -0,0 +1,137 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
namespace embree
{
/* Point query structure for closest point query; a SIMD packet of K independent queries */
template<int K>
struct RTC_ALIGN(16) PointQueryK
{
  /* Default construction does nothing */
  __forceinline PointQueryK() {}

  /* Constructs a point query from the query position, the query radius,
   * and the time used for motion blur */
  __forceinline PointQueryK(const Vec3vf<K>& p, const vfloat<K>& radius = inf, const vfloat<K>& time = zero)
    : p(p), time(time), radius(radius) {}

  /* Returns the number of queries in the packet */
  static __forceinline size_t size() { return K; }

  /* Calculates if this is a valid query that does not cause issues during traversal:
   * per-lane mask that is true where position is bounded, radius is non-negative,
   * and time is finite */
  __forceinline vbool<K> valid() const
  {
    const vbool<K> vx = (abs(p.x) <= vfloat<K>(FLT_LARGE)); // position components bounded
    const vbool<K> vy = (abs(p.y) <= vfloat<K>(FLT_LARGE));
    const vbool<K> vz = (abs(p.z) <= vfloat<K>(FLT_LARGE));
    const vbool<K> vn = radius >= vfloat<K>(0);             // radius must be non-negative
    const vbool<K> vf = abs(time) < vfloat<K>(inf);         // time must be finite
    return vx & vy & vz & vn & vf;
  }

  /* packet <-> scalar conversion helpers; implemented below the PointQueryK<1> specialization */
  __forceinline void get(PointQueryK<1>* ray) const;
  __forceinline void get(size_t i, PointQueryK<1>& ray) const;
  __forceinline void set(const PointQueryK<1>* ray);
  __forceinline void set(size_t i, const PointQueryK<1>& ray);

  Vec3vf<K> p;      // location of the query point
  vfloat<K> time;   // time for motion blur
  vfloat<K> radius; // radius for the point query
};
/* Specialization for a single point query */
template<>
struct RTC_ALIGN(16) PointQueryK<1>
{
  /* Default construction does nothing */
  __forceinline PointQueryK() {}

  /* Constructs a point query from position, radius, and motion-blur time.
   * NOTE(review): the parameter is Vec3fa while member 'p' is Vec3f, so the
   * fourth (padding) component is dropped on construction -- looks intended,
   * but confirm against callers */
  __forceinline PointQueryK(const Vec3fa& p, float radius = inf, float time = zero)
    : p(p), time(time), radius(radius) {}

  /* Calculates if this is a valid query that does not cause issues during
   * traversal: position bounded, radius non-negative, time finite */
  __forceinline bool valid() const {
    return all(le_mask(abs(Vec3fa(p)), Vec3fa(FLT_LARGE)) & le_mask(Vec3fa(0.f), Vec3fa(radius))) && abs(time) < float(inf);
  }

  Vec3f p;      // location of the query point
  float time;   // time for motion blur
  float radius; // radius for the point query
};
/* Scatters every lane of this packet into an array of K single queries. */
template<int K>
__forceinline void PointQueryK<K>::get(PointQueryK<1>* query) const
{
  for (size_t k = 0; k < K; k++) // FIXME: use SIMD transpose
  {
    PointQueryK<1>& q = query[k];
    q.p.x    = p.x[k];
    q.p.y    = p.y[k];
    q.p.z    = p.z[k];
    q.time   = time[k];
    q.radius = radius[k];
  }
}
/* Copies lane i of this packet into the given single query. */
template<int K>
__forceinline void PointQueryK<K>::get(size_t i, PointQueryK<1>& query) const
{
  query.p.x    = p.x[i];
  query.p.y    = p.y[i];
  query.p.z    = p.z[i];
  query.time   = time[i];
  query.radius = radius[i];
}
/* Gathers K single queries into the lanes of this packet. */
template<int K>
__forceinline void PointQueryK<K>::set(const PointQueryK<1>* query)
{
  for (size_t k = 0; k < K; k++)
  {
    const PointQueryK<1>& q = query[k];
    p.x[k]    = q.p.x;
    p.y[k]    = q.p.y;
    p.z[k]    = q.p.z;
    radius[k] = q.radius;
    time[k]   = q.time;
  }
}
/* Writes the given single query into lane i of this packet. */
template<int K>
__forceinline void PointQueryK<K>::set(size_t i, const PointQueryK<1>& query)
{
  p.x[i]    = query.p.x;
  p.y[i]    = query.p.y;
  p.z[i]    = query.p.z;
  time[i]   = query.time;
  radius[i] = query.radius;
}
/* Shortcuts */
typedef PointQueryK<1> PointQuery;        // single point query
typedef PointQueryK<4> PointQuery4;       // packet of 4 queries
typedef PointQueryK<8> PointQuery8;       // packet of 8 queries
typedef PointQueryK<16> PointQuery16;     // packet of 16 queries
typedef PointQueryK<VSIZEX> PointQueryx;  // packet of native SIMD width
struct PointQueryN;                       // forward declaration (defined elsewhere)
/* Prints a point query (position, radius, time) to the given stream. */
template<int K>
__forceinline embree_ostream operator <<(embree_ostream cout, const PointQueryK<K>& query)
{
  cout << "{ " << embree_endl;
  cout << "  p = " << query.p << embree_endl;
  cout << "  r = " << query.radius << embree_endl;
  cout << "  time = " << query.time << embree_endl;
  cout << "}";
  return cout;
}
}

View file

@ -0,0 +1,159 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
namespace embree
{
/*! helper structure for the implementation of the profile functions below.
 *  Records per-section timings over several iterations and reports
 *  first/min/avg/max statistics for up to N named sections. Section
 *  order must be identical in every iteration. */
struct ProfileTimer
{
  static const size_t N = 20;

  /* default constructor: zero-initializes all state (the original left
   * every member uninitialized, which made a default-constructed timer
   * undefined to read before assignment) */
  ProfileTimer () : i(0), j(0), maxJ(0), numSkip(0), t0(0), tj(0)
  {
    for (size_t k=0; k<N; k++) names[k] = nullptr;
    for (size_t k=0; k<N; k++) dt_fst[k] = 0.0;
    for (size_t k=0; k<N; k++) dt_min[k] = pos_inf;
    for (size_t k=0; k<N; k++) dt_avg[k] = 0.0;
    for (size_t k=0; k<N; k++) dt_max[k] = neg_inf;
  }

  /*! \param numSkip number of warm-up iterations excluded from min/avg/max */
  ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0), tj(0)
  {
    for (size_t k=0; k<N; k++) names[k] = nullptr;
    for (size_t k=0; k<N; k++) dt_fst[k] = 0.0;
    for (size_t k=0; k<N; k++) dt_min[k] = pos_inf;
    for (size_t k=0; k<N; k++) dt_avg[k] = 0.0;
    for (size_t k=0; k<N; k++) dt_max[k] = neg_inf;
  }

  /* starts a new iteration: resets the section counter and both timestamps */
  __forceinline void begin()
  {
    j=0;
    t0 = tj = getSeconds();
  }

  /* finishes the iteration, recording its total time under "total" */
  __forceinline void end() {
    absolute("total");
    i++;
  }

  /* records a section relative to the end of the previous section */
  __forceinline void operator() (const char* name) {
    relative(name);
  }

  /* records time elapsed since begin() under the given name */
  __forceinline void absolute (const char* name)
  {
    const double t1 = getSeconds();
    const double dt = t1-t0;
    assert(names[j] == nullptr || names[j] == name); // section order must be stable across iterations
    names[j] = name;
    if (i == 0) dt_fst[j] = dt;  // remember the first iteration separately
    if (i>=numSkip) {            // warm-up iterations do not enter the statistics
      dt_min[j] = min(dt_min[j],dt);
      dt_avg[j] = dt_avg[j] + dt; // dt_avg accumulates a sum; divided only on output
      dt_max[j] = max(dt_max[j],dt);
    }
    j++;
    maxJ = max(maxJ,j);
  }

  /* records time elapsed since the previous section under the given name */
  __forceinline void relative (const char* name)
  {
    const double t1 = getSeconds();
    const double dt = t1-tj;
    tj = t1;
    assert(names[j] == nullptr || names[j] == name);
    names[j] = name;
    if (i == 0) dt_fst[j] = dt;
    if (i>=numSkip) {
      dt_min[j] = min(dt_min[j],dt);
      dt_avg[j] = dt_avg[j] + dt;
      dt_max[j] = max(dt_max[j],dt);
    }
    j++;
    maxJ = max(maxJ,j);
  }

  /* prints per-section throughput (numElements per second) and timings.
   * Averages are computed into locals; unlike the original this does NOT
   * destructively divide dt_avg[], so print()/avg() may be called any
   * number of times without corrupting the statistics */
  void print(size_t numElements)
  {
    const double num = double(i-numSkip); // number of measured (non-warm-up) iterations
    printf(" profile [M/s]:\n");
    for (size_t j=0; j<maxJ; j++) {
      const double davg = dt_avg[j]/num;
      printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
             names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/davg*1E-6,numElements/dt_min[j]*1E-6);
    }
    printf(" profile [ms]:\n");
    for (size_t j=0; j<maxJ; j++) {
      const double davg = dt_avg[j]/num;
      printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
             names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*davg,1000.0*dt_max[j]);
    }
  }

  /* prints per-section timings in milliseconds only */
  void print()
  {
    printf(" profile:\n");
    const double num = double(i-numSkip);
    for (size_t j=0; j<maxJ; j++) {
      printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
             names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j]/num,1000.0*dt_max[j]);
    }
  }

  /* average time of the last recorded section ("total" when used via end()) */
  double avg() {
    return dt_avg[maxJ-1]/double(i-numSkip);
  }

private:
  size_t i;             // current iteration
  size_t j;             // current section within the iteration
  size_t maxJ;          // number of sections seen so far
  size_t numSkip;       // number of warm-up iterations to skip
  double t0;            // timestamp taken at begin()
  double tj;            // timestamp at the end of the previous section
  const char* names[N]; // section names
  double dt_fst[N];     // first-iteration time per section
  double dt_min[N];     // minimum time per section
  double dt_avg[N];     // accumulated (summed) time per section
  double dt_max[N];     // maximum time per section
};
/*! This function executes some code block multiple times and measured sections of it.
Use the following way:
profile(1,10,1000,[&](ProfileTimer& timer) {
// code
timer("A");
// code
timer("B");
});
*/
template<typename Closure>
void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
{
ProfileTimer timer(numSkip);
for (size_t i=0; i<numSkip+numIter; i++)
{
timer.begin();
closure(timer);
timer.end();
}
timer.print(numElements);
}
/*! Same as profile() above, but records into a caller-provided timer
 *  object, which is reset before the first iteration. */
template<typename Closure>
void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
{
  timer = ProfileTimer(numSkip);
  const size_t numTotal = numSkip+numIter;
  for (size_t iteration=0; iteration<numTotal; iteration++)
  {
    timer.begin();
    closure(timer);
    timer.end();
  }
  timer.print(numElements);
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,162 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../include/embree4/rtcore.h"
RTC_NAMESPACE_USE
namespace embree
{
/*! decoding of intersection flags */
/* true when the COHERENT bit is set */
__forceinline bool isCoherent (RTCRayQueryFlags flags) { return (flags & RTC_RAY_QUERY_FLAG_COHERENT) == RTC_RAY_QUERY_FLAG_COHERENT; }
/* true when the COHERENT bit is not set (NOTE(review): relies on
 * RTC_RAY_QUERY_FLAG_INCOHERENT being the zero value -- verify in rtcore.h) */
__forceinline bool isIncoherent(RTCRayQueryFlags flags) { return (flags & RTC_RAY_QUERY_FLAG_COHERENT) == RTC_RAY_QUERY_FLAG_INCOHERENT; }

/*! Macros used in the rtcore API implementation */
// -- GODOT start --
// Godot builds embree without exception support: the try/catch based
// error-handling macros are replaced by empty stubs here, and the
// upstream implementations below are compiled out via '#if 0'.
#define RTC_CATCH_BEGIN
#define RTC_CATCH_END(device)
#define RTC_CATCH_END2(scene)
#define RTC_CATCH_END2_FALSE(scene) return false;
#if 0
// -- GODOT end --
#define RTC_CATCH_BEGIN try {
#define RTC_CATCH_END(device) \
  } catch (std::bad_alloc&) { \
    Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
  } catch (rtcore_error& e) { \
    Device::process_error(device,e.error,e.what()); \
  } catch (std::exception& e) { \
    Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
  } catch (...) { \
    Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
  }
#define RTC_CATCH_END2(scene) \
  } catch (std::bad_alloc&) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
  } catch (rtcore_error& e) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,e.error,e.what()); \
  } catch (std::exception& e) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
  } catch (...) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
  }
#define RTC_CATCH_END2_FALSE(scene) \
  } catch (std::bad_alloc&) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
    return false; \
  } catch (rtcore_error& e) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,e.error,e.what()); \
    return false; \
  } catch (std::exception& e) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
    return false; \
  } catch (...) { \
    Device* device = scene ? scene->device : nullptr; \
    Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
    return false; \
  }
#endif

/* argument-validation helpers: report an invalid-argument error when a
 * handle is null, a geometry ID is invalid, or an ID is out of range */
#define RTC_VERIFY_HANDLE(handle) \
  if (handle == nullptr) { \
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
  }
#define RTC_VERIFY_GEOMID(id) \
  if (id == RTC_INVALID_GEOMETRY_ID) { \
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
  }
#define RTC_VERIFY_UPPER(id,upper) \
  if (id > upper) { \
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
  }
#define RTC_VERIFY_RANGE(id,lower,upper) \
  if (id < lower || id > upper) \
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"argument out of bounds");

#if 0 // enable to debug print all API calls
#define RTC_TRACE(x) std::cout << #x << std::endl;
#else
#define RTC_TRACE(x)
#endif

// -- GODOT start --
// With exceptions disabled the rtcore_error type is unused, and
// throw_RTCError aborts instead of throwing.
#if 0
/*! used to throw embree API errors */
struct rtcore_error : public std::exception
{
  __forceinline rtcore_error(RTCError error, const std::string& str)
    : error(error), str(str) {}

  ~rtcore_error() throw() {}

  const char* what () const throw () {
    return str.c_str();
  }

  RTCError error;
  std::string str;
};
#endif

#if defined(DEBUG) // only report file and line in debug mode
  #define throw_RTCError(error,str) \
    printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
    // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#else
  #define throw_RTCError(error,str) \
    abort();
    // throw rtcore_error(error,str);
#endif
// -- GODOT end --

/* true when the caller's RTCBuildArguments struct is large enough to
 * contain 'member' (struct-size based API versioning) */
#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
  (settings.byteSize > (offsetof(RTCBuildArguments,member)+sizeof(settings.member)))
/* Writes the affine transform 'space' into the float array 'xfm' using the
 * memory layout selected by 'format'. The 3x4 layouts write 12 floats, the
 * 4x4 layout writes 16 floats (with a homogeneous 0,0,0,1 last column).
 * Any other format is reported as an invalid-operation error. */
inline void storeTransform(const AffineSpace3fa& space, RTCFormat format, float* xfm)
{
  switch (format)
  {
  case RTC_FORMAT_FLOAT3X4_ROW_MAJOR: // rows of [linear | translation]
    xfm[ 0] = space.l.vx.x;  xfm[ 1] = space.l.vy.x;  xfm[ 2] = space.l.vz.x;  xfm[ 3] = space.p.x;
    xfm[ 4] = space.l.vx.y;  xfm[ 5] = space.l.vy.y;  xfm[ 6] = space.l.vz.y;  xfm[ 7] = space.p.y;
    xfm[ 8] = space.l.vx.z;  xfm[ 9] = space.l.vy.z;  xfm[10] = space.l.vz.z;  xfm[11] = space.p.z;
    break;

  case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR: // basis vectors then translation
    xfm[ 0] = space.l.vx.x;  xfm[ 1] = space.l.vx.y;  xfm[ 2] = space.l.vx.z;
    xfm[ 3] = space.l.vy.x;  xfm[ 4] = space.l.vy.y;  xfm[ 5] = space.l.vy.z;
    xfm[ 6] = space.l.vz.x;  xfm[ 7] = space.l.vz.y;  xfm[ 8] = space.l.vz.z;
    xfm[ 9] = space.p.x;     xfm[10] = space.p.y;     xfm[11] = space.p.z;
    break;

  case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR: // full homogeneous 4x4 matrix
    xfm[ 0] = space.l.vx.x;  xfm[ 1] = space.l.vx.y;  xfm[ 2] = space.l.vx.z;  xfm[ 3] = 0.f;
    xfm[ 4] = space.l.vy.x;  xfm[ 5] = space.l.vy.y;  xfm[ 6] = space.l.vy.z;  xfm[ 7] = 0.f;
    xfm[ 8] = space.l.vz.x;  xfm[ 9] = space.l.vz.y;  xfm[10] = space.l.vz.z;  xfm[11] = 0.f;
    xfm[12] = space.p.x;     xfm[13] = space.p.y;     xfm[14] = space.p.z;     xfm[15] = 1.f;
    break;

  default:
#if !defined(__SYCL_DEVICE_ONLY__)
    throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format");
#endif
    break;
  }
}
}

View file

@ -0,0 +1,442 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTC_EXPORT_API
#include "default.h"
#include "device.h"
#include "scene.h"
#include "context.h"
#include "alloc.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/bvh_builder_morton.h"
namespace embree
{
namespace isa // FIXME: support more ISAs for builders
{
/* reference-counted BVH state exposed through the RTCBVH handle: owns the
 * node allocator and the temporary arrays used by the Morton builder */
struct BVH : public RefCount
{
  /* keeps a reference on the device for the lifetime of this BVH */
  BVH (Device* device)
    : device(device), allocator(device,true), morton_src(device,0), morton_tmp(device,0)
  {
    device->refInc();
  }

  ~BVH() {
    device->refDec();
  }

public:
  Device* device;                                  // device this BVH was created on
  FastAllocator allocator;                         // allocator used for BVH nodes and leaves
  mvector<BVHBuilderMorton::BuildPrim> morton_src; // temporary build primitives for the Morton builder
  mvector<BVHBuilderMorton::BuildPrim> morton_tmp; // scratch array used by the Morton builder
};
/* Builds a BVH over the primitive array using the fast Morton-code builder
 * (selected for RTC_BUILD_QUALITY_LOW). Nodes and leaves are produced
 * through the user callbacks in 'arguments'; returns the root node pointer.
 * Assumes rtcBuildBVH already validated the arguments and reset the
 * allocator. */
void* rtcBuildBVHMorton(const RTCBuildArguments* arguments)
{
  BVH* bvh = (BVH*) arguments->bvh;
  RTCBuildPrimitive* prims_i = arguments->primitives;
  size_t primitiveCount = arguments->primitiveCount;
  RTCCreateNodeFunction createNode = arguments->createNode;
  RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
  RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
  RTCCreateLeafFunction createLeaf = arguments->createLeaf;
  RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
  void* userPtr = arguments->userPtr;

  std::atomic<size_t> progress(0); // primitives processed so far, for progress reporting

  /* initialize temporary arrays for morton builder */
  PrimRef* prims = (PrimRef*) prims_i;
  mvector<BVHBuilderMorton::BuildPrim>& morton_src = bvh->morton_src;
  mvector<BVHBuilderMorton::BuildPrim>& morton_tmp = bvh->morton_tmp;
  morton_src.resize(primitiveCount);
  morton_tmp.resize(primitiveCount);

  /* compute centroid bounds */
  const BBox3fa centBounds = parallel_reduce ( size_t(0), primitiveCount, BBox3fa(empty), [&](const range<size_t>& r) -> BBox3fa {
      BBox3fa bounds(empty);
      for (size_t i=r.begin(); i<r.end(); i++)
        bounds.extend(prims[i].bounds().center2());
      return bounds;
    }, BBox3fa::merge);

  /* compute morton codes */
  BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
  parallel_for ( size_t(0), primitiveCount, [&](const range<size_t>& r) {
      BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton_src[r.begin()]);
      for (size_t i=r.begin(); i<r.end(); i++) {
        generator(prims[i].bounds(),(unsigned) i);
      }
    });

  /* start morton build */
  std::pair<void*,BBox3fa> root = BVHBuilderMorton::build<std::pair<void*,BBox3fa>>(

    /* thread local allocator for fast allocations */
    [&] () -> FastAllocator::CachedAllocator {
      return bvh->allocator.getCachedAllocator();
    },

    /* lambda function that allocates BVH nodes */
    [&] ( const FastAllocator::CachedAllocator& alloc, size_t N ) -> void* {
      return createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
    },

    /* lambda function that sets bounds */
    [&] (void* node, const std::pair<void*,BBox3fa>* children, size_t N) -> std::pair<void*,BBox3fa>
    {
      BBox3fa bounds = empty;
      void* childptrs[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
      const RTCBounds* cbounds[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
      for (size_t i=0; i<N; i++) {
        bounds.extend(children[i].second);
        childptrs[i] = children[i].first;
        cbounds[i] = (const RTCBounds*)&children[i].second;
      }
      setNodeBounds(node,cbounds,(unsigned int)N,userPtr);
      setNodeChildren(node,childptrs, (unsigned int)N,userPtr);
      return std::make_pair(node,bounds);
    },

    /* lambda function that creates BVH leaves */
    [&]( const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) -> std::pair<void*,BBox3fa>
    {
      RTCBuildPrimitive localBuildPrims[RTC_BUILD_MAX_PRIMITIVES_PER_LEAF];
      BBox3fa bounds = empty;
      for (size_t i=0;i<current.size();i++)
      {
        /* morton_src holds the sorted ordering; map back to the original primitive */
        const size_t id = morton_src[current.begin()+i].index;
        bounds.extend(prims[id].bounds());
        localBuildPrims[i] = prims_i[id];
      }
      void* node = createLeaf((RTCThreadLocalAllocator)&alloc,localBuildPrims,current.size(),userPtr);
      return std::make_pair(node,bounds);
    },

    /* lambda that calculates the bounds for some primitive */
    [&] (const BVHBuilderMorton::BuildPrim& morton) -> BBox3fa {
      return prims[morton.index].bounds();
    },

    /* progress monitor function */
    [&] (size_t dn) {
      if (!buildProgress) return true;
      const size_t n = progress.fetch_add(dn)+dn;
      const double f = std::min(1.0,double(n)/double(primitiveCount));
      return buildProgress(userPtr,f);
    },

    morton_src.data(),morton_tmp.data(),primitiveCount,
    *arguments);

  bvh->allocator.cleanup();
  return root.first;
}
/* Builds a BVH with the standard binned-SAH builder (selected for
 * RTC_BUILD_QUALITY_MEDIUM and as fallback for HIGH quality). Nodes and
 * leaves are produced through the user callbacks; returns the root node
 * pointer. */
void* rtcBuildBVHBinnedSAH(const RTCBuildArguments* arguments)
{
  BVH* bvh = (BVH*) arguments->bvh;
  RTCBuildPrimitive* prims = arguments->primitives;
  size_t primitiveCount = arguments->primitiveCount;
  RTCCreateNodeFunction createNode = arguments->createNode;
  RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
  RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
  RTCCreateLeafFunction createLeaf = arguments->createLeaf;
  RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
  void* userPtr = arguments->userPtr;

  std::atomic<size_t> progress(0); // primitives processed so far, for progress reporting

  /* calculate priminfo */
  auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
    {
      CentGeomBBox3fa bounds(empty);
      for (size_t j=r.begin(); j<r.end(); j++)
        bounds.extend((BBox3fa&)prims[j]);
      return bounds;
    };
  const CentGeomBBox3fa bounds =
    parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);

  const PrimInfo pinfo(0,primitiveCount,bounds);

  /* build BVH */
  void* root = BVHBuilderBinnedSAH::build<void*>(

    /* thread local allocator for fast allocations */
    [&] () -> FastAllocator::CachedAllocator {
      return bvh->allocator.getCachedAllocator();
    },

    /* lambda function that creates BVH nodes */
    [&](BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
    {
      void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
      const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
      for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
      setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
      return node;
    },

    /* lambda function that updates BVH nodes */
    [&](const BVHBuilderBinnedSAH::BuildRecord& precord, const BVHBuilderBinnedSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
      setNodeChildren(node,children, (unsigned int)N,userPtr);
      return node;
    },

    /* lambda function that creates BVH leaves */
    [&](const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
      return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
    },

    /* progress monitor function */
    [&] (size_t dn) {
      if (!buildProgress) return true;
      const size_t n = progress.fetch_add(dn)+dn;
      const double f = std::min(1.0,double(n)/double(primitiveCount));
      return buildProgress(userPtr,f);
    },

    (PrimRef*)prims,pinfo,*arguments);

  bvh->allocator.cleanup();
  return root;
}
/* reduction operator combining two (bounds, max geomID) partial results */
static __forceinline const std::pair<CentGeomBBox3fa,unsigned int> mergePair(const std::pair<CentGeomBBox3fa,unsigned int>& a, const std::pair<CentGeomBBox3fa,unsigned int>& b) {
  const CentGeomBBox3fa mergedBounds = CentGeomBBox3fa::merge2(a.first,b.first);
  const unsigned int mergedGeomID = max(a.second,b.second);
  return std::make_pair(mergedBounds,mergedGeomID);
}
/* Builds a BVH with the spatial-split binned-SAH builder (selected for
 * RTC_BUILD_QUALITY_HIGH when a splitPrimitive callback and spare array
 * capacity are provided). geomIDs must fit into
 * 32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS bits; otherwise the regular
 * binned-SAH builder is used as fallback. Returns the root node pointer. */
void* rtcBuildBVHSpatialSAH(const RTCBuildArguments* arguments)
{
  BVH* bvh = (BVH*) arguments->bvh;
  RTCBuildPrimitive* prims = arguments->primitives;
  size_t primitiveCount = arguments->primitiveCount;
  RTCCreateNodeFunction createNode = arguments->createNode;
  RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
  RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
  RTCCreateLeafFunction createLeaf = arguments->createLeaf;
  RTCSplitPrimitiveFunction splitPrimitive = arguments->splitPrimitive;
  RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
  void* userPtr = arguments->userPtr;

  std::atomic<size_t> progress(0); // primitives processed so far, for progress reporting

  /* calculate priminfo */
  auto computeBounds = [&](const range<size_t>& r) -> std::pair<CentGeomBBox3fa,unsigned int>
    {
      CentGeomBBox3fa bounds(empty);
      unsigned maxGeomID = 0;
      for (size_t j=r.begin(); j<r.end(); j++)
      {
        bounds.extend((BBox3fa&)prims[j]);
        maxGeomID = max(maxGeomID,prims[j].geomID);
      }
      return std::pair<CentGeomBBox3fa,unsigned int>(bounds,maxGeomID);
    };

  const std::pair<CentGeomBBox3fa,unsigned int> pair =
    parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),std::pair<CentGeomBBox3fa,unsigned int>(CentGeomBBox3fa(empty),0), computeBounds, mergePair);

  CentGeomBBox3fa bounds = pair.first;
  const unsigned int maxGeomID = pair.second;

  if (unlikely(maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS))))
  {
    /* fallback code for max geomID larger than threshold */
    return rtcBuildBVHBinnedSAH(arguments);
  }

  const PrimInfo pinfo(0,primitiveCount,bounds);

  /* function that splits a build primitive */
  struct Splitter
  {
    Splitter (RTCSplitPrimitiveFunction splitPrimitive, unsigned geomID, unsigned primID, void* userPtr)
      : splitPrimitive(splitPrimitive), geomID(geomID), primID(primID), userPtr(userPtr) {}

    /* splits a PrimRef; the reserved upper geomID bits are masked off
     * before calling the user callback and restored afterwards */
    __forceinline void operator() (PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const
    {
      prim.geomIDref() &= BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK;
      splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
      left_o.geomIDref() = geomID; left_o.primIDref() = primID;
      right_o.geomIDref() = geomID; right_o.primIDref() = primID;
    }

    /* splits a raw bounding box by wrapping it in a temporary PrimRef */
    __forceinline void operator() (const BBox3fa& box, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const
    {
      PrimRef prim(box,geomID & BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK,primID);
      splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
    }

    RTCSplitPrimitiveFunction splitPrimitive; // user split callback
    unsigned geomID;                          // geomID of the primitive being split
    unsigned primID;                          // primID of the primitive being split
    void* userPtr;                            // user pointer forwarded to the callback
  };

  /* build BVH */
  void* root = BVHBuilderBinnedFastSpatialSAH::build<void*>(

    /* thread local allocator for fast allocations */
    [&] () -> FastAllocator::CachedAllocator {
      return bvh->allocator.getCachedAllocator();
    },

    /* lambda function that creates BVH nodes */
    [&] (BVHBuilderBinnedFastSpatialSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
    {
      void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
      const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
      for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
      setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
      return node;
    },

    /* lambda function that updates BVH nodes */
    [&] (const BVHBuilderBinnedFastSpatialSAH::BuildRecord& precord, const BVHBuilderBinnedFastSpatialSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
      setNodeChildren(node,children, (unsigned int)N,userPtr);
      return node;
    },

    /* lambda function that creates BVH leaves */
    [&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
      return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
    },

    /* returns the splitter */
    [&] ( const PrimRef& prim ) -> Splitter {
      return Splitter(splitPrimitive,prim.geomID(),prim.primID(),userPtr);
    },

    /* progress monitor function */
    [&] (size_t dn) {
      if (!buildProgress) return true;
      const size_t n = progress.fetch_add(dn)+dn;
      const double f = std::min(1.0,double(n)/double(primitiveCount));
      return buildProgress(userPtr,f);
    },

    (PrimRef*)prims,
    arguments->primitiveArrayCapacity,
    pinfo,*arguments);

  bvh->allocator.cleanup();
  return root;
}
}
}
using namespace embree;
using namespace embree::isa;
RTC_NAMESPACE_BEGIN
/* Creates a new BVH object on the given device and returns it with one
 * reference held by the caller. */
RTC_API RTCBVH rtcNewBVH(RTCDevice device)
{
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcNewBVH); // was RTC_TRACE(rtcNewAllocator): stale copy-pasted trace name
  RTC_VERIFY_HANDLE(device);
  BVH* bvh = new BVH((Device*)device);
  return (RTCBVH) bvh->refInc();
  RTC_CATCH_END((Device*)device);
  return nullptr;
}
/* Validates the build arguments, resets the allocator, dispatches to the
 * builder matching the requested quality level, and returns the root node
 * pointer produced by the user callbacks.
 *
 * FIX: in the original, every quality branch returned immediately, which
 * made the dynamic-mode cleanup below unreachable dead code -- the Morton
 * temporary arrays were never released for static builds. The result is
 * now captured so the cleanup runs as the comment intends. */
RTC_API void* rtcBuildBVH(const RTCBuildArguments* arguments)
{
  BVH* bvh = (BVH*) arguments->bvh;
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcBuildBVH);
  RTC_VERIFY_HANDLE(bvh);
  RTC_VERIFY_HANDLE(arguments);
  RTC_VERIFY_HANDLE(arguments->createNode);
  RTC_VERIFY_HANDLE(arguments->setNodeChildren);
  RTC_VERIFY_HANDLE(arguments->setNodeBounds);
  RTC_VERIFY_HANDLE(arguments->createLeaf);
  if (arguments->primitiveArrayCapacity < arguments->primitiveCount)
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"primitiveArrayCapacity must be greater or equal to primitiveCount")

  /* initialize the allocator */
  bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa));
  bvh->allocator.reset();

  /* switch between different builders based on quality level */
  void* root = nullptr;
  if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW)
    root = rtcBuildBVHMorton(arguments);
  else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM)
    root = rtcBuildBVHBinnedSAH(arguments);
  else if (arguments->buildQuality == RTC_BUILD_QUALITY_HIGH) {
    /* spatial splits need a split callback and spare array capacity */
    if (arguments->splitPrimitive == nullptr || arguments->primitiveArrayCapacity <= arguments->primitiveCount)
      root = rtcBuildBVHBinnedSAH(arguments);
    else
      root = rtcBuildBVHSpatialSAH(arguments);
  }
  else
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid build quality");

  /* if we are in dynamic mode, then do not clear temporary data */
  if (!(arguments->buildFlags & RTC_BUILD_FLAG_DYNAMIC))
  {
    bvh->morton_src.clear();
    bvh->morton_tmp.clear();
  }
  return root;
  RTC_CATCH_END(bvh->device);
  return nullptr;
}
/* Allocates 'bytes' of memory with the given alignment from the
 * thread-local allocator handed to the build callbacks. */
RTC_API void* rtcThreadLocalAlloc(RTCThreadLocalAllocator localAllocator, size_t bytes, size_t align)
{
  /* the opaque handle is a pointer to a FastAllocator::CachedAllocator */
  FastAllocator::CachedAllocator* alloc = (FastAllocator::CachedAllocator*) localAllocator;
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcThreadLocalAlloc);
  return alloc->malloc0(bytes,align);
  RTC_CATCH_END(alloc->alloc->getDevice());
  return nullptr; // only reached when error handling is enabled
}
/* Releases the temporary Morton build arrays of a BVH that will no longer
 * be rebuilt dynamically. */
RTC_API void rtcMakeStaticBVH(RTCBVH hbvh)
{
  BVH* bvh = (BVH*) hbvh;
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcMakeStaticBVH); // was RTC_TRACE(rtcStaticBVH): stale trace name
  RTC_VERIFY_HANDLE(hbvh);
  bvh->morton_src.clear();
  bvh->morton_tmp.clear();
  RTC_CATCH_END(bvh->device);
}
/* Increments the reference count of the given BVH handle. */
RTC_API void rtcRetainBVH(RTCBVH hbvh)
{
  BVH* bvh = (BVH*) hbvh;
  /* fetch the device before validation so error reporting works for null handles */
  Device* device = bvh ? bvh->device : nullptr;
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcRetainBVH);
  RTC_VERIFY_HANDLE(hbvh);
  bvh->refInc();
  RTC_CATCH_END(device);
}
/* Decrements the reference count of the given BVH handle; the object is
 * destroyed when the count reaches zero. */
RTC_API void rtcReleaseBVH(RTCBVH hbvh)
{
  BVH* bvh = (BVH*) hbvh;
  /* fetch the device before validation so error reporting works for null handles */
  Device* device = bvh ? bvh->device : nullptr;
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcReleaseBVH);
  RTC_VERIFY_HANDLE(hbvh);
  bvh->refDec();
  RTC_CATCH_END(device);
}
RTC_NAMESPACE_END

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,400 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "device.h"
#include "builder.h"
#include "scene_triangle_mesh.h"
#include "scene_quad_mesh.h"
#include "scene_user_geometry.h"
#include "scene_instance.h"
#include "scene_instance_array.h"
#include "scene_curves.h"
#include "scene_line_segments.h"
#include "scene_subdiv_mesh.h"
#include "scene_grid_mesh.h"
#include "scene_points.h"
#include "../subdiv/tessellation_cache.h"
#include "acceln.h"
#include "geometry.h"
#if defined(EMBREE_SYCL_SUPPORT)
#include "../sycl/rthwif_embree_builder.h"
#endif
namespace embree
{
struct TaskGroup;
/*! Base class all scenes are derived from. Owns the list of geometries,
 *  tracks modification state, and builds/holds the acceleration structures. */
class Scene : public AccelN
{
  ALIGNED_CLASS_USM_(std::alignment_of<Scene>::value);

public:
  /*! Typed iterator over the scene's geometries: at(i) yields the i'th
   *  geometry only if it matches Ty's type mask and the mblur template flag,
   *  otherwise nullptr. */
  template<typename Ty, bool mblur = false>
  class Iterator
  {
  public:
    Iterator () {}

    /*! 'all' = true also visits disabled geometries */
    Iterator (Scene* scene, bool all = false)
      : scene(scene), all(all) {}

    /*! returns the i'th geometry as Ty*, or nullptr if it does not qualify */
    __forceinline Ty* at(const size_t i)
    {
      Geometry* geom = scene->geometries[i].ptr;
      if (geom == nullptr) return nullptr;
      if (!all && !geom->isEnabled()) return nullptr;
      const size_t mask = geom->getTypeMask() & Ty::geom_type;
      if (!(mask)) return nullptr;
      // motion-blurred geometries have more than one time step
      if ((geom->numTimeSteps != 1) != mblur) return nullptr;
      return (Ty*) geom;
    }

    __forceinline Ty* operator[] (const size_t i) {
      return at(i);
    }

    /*! number of geometry slots (matching or not) */
    __forceinline size_t size() const {
      return scene->size();
    }

    /*! total primitive count over all matching geometries */
    __forceinline size_t numPrimitives() const {
      return scene->getNumPrimitives(Ty::geom_type,mblur);
    }

    /*! largest primitive count of any single matching geometry */
    __forceinline size_t maxPrimitivesPerGeometry()
    {
      size_t ret = 0;
      for (size_t i=0; i<scene->size(); i++) {
        Ty* mesh = at(i);
        if (mesh == nullptr) continue;
        ret = max(ret,mesh->size());
      }
      return ret;
    }

    /*! highest geometry ID among matching geometries */
    __forceinline unsigned int maxGeomID()
    {
      unsigned int ret = 0;
      for (size_t i=0; i<scene->size(); i++) {
        Ty* mesh = at(i);
        if (mesh == nullptr) continue;
        ret = max(ret,(unsigned int)i);
      }
      return ret;
    }

    /*! largest time-step count of any matching geometry */
    __forceinline unsigned maxTimeStepsPerGeometry()
    {
      unsigned ret = 0;
      for (size_t i=0; i<scene->size(); i++) {
        Ty* mesh = at(i);
        if (mesh == nullptr) continue;
        ret = max(ret,mesh->numTimeSteps);
      }
      return ret;
    }

  private:
    Scene* scene;
    bool all; // also visit disabled geometries when true
  };

  /*! Untyped iterator: filters by a runtime type mask and motion-blur flag
   *  instead of a compile-time geometry type. Disabled geometries are
   *  always skipped. */
  class Iterator2
  {
  public:
    Iterator2 () {}

    Iterator2 (Scene* scene, Geometry::GTypeMask typemask, bool mblur)
      : scene(scene), typemask(typemask), mblur(mblur) {}

    /*! returns the i'th geometry, or nullptr if it does not qualify */
    __forceinline Geometry* at(const size_t i)
    {
      Geometry* geom = scene->geometries[i].ptr;
      if (geom == nullptr) return nullptr;
      if (!geom->isEnabled()) return nullptr;
      if (!(geom->getTypeMask() & typemask)) return nullptr;
      if ((geom->numTimeSteps != 1) != mblur) return nullptr;
      return geom;
    }

    __forceinline Geometry* operator[] (const size_t i) {
      return at(i);
    }

    __forceinline size_t size() const {
      return scene->size();
    }

  private:
    Scene* scene;
    Geometry::GTypeMask typemask;
    bool mblur;
  };

public:

  /*! Scene construction */
  Scene (Device* device);

  /*! Scene destruction */
  ~Scene () noexcept;

private:
  /*! class is non-copyable */
  Scene (const Scene& other) DELETED; // do not implement
  Scene& operator= (const Scene& other) DELETED; // do not implement

public:
  /* creation of the acceleration structure for each geometry kind;
     the *MB variants handle the motion-blurred (multi-time-step) case */
  void createTriangleAccel();
  void createTriangleMBAccel();
  void createQuadAccel();
  void createQuadMBAccel();
  void createHairAccel();
  void createHairMBAccel();
  void createSubdivAccel();
  void createSubdivMBAccel();
  void createUserGeometryAccel();
  void createUserGeometryMBAccel();
  void createInstanceAccel();
  void createInstanceMBAccel();
  void createInstanceExpensiveAccel();
  void createInstanceExpensiveMBAccel();
  void createInstanceArrayAccel();
  void createInstanceArrayMBAccel();
  void createGridAccel();
  void createGridMBAccel();

  /*! prints statistics about the scene */
  void printStatistics();

  /*! clears the scene */
  void clear();

  /*! detaches some geometry */
  void detachGeometry(size_t geomID);

  void setBuildQuality(RTCBuildQuality quality_flags);
  RTCBuildQuality getBuildQuality() const;

  void setSceneFlags(RTCSceneFlags scene_flags);
  RTCSceneFlags getSceneFlags() const;

  /* acceleration-structure builds for CPU resp. GPU backends */
  void build_cpu_accels();
  void build_gpu_accels();
  void commit (bool join);
  void commit_task ();
  void build () {}

  /* return number of geometries */
  __forceinline size_t size() const { return geometries.size(); }

  /* bind geometry to the scene */
  unsigned int bind (unsigned geomID, Ref<Geometry> geometry);

  /* determines if scene is modified */
  __forceinline bool isModified() const { return modified; }

  /* sets modified flag */
  __forceinline void setModified(bool f = true) {
    modified = f;
  }

  /*! true if the geometry's modification counter advanced since the last
   *  commit recorded it in geometryModCounters_ */
  __forceinline bool isGeometryModified(size_t geomID)
  {
    Ref<Geometry>& g = geometries[geomID];
    if (!g) return false;
    return g->getModCounter() > geometryModCounters_[geomID];
  }

protected:

  /*! scans all geometries and sets the scene's modified flag accordingly */
  void checkIfModifiedAndSet ();

public:

  /* get mesh by ID */
  __forceinline Geometry* get(size_t i) { assert(i < geometries.size()); return geometries[i].ptr; }
  __forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; }

  /* typed access; asserts (debug only) that the stored geometry matches Mesh */
  template<typename Mesh>
  __forceinline Mesh* get(size_t i) {
    assert(i < geometries.size());
    assert(geometries[i]->getTypeMask() & Mesh::geom_type);
    return (Mesh*)geometries[i].ptr;
  }
  template<typename Mesh>
  __forceinline const Mesh* get(size_t i) const {
    assert(i < geometries.size());
    assert(geometries[i]->getTypeMask() & Mesh::geom_type);
    return (Mesh*)geometries[i].ptr;
  }

  /* typed access that returns nullptr instead of asserting on a slot that is
     empty ('null' is Embree's NullTy sentinel) or of the wrong type */
  template<typename Mesh>
  __forceinline Mesh* getSafe(size_t i) {
    assert(i < geometries.size());
    if (geometries[i] == null) return nullptr;
    if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
    else return (Mesh*) geometries[i].ptr;
  }

  /* thread-safe access under the geometries mutex */
  __forceinline Ref<Geometry> get_locked(size_t i) {
    Lock<MutexSys> lock(geometriesMutex);
    assert(i < geometries.size());
    return geometries[i];
  }

  /* flag decoding */
  __forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); }
  __forceinline bool isCompactAccel() const { return scene_flags & RTC_SCENE_FLAG_COMPACT; }
  __forceinline bool isRobustAccel() const { return scene_flags & RTC_SCENE_FLAG_ROBUST; }
  __forceinline bool isStaticAccel() const { return !(scene_flags & RTC_SCENE_FLAG_DYNAMIC); }
  __forceinline bool isDynamicAccel() const { return scene_flags & RTC_SCENE_FLAG_DYNAMIC; }

  __forceinline bool hasArgumentFilterFunction() const {
    return scene_flags & RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS;
  }

  __forceinline bool hasGeometryFilterFunction() {
    return world.numFilterFunctions != 0;
  }

  __forceinline bool hasFilterFunction() {
    return hasArgumentFilterFunction() || hasGeometryFilterFunction();
  }

  void* createQBVH6Accel();

public:
  Device* device;

public:
  IDPool<unsigned,0xFFFFFFFE> id_pool;                 //!< pool of free geometry IDs
  Device::vector<Ref<Geometry>> geometries = device;   //!< list of all user geometries
  avector<unsigned int> geometryModCounters_;          //!< per-geometry mod counter seen at last commit
  Device::vector<float*> vertices = device;

public:
  /* these are to detect if we need to recreate the acceleration structures */
  bool flags_modified;
  unsigned int enabled_geometry_types;

  RTCSceneFlags scene_flags;
  RTCBuildQuality quality_flags;
  MutexSys buildMutex;       //!< serializes commits
  MutexSys geometriesMutex;  //!< protects the geometries array

#if defined(EMBREE_SYCL_SUPPORT)
public:
  BBox3f hwaccel_bounds = empty;
  AccelBuffer hwaccel;
#endif

private:
  bool modified; //!< true if scene got modified

public:
  std::unique_ptr<TaskGroup> taskGroup;

public:

  /*! adapts the scene's progress callback to the builder's monitor interface */
  struct BuildProgressMonitorInterface : public BuildProgressMonitor {
    BuildProgressMonitorInterface(Scene* scene)
      : scene(scene) {}
    void operator() (size_t dn) const { scene->progressMonitor(double(dn)); }
  private:
    Scene* scene;
  };
  BuildProgressMonitorInterface progressInterface;
  RTCProgressMonitorFunction progress_monitor_function;
  void* progress_monitor_ptr;
  std::atomic<size_t> progress_monitor_counter;
  void progressMonitor(double nprims);
  void setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr);

private:
  GeometryCounts world; //!< counts for geometry

public:

  __forceinline size_t numPrimitives() const {
    return world.size();
  }

  /*! sums the primitive counts of all geometry kinds selected by 'mask',
   *  choosing the motion-blur or static counters depending on 'mblur' */
  __forceinline size_t getNumPrimitives(Geometry::GTypeMask mask, bool mblur) const
  {
    size_t count = 0;

    if (mask & Geometry::MTY_TRIANGLE_MESH)
      count += mblur ? world.numMBTriangles : world.numTriangles;

    if (mask & Geometry::MTY_QUAD_MESH)
      count += mblur ? world.numMBQuads : world.numQuads;

    if (mask & Geometry::MTY_CURVE2)
      count += mblur ? world.numMBLineSegments : world.numLineSegments;

    if (mask & Geometry::MTY_CURVE4)
      count += mblur ? world.numMBBezierCurves : world.numBezierCurves;

    if (mask & Geometry::MTY_POINTS)
      count += mblur ? world.numMBPoints : world.numPoints;

    if (mask & Geometry::MTY_SUBDIV_MESH)
      count += mblur ? world.numMBSubdivPatches : world.numSubdivPatches;

    if (mask & Geometry::MTY_USER_GEOMETRY)
      count += mblur ? world.numMBUserGeometries : world.numUserGeometries;

    if (mask & Geometry::MTY_INSTANCE_CHEAP)
      count += mblur ? world.numMBInstancesCheap : world.numInstancesCheap;

    if (mask & Geometry::MTY_INSTANCE_EXPENSIVE)
      count += mblur ? world.numMBInstancesExpensive : world.numInstancesExpensive;

    if (mask & Geometry::MTY_INSTANCE_ARRAY)
      count += mblur ? world.numMBInstanceArrays : world.numInstanceArrays;

    if (mask & Geometry::MTY_GRID_MESH)
      count += mblur ? world.numMBGrids : world.numGrids;

    return count;
  }

  /*! like getNumPrimitives, but counts grid meshes at sub-grid granularity */
  __forceinline size_t getNumSubPrimitives(Geometry::GTypeMask mask, bool mblur) const
  {
    size_t count = 0;

    if (mask & Geometry::MTY_GRID_MESH)
      count += mblur ? world.numMBSubGrids : world.numSubGrids;

    // all other kinds: sub-primitive count equals primitive count
    Geometry::GTypeMask new_mask = (Geometry::GTypeMask)(mask & ~Geometry::MTY_GRID_MESH);
    count += getNumPrimitives(new_mask, mblur);

    return count;
  }

  /*! maximum time-step count over all geometries of type Mesh (1 when not mblur) */
  template<typename Mesh, bool mblur>
  __forceinline unsigned getNumTimeSteps()
  {
    if (!mblur)
      return 1;

    Scene::Iterator<Mesh,mblur> iter(this);
    return iter.maxTimeStepsPerGeometry();
  }

  /*! maximum geometry ID over all geometries of type Mesh */
  template<typename Mesh, bool mblur>
  __forceinline unsigned int getMaxGeomID()
  {
    Scene::Iterator<Mesh,mblur> iter(this);
    return iter.maxGeomID();
  }
};
}

View file

@ -0,0 +1,764 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "geometry.h"
#include "buffer.h"
#include "../subdiv/bezier_curve.h"
#include "../subdiv/hermite_curve.h"
#include "../subdiv/bspline_curve.h"
#include "../subdiv/catmullrom_curve.h"
#include "../subdiv/linear_bezier_patch.h"
namespace embree
{
/*! represents an array of bicubic bezier curves */
struct CurveGeometry : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4;

public:

  /*! bezier curve construction */
  CurveGeometry (Device* device, Geometry::GType gtype);

public:
  /* Geometry interface (implemented in the corresponding .cpp) */
  void setMask(unsigned mask);
  void setNumTimeSteps (unsigned int numTimeSteps);
  void setVertexAttributeCount (unsigned int N);
  void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify();
  void setTessellationRate(float N);
  void setMaxRadiusScale(float s);
  void addElementsToCount (GeometryCounts & counts) const;

public:

  /*! returns the number of vertices */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns the i'th curve (the index of its first control vertex) */
  __forceinline const unsigned int& curve(size_t i) const {
    return curves[i];
  }

  /*! returns i'th vertex of the first time step */
  __forceinline Vec3ff vertex(size_t i) const {
    return vertices0[i];
  }

  /*! returns i'th normal of the first time step */
  __forceinline Vec3fa normal(size_t i) const {
    return normals0[i];
  }

  /*! returns i'th tangent of the first time step */
  __forceinline Vec3ff tangent(size_t i) const {
    return tangents0[i];
  }

  /*! returns i'th normal derivative of the first time step */
  __forceinline Vec3fa dnormal(size_t i) const {
    return dnormals0[i];
  }

  /*! returns i'th radius of the first time step (stored in the vertex w component) */
  __forceinline float radius(size_t i) const {
    return vertices0[i].w;
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline Vec3ff vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns i'th normal of itime'th timestep */
  __forceinline Vec3fa normal(size_t i, size_t itime) const {
    return normals[itime][i];
  }

  /*! returns i'th tangent of itime'th timestep */
  __forceinline Vec3ff tangent(size_t i, size_t itime) const {
    return tangents[itime][i];
  }

  /*! returns i'th normal derivative of itime'th timestep */
  __forceinline Vec3fa dnormal(size_t i, size_t itime) const {
    return dnormals[itime][i];
  }

  /*! returns i'th radius of itime'th timestep */
  __forceinline float radius(size_t i, size_t itime) const {
    return vertices[itime][i].w;
  }

  /*! gathers the curve starting with i'th vertex */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const
  {
    p0 = vertex(i+0);
    p1 = vertex(i+1);
    p2 = vertex(i+2);
    p3 = vertex(i+3);
  }

  /*! gathers the curve starting with i'th vertex of itime'th timestep */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const
  {
    p0 = vertex(i+0,itime);
    p1 = vertex(i+1,itime);
    p2 = vertex(i+2,itime);
    p3 = vertex(i+3,itime);
  }

  /*! gathers the curve normals starting with i'th vertex */
  __forceinline void gather_normals(Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
  {
    n0 = normal(i+0);
    n1 = normal(i+1);
    n2 = normal(i+2);
    n3 = normal(i+3);
  }

  /*! gathers the curve (vertices and normals) starting with i'th vertex */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
  {
    p0 = vertex(i+0);
    p1 = vertex(i+1);
    p2 = vertex(i+2);
    p3 = vertex(i+3);
    n0 = normal(i+0);
    n1 = normal(i+1);
    n2 = normal(i+2);
    n3 = normal(i+3);
  }

  /*! gathers the curve (vertices and normals) starting with i'th vertex of itime'th timestep */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const
  {
    p0 = vertex(i+0,itime);
    p1 = vertex(i+1,itime);
    p2 = vertex(i+2,itime);
    p3 = vertex(i+3,itime);
    n0 = normal(i+0,itime);
    n1 = normal(i+1,itime);
    n2 = normal(i+2,itime);
    n3 = normal(i+3,itime);
  }

  /*! prefetches the curve starting with i'th vertex into L1 */
  __forceinline void prefetchL1_vertices(size_t i) const
  {
    prefetchL1(vertices0.getPtr(i)+0);
    prefetchL1(vertices0.getPtr(i)+64);
  }

  /*! prefetches the curve starting with i'th vertex into L2 */
  __forceinline void prefetchL2_vertices(size_t i) const
  {
    prefetchL2(vertices0.getPtr(i)+0);
    prefetchL2(vertices0.getPtr(i)+64);
  }

  /*! loads curve vertices for specified time, linearly interpolating
   *  between the two adjacent time steps */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);

    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3ff a0,a1,a2,a3;
    gather(a0,a1,a2,a3,i,itime);
    Vec3ff b0,b1,b2,b3;
    gather(b0,b1,b2,b3,i,itime+1);
    p0 = madd(Vec3ff(t0),a0,t1*b0);
    p1 = madd(Vec3ff(t0),a1,t1*b1);
    p2 = madd(Vec3ff(t0),a2,t1*b2);
    p3 = madd(Vec3ff(t0),a3,t1*b3);
  }

  /*! loads curve vertices for specified time for mblur and non-mblur geometry */
  __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
  {
    if (hasMotionBlur()) gather(p0,p1,p2,p3,i,time);
    else                 gather(p0,p1,p2,p3,i);
  }

  /*! loads curve vertices and normals for specified time, linearly
   *  interpolating between the two adjacent time steps */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);

    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3ff a0,a1,a2,a3; Vec3fa an0,an1,an2,an3;
    gather(a0,a1,a2,a3,an0,an1,an2,an3,i,itime);
    Vec3ff b0,b1,b2,b3; Vec3fa bn0,bn1,bn2,bn3;
    gather(b0,b1,b2,b3,bn0,bn1,bn2,bn3,i,itime+1);
    p0 = madd(Vec3ff(t0),a0,t1*b0);
    p1 = madd(Vec3ff(t0),a1,t1*b1);
    p2 = madd(Vec3ff(t0),a2,t1*b2);
    p3 = madd(Vec3ff(t0),a3,t1*b3);
    n0 = madd(Vec3ff(t0),an0,t1*bn0);
    n1 = madd(Vec3ff(t0),an1,t1*bn1);
    n2 = madd(Vec3ff(t0),an2,t1*bn2);
    n3 = madd(Vec3ff(t0),an3,t1*bn3);
  }

  /*! loads curve vertices for specified time for mblur and non-mblur case */
  __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
  {
    if (hasMotionBlur()) gather(p0,p1,p2,p3,n0,n1,n2,n3,i,time);
    else                 gather(p0,p1,p2,p3,n0,n1,n2,n3,i);
  }

  /*! builds the oriented-curve surface for primID at a discrete time step */
  template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
  __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
  {
    Vec3ff v0,v1,v2,v3; Vec3fa n0,n1,n2,n3;
    unsigned int vertexID = curve(primID);
    gather(v0,v1,v2,v3,n0,n1,n2,n3,vertexID,itime);
    SourceCurve3ff ccurve(v0,v1,v2,v3);
    SourceCurve3fa ncurve(n0,n1,n2,n3);
    ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
    return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
  }

  /*! builds the oriented-curve surface for primID at a continuous time,
   *  blending the two adjacent time-step surfaces */
  template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
  __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0);
    const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1);
    return clerp(curve0,curve1,ftime);
  }

  /*! as above but also valid for non-motion-blurred geometry */
  template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
  __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurveSafe(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
  {
    float ftime = 0.0f;
    const size_t itime = hasMotionBlur() ? timeSegment(time, ftime) : 0;
    const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0);
    if (hasMotionBlur()) {
      const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1);
      return clerp(curve0,curve1,ftime);
    }
    return curve0;
  }

  /*! gathers the hermite curve starting with i'th vertex */
  __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const
  {
    p0 = vertex (i+0);
    p1 = vertex (i+1);
    t0 = tangent(i+0);
    t1 = tangent(i+1);
  }

  /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */
  __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const
  {
    p0 = vertex (i+0,itime);
    p1 = vertex (i+1,itime);
    t0 = tangent(i+0,itime);
    t1 = tangent(i+1,itime);
  }

  /*! loads hermite curve vertices for specified time, linearly interpolating
   *  between the two adjacent time steps */
  __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float f0 = 1.0f - ftime, f1 = ftime;
    Vec3ff ap0,at0,ap1,at1;
    gather_hermite(ap0,at0,ap1,at1,i,itime);
    Vec3ff bp0,bt0,bp1,bt1;
    gather_hermite(bp0,bt0,bp1,bt1,i,itime+1);
    p0 = madd(Vec3ff(f0),ap0,f1*bp0);
    t0 = madd(Vec3ff(f0),at0,f1*bt0);
    p1 = madd(Vec3ff(f0),ap1,f1*bp1);
    t1 = madd(Vec3ff(f0),at1,f1*bt1);
  }

  /*! loads curve vertices for specified time for mblur and non-mblur geometry */
  __forceinline void gather_hermite_safe(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
  {
    if (hasMotionBlur()) gather_hermite(p0,t0,p1,t1,i,time);
    else                 gather_hermite(p0,t0,p1,t1,i);
  }

  /*! gathers the hermite curve (with normals and normal derivatives) starting with i'th vertex */
  __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const
  {
    p0 = vertex (i+0);
    p1 = vertex (i+1);
    t0 = tangent(i+0);
    t1 = tangent(i+1);
    n0 = normal(i+0);
    n1 = normal(i+1);
    dn0 = dnormal(i+0);
    dn1 = dnormal(i+1);
  }

  /*! gathers the hermite curve (with normals and normal derivatives) starting with i'th vertex of itime'th timestep */
  __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const
  {
    p0 = vertex (i+0,itime);
    p1 = vertex (i+1,itime);
    t0 = tangent(i+0,itime);
    t1 = tangent(i+1,itime);
    n0 = normal(i+0,itime);
    n1 = normal(i+1,itime);
    dn0 = dnormal(i+0,itime);
    dn1 = dnormal(i+1,itime);
  }

  /*! loads hermite curve vertices, normals and derivatives for specified time */
  __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float f0 = 1.0f - ftime, f1 = ftime;
    Vec3ff ap0,at0,ap1,at1; Vec3fa an0,adn0,an1,adn1;
    gather_hermite(ap0,at0,an0,adn0,ap1,at1,an1,adn1,i,itime);
    Vec3ff bp0,bt0,bp1,bt1; Vec3fa bn0,bdn0,bn1,bdn1;
    gather_hermite(bp0,bt0,bn0,bdn0,bp1,bt1,bn1,bdn1,i,itime+1);
    p0 = madd(Vec3ff(f0),ap0,f1*bp0);
    t0 = madd(Vec3ff(f0),at0,f1*bt0);
    n0 = madd(Vec3ff(f0),an0,f1*bn0);
    dn0= madd(Vec3ff(f0),adn0,f1*bdn0);
    p1 = madd(Vec3ff(f0),ap1,f1*bp1);
    t1 = madd(Vec3ff(f0),at1,f1*bt1);
    n1 = madd(Vec3ff(f0),an1,f1*bn1);
    dn1= madd(Vec3ff(f0),adn1,f1*bdn1);
  }

  /*! loads curve vertices for specified time for mblur and non-mblur geometry */
  __forceinline void gather_hermite_safe(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
  {
    if (hasMotionBlur()) gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,i,time);
    else                 gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,i);
  }

  /*! builds the oriented hermite-curve surface for primID at a discrete time step */
  template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
  __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
  {
    Vec3ff v0,t0,v1,t1; Vec3fa n0,dn0,n1,dn1;
    unsigned int vertexID = curve(primID);
    gather_hermite(v0,t0,n0,dn0,v1,t1,n1,dn1,vertexID,itime);

    SourceCurve3ff ccurve(v0,t0,v1,t1);
    SourceCurve3fa ncurve(n0,dn0,n1,dn1);
    ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve);
    return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
  }

  /*! builds the oriented hermite-curve surface for primID at a continuous time */
  template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
  __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0);
    const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1);
    return clerp(curve0,curve1,ftime);
  }

  /*! as above but also valid for non-motion-blurred geometry */
  template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
  __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurveSafe(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
  {
    float ftime = 0.0f;
    const size_t itime = hasMotionBlur() ? timeSegment(time, ftime) : 0;
    const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0);
    if (hasMotionBlur()) {
      const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1);
      return clerp(curve0,curve1,ftime);
    }
    return curve0;
  }

  /* returns the projected area (constant placeholder for curves) */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    return 1.0f;
  }

private:
  /*! resizes the per-timestep buffer arrays to numSteps entries */
  void resizeBuffers(unsigned int numSteps);

public:
  BufferView<unsigned int> curves;        //!< array of curve indices
  BufferView<Vec3ff> vertices0;           //!< fast access to first vertex buffer
  BufferView<Vec3fa> normals0;            //!< fast access to first normal buffer
  BufferView<Vec3ff> tangents0;           //!< fast access to first tangent buffer
  BufferView<Vec3fa> dnormals0;           //!< fast access to first normal derivative buffer
  Device::vector<BufferView<Vec3ff>> vertices = device;   //!< vertex array for each timestep
  Device::vector<BufferView<Vec3fa>> normals = device;    //!< normal array for each timestep
  Device::vector<BufferView<Vec3ff>> tangents = device;   //!< tangent array for each timestep
  Device::vector<BufferView<Vec3fa>> dnormals = device;   //!< normal derivative array for each timestep
  BufferView<char> flags;                 //!< start, end flag per segment
  Device::vector<BufferView<char>> vertexAttribs = device; //!< user buffers
  int tessellationRate;                   //!< tessellation rate for flat curve
  float maxRadiusScale = 1.0;             //!< maximal min-width scaling of curve radii
};
namespace isa
{
/*! ISA-specific interface over CurveGeometry for curves whose four control
 *  points are stored directly (non-hermite basis). The Curve template
 *  parameter selects the basis (bezier, b-spline, catmull-rom, ...). */
template<template<typename Ty> class Curve>
struct CurveGeometryInterface : public CurveGeometry
{
  typedef Curve<Vec3ff> Curve3ff;
  typedef Curve<Vec3fa> Curve3fa;

  CurveGeometryInterface (Device* device, Geometry::GType gtype)
    : CurveGeometry(device,gtype) {}

  /*! returns the i'th curve with radii scaled by maxRadiusScale */
  __forceinline const Curve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const
  {
    const unsigned int index = curve(i);
    Vec3ff v0 = vertex(index+0,itime);
    Vec3ff v1 = vertex(index+1,itime);
    Vec3ff v2 = vertex(index+2,itime);
    Vec3ff v3 = vertex(index+3,itime);
    v0.w *= maxRadiusScale;
    v1.w *= maxRadiusScale;
    v2.w *= maxRadiusScale;
    v3.w *= maxRadiusScale;
    return Curve3ff (v0,v1,v2,v3);
  }

  /*! returns the i'th curve, radius-scaled and transformed into 'space' */
  __forceinline const Curve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
  {
    const unsigned int index = curve(i);
    const Vec3ff v0 = vertex(index+0,itime);
    const Vec3ff v1 = vertex(index+1,itime);
    const Vec3ff v2 = vertex(index+2,itime);
    const Vec3ff v3 = vertex(index+3,itime);
    const Vec3ff w0(xfmPoint(space,(Vec3fa)v0), maxRadiusScale*v0.w);
    const Vec3ff w1(xfmPoint(space,(Vec3fa)v1), maxRadiusScale*v1.w);
    const Vec3ff w2(xfmPoint(space,(Vec3fa)v2), maxRadiusScale*v2.w);
    const Vec3ff w3(xfmPoint(space,(Vec3fa)v3), maxRadiusScale*v3.w);
    return Curve3ff(w0,w1,w2,w3);
  }

  /*! returns the i'th curve translated by -ofs, uniformly scaled, and
   *  transformed into 'space'; radii are additionally scaled by r_scale0 */
  __forceinline const Curve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
  {
    const float r_scale = r_scale0*scale;
    const unsigned int index = curve(i);
    const Vec3ff v0 = vertex(index+0,itime);
    const Vec3ff v1 = vertex(index+1,itime);
    const Vec3ff v2 = vertex(index+2,itime);
    const Vec3ff v3 = vertex(index+3,itime);
    const Vec3ff w0(xfmPoint(space,((Vec3fa)v0-ofs)*Vec3fa(scale)), maxRadiusScale*v0.w*r_scale);
    const Vec3ff w1(xfmPoint(space,((Vec3fa)v1-ofs)*Vec3fa(scale)), maxRadiusScale*v1.w*r_scale);
    const Vec3ff w2(xfmPoint(space,((Vec3fa)v2-ofs)*Vec3fa(scale)), maxRadiusScale*v2.w*r_scale);
    const Vec3ff w3(xfmPoint(space,((Vec3fa)v3-ofs)*Vec3fa(scale)), maxRadiusScale*v3.w*r_scale);
    return Curve3ff(w0,w1,w2,w3);
  }

  /*! returns the i'th normal curve */
  __forceinline const Curve3fa getNormalCurve(size_t i, size_t itime = 0) const
  {
    const unsigned int index = curve(i);
    const Vec3fa n0 = normal(index+0,itime);
    const Vec3fa n1 = normal(index+1,itime);
    const Vec3fa n2 = normal(index+2,itime);
    const Vec3fa n3 = normal(index+3,itime);
    return Curve3fa (n0,n1,n2,n3);
  }

  /*! returns the i'th oriented-curve surface (center + normal curve) */
  __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const
  {
    const Curve3ff center = getCurveScaledRadius(i,itime);
    const Curve3fa normal = getNormalCurve(i,itime);
    const TensorLinearCubicBezierSurface3fa ocurve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normal);
    return ocurve;
  }

  /*! as above, transformed into 'space' */
  __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
    return getOrientedCurveScaledRadius(i,itime).xfm(space);
  }

  /*! as above, translated/scaled and transformed into 'space' */
  __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
    return getOrientedCurveScaledRadius(i,itime).xfm(space,ofs,scale);
  }

  /*! check if the i'th primitive is valid at all time steps of itime_range
   *  (indices in bounds, finite positions/radii, and — for oriented curves —
   *  finite normals and bounds) */
  __forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const
  {
    const unsigned int index = curve(i);
    if (index+3 >= numVertices()) return false;

    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
    {
      const float r0 = radius(index+0,itime);
      const float r1 = radius(index+1,itime);
      const float r2 = radius(index+2,itime);
      const float r3 = radius(index+3,itime);
      if (!isvalid(r0) || !isvalid(r1) || !isvalid(r2) || !isvalid(r3))
        return false;

      const Vec3fa v0 = vertex(index+0,itime);
      const Vec3fa v1 = vertex(index+1,itime);
      const Vec3fa v2 = vertex(index+2,itime);
      const Vec3fa v3 = vertex(index+3,itime);
      if (!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3))
        return false;

      if (ctype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE)
      {
        const Vec3fa n0 = normal(index+0,itime);
        const Vec3fa n1 = normal(index+1,itime);
        if (!isvalid(n0) || !isvalid(n1))
          return false;

        const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds();
        if (!isvalid(b))
          return false;
      }
    }

    return true;
  }

  /*! evaluates the curve basis over a user vertex(-attribute) buffer,
   *  writing value, first and second derivative at parameter u, N floats
   *  at a time */
  template<int N>
  void interpolate_impl(const RTCInterpolateArguments* const args)
  {
    unsigned int primID = args->primID;
    float u = args->u;
    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* ddPdudu = args->ddPdudu;
    unsigned int valueCount = args->valueCount;

    /* calculate base pointer and stride */
    /* fixed off-by-one: the attribute slot is used as an index below, so it
       must be strictly less than vertexAttribs.size() (was '<=') */
    assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
           (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot < vertexAttribs.size()));
    const char* src = nullptr;
    size_t stride = 0;
    if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
      src = vertexAttribs[bufferSlot].getPtr();
      stride = vertexAttribs[bufferSlot].getStride();
    } else {
      src = vertices[bufferSlot].getPtr();
      stride = vertices[bufferSlot].getStride();
    }

    for (unsigned int i=0; i<valueCount; i+=N)
    {
      size_t ofs = i*sizeof(float);
      const size_t index = curves[primID];
      /* mask off lanes beyond valueCount */
      const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
      const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+0)*stride+ofs]);
      const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+1)*stride+ofs]);
      const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+2)*stride+ofs]);
      const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+3)*stride+ofs]);

      const Curve<vfloat<N>> curve(p0,p1,p2,p3);
      if (P      ) mem<vfloat<N>>::storeu(valid,P+i,      curve.eval(u));
      if (dPdu   ) mem<vfloat<N>>::storeu(valid,dPdu+i,   curve.eval_du(u));
      if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u));
    }
  }

  void interpolate(const RTCInterpolateArguments* const args) {
    interpolate_impl<4>(args);
  }
};
template<template<typename Ty> class Curve>
struct HermiteCurveGeometryInterface : public CurveGeometry
{
typedef Curve<Vec3ff> HermiteCurve3ff;
typedef Curve<Vec3fa> HermiteCurve3fa;
HermiteCurveGeometryInterface (Device* device, Geometry::GType gtype)
: CurveGeometry(device,gtype) {}
__forceinline const HermiteCurve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const
{
const unsigned int index = curve(i);
Vec3ff v0 = vertex(index+0,itime);
Vec3ff v1 = vertex(index+1,itime);
Vec3ff t0 = tangent(index+0,itime);
Vec3ff t1 = tangent(index+1,itime);
v0.w *= maxRadiusScale;
v1.w *= maxRadiusScale;
t0.w *= maxRadiusScale;
t1.w *= maxRadiusScale;
return HermiteCurve3ff (v0,t0,v1,t1);
}
__forceinline const HermiteCurve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
const unsigned int index = curve(i);
const Vec3ff v0 = vertex(index+0,itime);
const Vec3ff v1 = vertex(index+1,itime);
const Vec3ff t0 = tangent(index+0,itime);
const Vec3ff t1 = tangent(index+1,itime);
const Vec3ff V0(xfmPoint(space,(Vec3fa)v0),maxRadiusScale*v0.w);
const Vec3ff V1(xfmPoint(space,(Vec3fa)v1),maxRadiusScale*v1.w);
const Vec3ff T0(xfmVector(space,(Vec3fa)t0),maxRadiusScale*t0.w);
const Vec3ff T1(xfmVector(space,(Vec3fa)t1),maxRadiusScale*t1.w);
return HermiteCurve3ff(V0,T0,V1,T1);
}
__forceinline const HermiteCurve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
const float r_scale = r_scale0*scale;
const unsigned int index = curve(i);
const Vec3ff v0 = vertex(index+0,itime);
const Vec3ff v1 = vertex(index+1,itime);
const Vec3ff t0 = tangent(index+0,itime);
const Vec3ff t1 = tangent(index+1,itime);
const Vec3ff V0(xfmPoint(space,(v0-ofs)*Vec3fa(scale)), maxRadiusScale*v0.w*r_scale);
const Vec3ff V1(xfmPoint(space,(v1-ofs)*Vec3fa(scale)), maxRadiusScale*v1.w*r_scale);
const Vec3ff T0(xfmVector(space,t0*Vec3fa(scale)), maxRadiusScale*t0.w*r_scale);
const Vec3ff T1(xfmVector(space,t1*Vec3fa(scale)), maxRadiusScale*t1.w*r_scale);
return HermiteCurve3ff(V0,T0,V1,T1);
}
__forceinline const HermiteCurve3fa getNormalCurve(size_t i, size_t itime = 0) const
{
const unsigned int index = curve(i);
const Vec3fa n0 = normal(index+0,itime);
const Vec3fa n1 = normal(index+1,itime);
const Vec3fa dn0 = dnormal(index+0,itime);
const Vec3fa dn1 = dnormal(index+1,itime);
return HermiteCurve3fa (n0,dn0,n1,dn1);
}
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const
{
const HermiteCurve3ff center = getCurveScaledRadius(i,itime);
const HermiteCurve3fa normal = getNormalCurve(i,itime);
const TensorLinearCubicBezierSurface3fa ocurve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normal);
return ocurve;
}
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
return getOrientedCurveScaledRadius(i,itime).xfm(space);
}
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
return getOrientedCurveScaledRadius(i,itime).xfm(space,ofs,scale);
}
/*! check if the i'th primitive is valid at the itime'th time step */
__forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const
{
const unsigned int index = curve(i);
if (index+1 >= numVertices()) return false;
for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
{
const Vec3ff v0 = vertex(index+0,itime);
const Vec3ff v1 = vertex(index+1,itime);
if (!isvalid4(v0) || !isvalid4(v1))
return false;
const Vec3ff t0 = tangent(index+0,itime);
const Vec3ff t1 = tangent(index+1,itime);
if (!isvalid4(t0) || !isvalid4(t1))
return false;
if (ctype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE)
{
const Vec3fa n0 = normal(index+0,itime);
const Vec3fa n1 = normal(index+1,itime);
if (!isvalid(n0) || !isvalid(n1))
return false;
const Vec3fa dn0 = dnormal(index+0,itime);
const Vec3fa dn1 = dnormal(index+1,itime);
if (!isvalid(dn0) || !isvalid(dn1))
return false;
const BBox3fa b = getOrientedCurveScaledRadius(i,itime).accurateBounds();
if (!isvalid(b))
return false;
}
}
return true;
}
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)
{
unsigned int primID = args->primID;
float u = args->u;
RTCBufferType bufferType = args->bufferType;
unsigned int bufferSlot = args->bufferSlot;
float* P = args->P;
float* dPdu = args->dPdu;
float* ddPdudu = args->ddPdudu;
unsigned int valueCount = args->valueCount;
/* we interpolate vertex attributes linearly for hermite basis */
if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
{
assert(bufferSlot <= vertexAttribs.size());
const char* vsrc = vertexAttribs[bufferSlot].getPtr();
const size_t vstride = vertexAttribs[bufferSlot].getStride();
for (unsigned int i=0; i<valueCount; i+=N)
{
const size_t ofs = i*sizeof(float);
const size_t index = curves[primID];
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]);
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]);
if (P ) mem<vfloat<N>>::storeu(valid,P+i, madd(1.0f-u,p0,u*p1));
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, p1-p0);
if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
}
}
/* interpolation for vertex buffers */
else
{
assert(bufferSlot < numTimeSteps);
const char* vsrc = vertices[bufferSlot].getPtr();
const char* tsrc = tangents[bufferSlot].getPtr();
const size_t vstride = vertices[bufferSlot].getStride();
const size_t tstride = vertices[bufferSlot].getStride();
for (unsigned int i=0; i<valueCount; i+=N)
{
const size_t ofs = i*sizeof(float);
const size_t index = curves[primID];
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]);
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]);
const vfloat<N> t0 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+0)*tstride+ofs]);
const vfloat<N> t1 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+1)*tstride+ofs]);
const HermiteCurveT<vfloat<N>> curve(p0,t0,p1,t1);
if (P ) mem<vfloat<N>>::storeu(valid,P+i, curve.eval(u));
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, curve.eval_du(u));
if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u));
}
}
}
void interpolate(const RTCInterpolateArguments* const args) {
interpolate_impl<4>(args);
}
};
}
DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType);
}

View file

@ -0,0 +1,468 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "buffer.h"
namespace embree
{
/*! Grid Mesh */
/*! Grid Mesh: a set of regular vertex grids, each implicitly tessellated into quads */
struct GridMesh : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH;

  /*! grid primitive: a rectangular patch of resX x resY vertices inside the vertex buffer */
  struct Grid
  {
    unsigned int startVtxID;    //!< index of the first vertex of this grid in the vertex buffer
    unsigned int lineVtxOffset; //!< offset (in vertices) between two consecutive grid rows
    unsigned short resX,resY;   //!< number of vertices in x and y direction

    /* border flags due to 3x3 vertex pattern */
    __forceinline unsigned int get3x3FlagsX(const unsigned int x) const
    {
      // sets bit 15 when a full 3x3 vertex block starting at x would reach past the grid border
      return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0;
    }

    /* border flags due to 3x3 vertex pattern */
    __forceinline unsigned int get3x3FlagsY(const unsigned int y) const
    {
      // sets bit 15 when a full 3x3 vertex block starting at y would reach past the grid border
      return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0;
    }

    /*! outputs grid structure */
    __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) {
      return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }";
    }
  };

public:

  /*! grid mesh construction */
  GridMesh (Device* device);

  /* geometry interface (implemented in the corresponding .cpp) */
public:
  void setMask(unsigned mask);
  void setNumTimeSteps (unsigned int numTimeSteps);
  void setVertexAttributeCount (unsigned int N);
  void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify();
  void interpolate(const RTCInterpolateArguments* const args);

  /*! interpolates vertex data at (u,v); the quad containing (u,v) is located first,
   *  then the value is interpolated barycentrically on one of its two triangles.
   *  Writes up to valueCount floats into each non-null output array. */
  template<int N>
  void interpolate_impl(const RTCInterpolateArguments* const args)
  {
    unsigned int primID = args->primID;
    float U = args->u;
    float V = args->v;

    /* clamp input u,v to [0;1] range */
    U = max(min(U,1.0f),0.0f);
    V = max(min(V,1.0f),0.0f);

    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* dPdv = args->dPdv;
    float* ddPdudu = args->ddPdudu;
    float* ddPdvdv = args->ddPdvdv;
    float* ddPdudv = args->ddPdudv;
    unsigned int valueCount = args->valueCount;

    /* calculate base pointer and stride */
    assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
           (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
    const char* src = nullptr;
    size_t stride = 0;
    if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
      src    = vertexAttribs[bufferSlot].getPtr();
      stride = vertexAttribs[bufferSlot].getStride();
    } else {
      src    = vertices[bufferSlot].getPtr();
      stride = vertices[bufferSlot].getStride();
    }

    const Grid& grid = grids[primID];
    const int grid_width  = grid.resX-1;
    const int grid_height = grid.resY-1;
    const float rcp_grid_width = rcp(float(grid_width));
    const float rcp_grid_height = rcp(float(grid_height));
    /* locate the cell (iu,iv) containing (U,V) and its local coordinates (u,v) */
    const int iu = min((int)floor(U*grid_width ),grid_width);
    const int iv = min((int)floor(V*grid_height),grid_height);
    const float u = U*grid_width-float(iu);
    const float v = V*grid_height-float(iv);

    for (unsigned int i=0; i<valueCount; i+=N)
    {
      const size_t ofs = i*sizeof(float);
      const unsigned int idx0 = grid.startVtxID + (iv+0)*grid.lineVtxOffset + iu; // lower row of the cell
      const unsigned int idx1 = grid.startVtxID + (iv+1)*grid.lineVtxOffset + iu; // upper row of the cell

      const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount)); // mask off the tail
      const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+0)*stride+ofs]);
      const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+1)*stride+ofs]);
      const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+1)*stride+ofs]);
      const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+0)*stride+ofs]);
      /* pick lower-left or upper-right triangle of the quad and interpolate barycentrically */
      const vbool<N> left = u+v <= 1.0f;
      const vfloat<N> Q0 = select(left,p0,p2);
      const vfloat<N> Q1 = select(left,p1,p3);
      const vfloat<N> Q2 = select(left,p3,p1);
      const vfloat<N> U  = select(left,u,vfloat<N>(1.0f)-u); // shadows outer float U intentionally
      const vfloat<N> V  = select(left,v,vfloat<N>(1.0f)-v); // shadows outer float V intentionally
      const vfloat<N> W  = 1.0f-U-V;

      if (P) {
        mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
      }
      if (dPdu) {
        /* per the rtcInterpolate contract, dPdu and dPdv must both be non-null when either is */
        assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)*rcp_grid_width);
        assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)*rcp_grid_height);
      }
      if (ddPdudu) {
        /* second derivatives of a bilinear patch triangle are zero; all three must be non-null together */
        assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
        assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
        assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
      }
    }
  }

  void addElementsToCount (GeometryCounts & counts) const;

  /*! returns the total number of quads over all grids */
  __forceinline unsigned int getNumTotalQuads() const
  {
    size_t quads = 0;
    for (size_t primID=0; primID<numPrimitives; primID++)
      quads += getNumQuads(primID);
    return quads;
  }

  /*! returns the number of quads of one grid (at least 1) */
  __forceinline unsigned int getNumQuads(const size_t gridID) const
  {
    const Grid& g = grid(gridID);
    return (unsigned int) max((int)1,((int)g.resX-1) * ((int)g.resY-1));
  }

  /*! returns the number of 3x3-vertex sub-grids of one grid (at least 1) */
  __forceinline unsigned int getNumSubGrids(const size_t gridID) const
  {
    const Grid& g = grid(gridID);
    return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1));
  }

  /*! get fast access to first vertex buffer */
  __forceinline float * getCompactVertexArray () const {
    return (float*) vertices0.getPtr();
  }

public:

  /*! returns number of vertices */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns i'th grid*/
  __forceinline const Grid& grid(size_t i) const {
    return grids[i];
  }

  /*! returns i'th vertex of the first time step */
  __forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load
    return vertices0[i];
  }

  /*! returns i'th vertex of the first time step */
  __forceinline const char* vertexPtr(size_t i) const {
    return vertices0.getPtr(i);
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns i'th vertex of for specified time, linearly interpolated between the two enclosing time steps */
  __forceinline const Vec3fa vertex(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3fa v0 = vertex(i, itime+0);
    Vec3fa v1 = vertex(i, itime+1);
    return madd(Vec3fa(t0),v0,t1*v1);
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline const char* vertexPtr(size_t i, size_t itime) const {
    return vertices[itime].getPtr(i);
  }

  /*! returns the vertex-buffer index of grid vertex (x,y) */
  __forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const {
    assert(x < (size_t)g.resX);
    assert(y < (size_t)g.resY);
    return g.startVtxID + x + y * g.lineVtxOffset;
  }

  /*! returns grid vertex (x,y) of the first timestep */
  __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const {
    const size_t index = grid_vertex_index(g,x,y);
    return vertex(index);
  }

  /*! returns grid vertex (x,y) of the itime'th timestep */
  __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const {
    const size_t index = grid_vertex_index(g,x,y);
    return vertex(index,itime);
  }

  /*! returns grid vertex (x,y) interpolated at the specified time */
  __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, float time) const {
    const size_t index = grid_vertex_index(g,x,y);
    return vertex(index,time);
  }

  /*! gathers quad vertices */
  __forceinline void gather_quad_vertices(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y) const
  {
    v0 = grid_vertex(g,x+0,y+0);
    v1 = grid_vertex(g,x+1,y+0);
    v2 = grid_vertex(g,x+1,y+1);
    v3 = grid_vertex(g,x+0,y+1);
  }

  /*! gathers quad vertices for specified time */
  __forceinline void gather_quad_vertices(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y, float time) const
  {
    v0 = grid_vertex(g,x+0,y+0,time);
    v1 = grid_vertex(g,x+1,y+0,time);
    v2 = grid_vertex(g,x+1,y+1,time);
    v3 = grid_vertex(g,x+0,y+1,time);
  }

  /*! gathers quad vertices for mblur and non-mblur meshes */
  __forceinline void gather_quad_vertices_safe(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y, float time) const
  {
    if (hasMotionBlur()) gather_quad_vertices(v0,v1,v2,v3,g,x,y,time);
    else                 gather_quad_vertices(v0,v1,v2,v3,g,x,y);
  }

  /*! calculates the build bounds of the quad at (sx,sy) over all time steps; returns false if any vertex is invalid */
  __forceinline bool buildBoundsQuad(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
  {
    BBox3fa b(empty);
    for (size_t t=0; t<numTimeSteps; t++)
    {
      for (size_t y=sy;y<sy+2;y++)
        for (size_t x=sx;x<sx+2;x++)
        {
          const Vec3fa v = grid_vertex(g,x,y,t);
          if (unlikely(!isvalid(v))) return false;
          b.extend(v);
        }
    }

    bbox = b;
    return true;
  }

  /*! calculates the build bounds of the 3x3 sub-grid at (sx,sy) over all time steps; returns false if any vertex is invalid */
  __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
  {
    BBox3fa b(empty);
    for (size_t t=0; t<numTimeSteps; t++)
    {
      for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
        for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
        {
          const Vec3fa v = grid_vertex(g,x,y,t);
          if (unlikely(!isvalid(v))) return false;
          b.extend(v);
        }
    }

    bbox = b;
    return true;
  }

  /*! calculates the build bounds of the 3x3 sub-grid at (sx,sy) for the itime'th time step; returns false if any vertex is invalid */
  __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const
  {
    assert(itime < numTimeSteps);
    BBox3fa b0(empty);
    for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
      for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
      {
        const Vec3fa v = grid_vertex(g,x,y,itime);
        if (unlikely(!isvalid(v))) return false;
        b0.extend(v);
      }

    /* use bounds of first time step in builder */
    bbox = b0;
    return true;
  }

  /*! check if the gridID'th primitive is valid at the itime'th time step */
  __forceinline bool valid(size_t gridID, size_t itime=0) const {
    return valid(gridID, make_range(itime, itime));
  }

  /*! check if the i'th primitive is valid between the specified time range */
  __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const
  {
    if (unlikely(gridID >= grids.size())) return false;
    const Grid &g = grid(gridID);
    /* first and last grid vertex have to lie inside the vertex buffer */
    if (unlikely(g.startVtxID + 0 >= vertices0.size())) return false;
    if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false;

    for (size_t y=0;y<g.resY;y++)
      for (size_t x=0;x<g.resX;x++)
        for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
          if (!isvalid(grid_vertex(g,x,y,itime))) return false;
    return true;
  }

  /*! bounds of the sub-grid at (sx,sy) for the itime'th time step */
  __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const
  {
    BBox3fa box(empty);
    buildBounds(g,sx,sy,itime,box);
    return box;
  }

  /*! linear bounds of the sub-grid over the time segment [itime,itime+1] */
  __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const {
    BBox3fa bounds0, bounds1;
    buildBounds(g,sx,sy,itime+0,bounds0);
    buildBounds(g,sx,sy,itime+1,bounds1);
    return LBBox3fa(bounds0,bounds1);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const {
    return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! projected area is not computed for grids; returns infinity */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    return pos_inf;
  }

public:
  BufferView<Grid> grids;                                  //!< array of grids
  BufferView<Vec3fa> vertices0;                            //!< fast access to first vertex buffer
  Device::vector<BufferView<Vec3fa>> vertices = device;    //!< vertex array for each timestep
  Device::vector<RawBufferView> vertexAttribs = device;    //!< vertex attributes

#if defined(EMBREE_SYCL_SUPPORT)
public:
  struct PrimID_XY { uint32_t primID; uint16_t x,y; };
  Device::vector<PrimID_XY> quadID_to_primID_xy = device;  //!< maps a quad to the primitive ID and grid coordinates
#endif
};
namespace isa
{
/*! ISA-specialized grid mesh providing build-primitive creation for the BVH builders */
struct GridMeshISA : public GridMesh
{
  GridMeshISA (Device* device)
    : GridMesh(device) {}

  /*! linear bounds of the sub-grid identified by its build ID */
  LBBox3fa vlinearBounds(size_t buildID, const BBox1f& time_range, const SubGridBuildData * const sgrids) const override {
    const SubGridBuildData& sg = sgrids[buildID];
    return linearBounds(grid(sg.primID), sg.x(), sg.y(), time_range);
  }

#if defined(EMBREE_SYCL_SUPPORT)
  /*! creates one PrimRef per quad, using the quad-to-grid mapping built at commit time */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const override
  {
    PrimInfo info(empty);
    for (size_t quadID=r.begin(); quadID<r.end(); quadID++)
    {
      const PrimID_XY& entry = quadID_to_primID_xy[quadID];
      BBox3fa box = empty;
      if (!buildBoundsQuad(grids[entry.primID],entry.x,entry.y,box)) continue; // skip invalid quads
      const PrimRef ref(box,geomID,unsigned(quadID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
    return info;
  }
#endif

  /*! creates one PrimRef per 3x3-vertex sub-grid and records its coordinates in sgrids */
  PrimInfo createPrimRefArray(mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, const range<size_t>& r, size_t k, unsigned int geomID) const override
  {
    PrimInfo info(empty);
    for (size_t gridID=r.begin(); gridID<r.end(); gridID++)
    {
      if (!valid(gridID)) continue; // skip grids with invalid geometry
      const GridMesh::Grid& gr = grid(gridID);
      for (unsigned int sy=0; sy<gr.resY-1u; sy+=2)
      {
        for (unsigned int sx=0; sx<gr.resX-1u; sx+=2)
        {
          BBox3fa box = empty;
          if (!buildBounds(gr,sx,sy,box)) continue; // get bounds of subgrid
          const PrimRef ref(box,(unsigned)geomID,(unsigned)k);
          info.add_center2(ref);
          sgrids[k] = SubGridBuildData(sx | gr.get3x3FlagsX(sx), sy | gr.get3x3FlagsY(sy), unsigned(gridID));
          prims[k++] = ref;
        }
      }
    }
    return info;
  }

#if defined(EMBREE_SYCL_SUPPORT)
  /*! creates one motion-blur PrimRef per quad over the given time range */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const override
  {
    const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
    PrimInfo info(empty);
    for (size_t quadID=r.begin(); quadID<r.end(); quadID++)
    {
      const PrimID_XY& entry = quadID_to_primID_xy[quadID];
      const LBBox3fa lbounds = linearBounds(grids[entry.primID],entry.x,entry.y,t0t1);
      const PrimRef ref(lbounds.bounds(), unsigned(geomID), unsigned(quadID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
    return info;
  }
#endif

  /*! creates one motion-blur PrimRefMB per sub-grid valid over the given time range */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const override
  {
    PrimInfoMB info(empty);
    for (size_t gridID=r.begin(); gridID<r.end(); gridID++)
    {
      if (!valid(gridID, timeSegmentRange(t0t1))) continue; // skip grids invalid in the time range
      const GridMesh::Grid& gr = grid(gridID);
      for (unsigned int sy=0; sy<gr.resY-1u; sy+=2)
      {
        for (unsigned int sx=0; sx<gr.resX-1u; sx+=2)
        {
          const PrimRefMB ref(linearBounds(gr,sx,sy,t0t1),numTimeSegments(),time_range,numTimeSegments(),unsigned(geomID),unsigned(k));
          info.add_primref(ref);
          sgrids[k] = SubGridBuildData(sx | gr.get3x3FlagsX(sx), sy | gr.get3x3FlagsY(sy), unsigned(gridID));
          prims[k++] = ref;
        }
      }
    }
    return info;
  }
};
}
DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*);
}

View file

@ -0,0 +1,302 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "accel.h"
namespace embree
{
struct MotionDerivativeCoefficients;
/*! Instanced acceleration structure */
/*! Instanced acceleration structure: places one reference to another scene,
 *  transformed by a (possibly time-varying) affine or quaternion-decomposed transform */
struct Instance : public Geometry
{
  //ALIGNED_STRUCT_(16);
  static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE;

public:
  Instance (Device* device, Accel* object = nullptr, unsigned int numTimeSteps = 1);
  ~Instance();

private:
  Instance (const Instance& other) DELETED; // do not implement
  Instance& operator= (const Instance& other) DELETED; // do not implement

private:
  /*! motion-blur bounds over a sub-range of the geometry time range (implemented in .cpp) */
  LBBox3fa nonlinearBounds(const BBox1f& time_range_in,
                           const BBox1f& geom_time_range,
                           float geom_time_segments) const;

  /*! bounds of one time segment, conservatively accounting for nonlinear motion (implemented in .cpp) */
  BBox3fa boundSegment(size_t itime,
                       BBox3fa const& obbox0, BBox3fa const& obbox1,
                       BBox3fa const& bbox0, BBox3fa const& bbox1,
                       float t_min, float t_max) const;

  /* calculates the (correct) interpolated bounds */
  __forceinline BBox3fa bounds(size_t itime0, size_t itime1, float f) const
  {
    /* quaternion transforms are spherically interpolated, plain matrices linearly */
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return xfmBounds(slerp(local2world[itime0], local2world[itime1], f),
                       lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
    return xfmBounds(lerp(local2world[itime0], local2world[itime1], f),
                     lerp(getObjectBounds(itime0), getObjectBounds(itime1), f));
  }

public:
  virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
  virtual void setInstancedScene(const Ref<Scene>& scene) override;
  virtual void setTransform(const AffineSpace3fa& local2world, unsigned int timeStep) override;
  virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) override;
  virtual AffineSpace3fa getTransform(float time) override;
  virtual AffineSpace3fa getTransform(size_t, float time) override;
  virtual void setMask (unsigned mask) override;
  virtual void build() {}
  virtual void addElementsToCount (GeometryCounts & counts) const override;
  virtual void commit() override;

public:

  /*! calculates the bounds of instance */
  __forceinline BBox3fa bounds(size_t i) const {
    assert(i == 0); // an instance has exactly one primitive
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return xfmBounds(quaternionDecompositionToAffineSpace(local2world[0]),object->bounds.bounds());
    return xfmBounds(local2world[0],object->bounds.bounds());
  }

  /*! gets the bounds of the instanced scene */
  __forceinline BBox3fa getObjectBounds(size_t itime) const {
    return object->getBounds(timeStep(itime));
  }

  /*! calculates the bounds of instance at the itime'th time step */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const {
    assert(i == 0);
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return xfmBounds(quaternionDecompositionToAffineSpace(local2world[itime]),getObjectBounds(itime));
    return xfmBounds(local2world[itime],getObjectBounds(itime));
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
    assert(i == 0);
    LBBox3fa lbbox = nonlinearBounds(dt, time_range, fnumTimeSegments);
    return lbbox;
  }

  /*! calculates the build bounds of the i'th item, if it's valid */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
  {
    assert(i==0);
    const BBox3fa b = bounds(i);
    if (bbox) *bbox = b;
    return isvalid(b);
  }

  /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    assert(i==0);
    const LBBox3fa bounds = linearBounds(i,itime);
    bbox = bounds.bounds ();
    return isvalid(bounds);
  }

  /* gets version info of topology */
  unsigned int getTopologyVersion() const {
    return numPrimitives;
  }

  /* returns true if topology changed */
  bool topologyChanged(unsigned int otherVersion) const {
    return numPrimitives != otherVersion;
  }

  /*! check if the i'th primitive is valid between the specified time range */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    assert(i == 0);
    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
      if (!isvalid(bounds(i,itime))) return false;

    return true;
  }

  /*! returns the local-to-world transform of time step 0 as an affine space */
  __forceinline AffineSpace3fa getLocal2World() const
  {
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return quaternionDecompositionToAffineSpace(local2world[0]);
    return local2world[0];
  }

  /*! returns the local-to-world transform interpolated at time t */
  __forceinline AffineSpace3fa getLocal2World(float t) const
  {
    if (numTimeSegments() > 0) {
      float ftime; const unsigned int itime = timeSegment(t, ftime);
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return slerp(local2world[itime+0],local2world[itime+1],ftime);
      return lerp(local2world[itime+0],local2world[itime+1],ftime);
    }
    return getLocal2World();
  }

  /*! returns the precomputed world-to-local transform of time step 0 */
  __forceinline AffineSpace3fa getWorld2Local() const {
    return world2local0;
  }

  /*! returns the world-to-local transform at time t (inverse of the interpolated transform) */
  __forceinline AffineSpace3fa getWorld2Local(float t) const {
    if (numTimeSegments() > 0)
      return rcp(getLocal2World(t));
    return getWorld2Local();
  }

  /*! SIMD variant: world-to-local transforms for K rays at K (possibly different) times */
  template<int K>
  __forceinline AffineSpace3vf<K> getWorld2Local(const vbool<K>& valid, const vfloat<K>& t) const
  {
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return getWorld2LocalSlerp<K>(valid, t);
    return getWorld2LocalLerp<K>(valid, t);
  }

  /*! approximates the projected area of the instance by the area of its world-space bounds */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    return area(bounds(i));
  }

private:

  /*! per-lane spherically interpolated world-to-local transforms (quaternion instances) */
  template<int K>
  __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(const vbool<K>& valid, const vfloat<K>& t) const
  {
    vfloat<K> ftime;
    const vint<K> itime_k = timeSegment<K>(t, ftime);
    assert(any(valid));
    const size_t index = bsf(movemask(valid));
    const int itime = itime_k[index];
    /* fast path: all active lanes fall into the same time segment */
    if (likely(all(valid, itime_k == vint<K>(itime)))) {
      return rcp(slerp(AffineSpace3vff<K>(local2world[itime+0]),
                       AffineSpace3vff<K>(local2world[itime+1]),
                       ftime));
    }
    else {
      /* slow path: gather transforms per unique time segment (inner itime shadows the outer one) */
      AffineSpace3vff<K> space0,space1;
      vbool<K> valid1 = valid;
      while (any(valid1)) {
        vbool<K> valid2;
        const int itime = next_unique(valid1, itime_k, valid2);
        space0 = select(valid2, AffineSpace3vff<K>(local2world[itime+0]), space0);
        space1 = select(valid2, AffineSpace3vff<K>(local2world[itime+1]), space1);
      }
      return rcp(slerp(space0, space1, ftime));
    }
  }

  /*! per-lane linearly interpolated world-to-local transforms (matrix instances) */
  template<int K>
  __forceinline AffineSpace3vf<K> getWorld2LocalLerp(const vbool<K>& valid, const vfloat<K>& t) const
  {
    vfloat<K> ftime;
    const vint<K> itime_k = timeSegment<K>(t, ftime);
    assert(any(valid));
    const size_t index = bsf(movemask(valid));
    const int itime = itime_k[index];
    /* fast path: all active lanes fall into the same time segment */
    if (likely(all(valid, itime_k == vint<K>(itime)))) {
      return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]),
                      AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]),
                      ftime));
    } else {
      /* slow path: gather transforms per unique time segment (inner itime shadows the outer one) */
      AffineSpace3vf<K> space0,space1;
      vbool<K> valid1 = valid;
      while (any(valid1)) {
        vbool<K> valid2;
        const int itime = next_unique(valid1, itime_k, valid2);
        space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]), space0);
        space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]), space1);
      }
      return rcp(lerp(space0, space1, ftime));
    }
  }

public:
  Accel* object;                 //!< pointer to instanced acceleration structure
  AffineSpace3ff* local2world;   //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
  AffineSpace3fa world2local0;   //!< transformation from world space to local space for timestep 0
};
namespace isa
{
/*! ISA-specialized instance geometry providing build-primitive creation for the BVH builders */
struct InstanceISA : public Instance
{
  InstanceISA (Device* device)
    : Instance(device) {}

  /*! linear bounds of the single instance primitive over the given time range */
  LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
    return linearBounds(primID,time_range);
  }

  /*! emits the one primitive reference of this instance (static case) */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    assert(r.begin() == 0);
    assert(r.end() == 1);

    PrimInfo info(empty);
    BBox3fa box = empty;
    if (buildBounds(0,&box)) { // instance emitted only when its bounds are finite
      const PrimRef ref(box,geomID,unsigned(0));
      info.add_center2(ref);
      prims[k++] = ref;
    }
    return info;
  }

  /*! emits the one primitive reference for the itime'th time step;
   *  the bounds of time step 0 are used (itime is currently unused) */
  PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    assert(r.begin() == 0);
    assert(r.end() == 1);

    PrimInfo info(empty);
    BBox3fa box = empty;
    if (buildBounds(0,&box)) {
      const PrimRef ref(box,geomID,unsigned(0));
      info.add_center2(ref);
      prims[k++] = ref;
    }
    return info;
  }

  /*! emits the one primitive reference bounding the instance over a time range */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    assert(r.begin() == 0);
    assert(r.end() == 1);

    PrimInfo info(empty);
    const BBox1f t0t1 = intersect(getTimeRange(), time_range);
    if (t0t1.empty()) return info; // geometry not alive in the queried interval

    const PrimRef ref(linearBounds(0, t0t1).bounds(), geomID, unsigned(0));
    info.add_center2(ref);
    prims[k++] = ref;
    return info;
  }

  /*! emits the one motion-blur primitive reference if the instance is valid over the time range */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    assert(r.begin() == 0);
    assert(r.end() == 1);

    PrimInfoMB info(empty);
    if (!valid(0, timeSegmentRange(t0t1))) return info;

    const PrimRefMB ref(linearBounds(0,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(0));
    info.add_primref(ref);
    prims[k++] = ref;
    return info;
  }
};
}
DECLARE_ISA_FUNCTION(Instance*, createInstance, Device*);
}

View file

@ -0,0 +1,385 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "accel.h"
namespace embree
{
struct MotionDerivativeCoefficients;
/*! Instanced acceleration structure */
/*! Instanced acceleration structure: an array of instances that all reference
 *  either one shared scene (fast path, 'object') or a per-instance scene picked
 *  from 'objects' via 'object_ids'. Per-timestep local-to-world transforms are
 *  stored in raw buffers ('l2w_buf') in one of several RTC matrix/quaternion
 *  formats. Method bodies without an inline definition live in the .cpp. */
struct InstanceArray : public Geometry
{
  //ALIGNED_STRUCT_(16);
  static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE_ARRAY;

public:
  InstanceArray (Device* device, unsigned int numTimeSteps = 1);
  ~InstanceArray();

private:
  InstanceArray (const InstanceArray& other) DELETED; // do not implement
  InstanceArray& operator= (const InstanceArray& other) DELETED; // do not implement

private:
  // non-linear (e.g. rotational) motion bounds of instance i over a time range;
  // defined out of line
  LBBox3fa nonlinearBounds(size_t i,
                           const BBox1f& time_range_in,
                           const BBox1f& geom_time_range,
                           float geom_time_segments) const;

  BBox3fa boundSegment(size_t i, size_t itime,
    BBox3fa const& obbox0, BBox3fa const& obbox1,
    BBox3fa const& bbox0, BBox3fa const& bbox1,
    float t_min, float t_max) const;

  /* calculates the (correct) interpolated bounds between two time steps
     (slerp of the transform for quaternion instances, lerp otherwise) */
  __forceinline BBox3fa bounds(size_t i, size_t itime0, size_t itime1, float f) const
  {
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return xfmBounds(slerp(l2w(i, itime0), l2w(i, itime1), f),
                       lerp(getObjectBounds(i, itime0), getObjectBounds(i, itime1), f));
    return xfmBounds(lerp(l2w(i, itime0), l2w(i, itime1), f),
                     lerp(getObjectBounds(i, itime0), getObjectBounds(i, itime1), f));
  }

public:
  virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) override;
  virtual void* getBuffer(RTCBufferType type, unsigned int slot) override;
  virtual void updateBuffer(RTCBufferType type, unsigned int slot) override;
  virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
  virtual void setInstancedScene(const Ref<Scene>& scene) override;
  virtual void setInstancedScenes(const RTCScene* scenes, size_t numScenes) override;
  virtual AffineSpace3fa getTransform(size_t, float time) override;
  virtual void setMask (unsigned mask) override;
  virtual void build() {}
  virtual void addElementsToCount (GeometryCounts & counts) const override;
  virtual void commit() override;

public:
  /*! calculates the bounds of instance i at the first time step */
  __forceinline BBox3fa bounds(size_t i) const {
    if (!valid(i))
      return BBox3fa();
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return xfmBounds(quaternionDecompositionToAffineSpace(l2w(i, 0)),getObject(i)->bounds.bounds());
    return xfmBounds(l2w(i, 0),getObject(i)->bounds.bounds());
  }

  /*! gets the bounds of the scene instanced by instance i at time step itime */
  __forceinline BBox3fa getObjectBounds(size_t i, size_t itime) const {
    if (!valid(i))
      return BBox3fa();
    return getObject(i)->getBounds(timeStep(itime));
  }

  /*! calculates the bounds of instance i at time step itime */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const {
    if (!valid(i))
      return BBox3fa();
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return xfmBounds(quaternionDecompositionToAffineSpace(l2w(i, itime)),getObjectBounds(i, itime));
    return xfmBounds(l2w(i, itime),getObjectBounds(i, itime));
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
    if (!valid(i))
      return LBBox3fa();
    LBBox3fa lbbox = nonlinearBounds(i, dt, time_range, fnumTimeSegments);
    return lbbox;
  }

  /*! calculates the build bounds of the i'th item, if it's valid */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
  {
    if (!valid(i))
      return false;
    const BBox3fa b = bounds(i);
    if (bbox) *bbox = b;
    return isvalid(b);
  }

  /*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    if (!valid(i))
      return false;
    const LBBox3fa bounds = linearBounds(i,itime);
    bbox = bounds.bounds ();
    return isvalid(bounds);
  }

  /* gets version info of topology */
  // NOTE(review): the primitive count doubles as the topology version here,
  // so topology is considered changed exactly when the count changes.
  unsigned int getTopologyVersion() const {
    return numPrimitives;
  }

  /* returns true if topology changed */
  bool topologyChanged(unsigned int otherVersion) const {
    return numPrimitives != otherVersion;
  }

  /*! check if the i'th primitive is valid (fast path instances are always
   *  valid; otherwise an object id of -1 marks a disabled slot) */
  __forceinline bool valid(size_t i) const
  {
    if (object) return true;
    return (object_ids[i] != (unsigned int)(-1));
  }

  /*! check if the i'th primitive is valid between the specified time range */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
      if (!isvalid(bounds(i,itime))) return false;

    return true;
  }

  /*! local-to-world transform of instance i at the first time step */
  __forceinline AffineSpace3fa getLocal2World(size_t i) const
  {
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return quaternionDecompositionToAffineSpace(l2w(i,0));
    return l2w(i, 0);
  }

  /*! local-to-world transform of instance i at time t (interpolated between
   *  the two enclosing time steps; slerp for quaternion instances) */
  __forceinline AffineSpace3fa getLocal2World(size_t i, float t) const
  {
    if (numTimeSegments() > 0) {
      float ftime; const unsigned int itime = timeSegment(t, ftime);
      if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
        return slerp(l2w(i, itime+0),l2w(i, itime+1),ftime);
      return lerp(l2w(i, itime+0),l2w(i, itime+1),ftime);
    }
    return getLocal2World(i);
  }

  __forceinline AffineSpace3fa getWorld2Local(size_t i) const {
    return rcp(getLocal2World(i));
  }

  __forceinline AffineSpace3fa getWorld2Local(size_t i, float t) const {
    return rcp(getLocal2World(i, t));
  }

  /*! SIMD world-to-local transform for K rays with per-lane times */
  template<int K>
  __forceinline AffineSpace3vf<K> getWorld2Local(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
  {
    if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
      return getWorld2LocalSlerp<K>(i, valid, t);
    return getWorld2LocalLerp<K>(i, valid, t);
  }

  __forceinline float projectedPrimitiveArea(const size_t i) const {
    return area(bounds(i));
  }

  /*! returns the scene instanced by slot i (nullptr for a disabled slot) */
  inline Accel* getObject(size_t i) const {
    if (object) {
      return object;
    }

    assert(objects);
    assert(i < numPrimitives);
    if (object_ids[i] == (unsigned int)(-1))
      return nullptr;
    assert(object_ids[i] < numObjects);
    return objects[object_ids[i]];
  }

private:
  template<int K>
  __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
  {
    vfloat<K> ftime;
    const vint<K> itime_k = timeSegment<K>(t, ftime);
    assert(any(valid));
    const size_t index = bsf(movemask(valid));
    const int itime = itime_k[index];
    // fast path: all active lanes fall into the same time segment
    if (likely(all(valid, itime_k == vint<K>(itime)))) {
      return rcp(slerp(AffineSpace3vff<K>(l2w(i, itime+0)),
                       AffineSpace3vff<K>(l2w(i, itime+1)),
                       ftime));
    }
    else {
      // slow path: gather transforms per unique time segment and blend per lane
      AffineSpace3vff<K> space0,space1;
      vbool<K> valid1 = valid;
      while (any(valid1)) {
        vbool<K> valid2;
        const int itime = next_unique(valid1, itime_k, valid2);

        space0 = select(valid2, AffineSpace3vff<K>(l2w(i, itime+0)), space0);
        space1 = select(valid2, AffineSpace3vff<K>(l2w(i, itime+1)), space1);
      }

      return rcp(slerp(space0, space1, ftime));
    }
  }

  template<int K>
  __forceinline AffineSpace3vf<K> getWorld2LocalLerp(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
  {
    vfloat<K> ftime;
    const vint<K> itime_k = timeSegment<K>(t, ftime);
    assert(any(valid));
    const size_t index = bsf(movemask(valid));
    const int itime = itime_k[index];
    // fast path: all active lanes share one time segment
    if (likely(all(valid, itime_k == vint<K>(itime)))) {
      return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+0)),
                      AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+1)),
                      ftime));
    } else {
      // slow path: one gather per unique time segment among the active lanes
      AffineSpace3vf<K> space0,space1;
      vbool<K> valid1 = valid;
      while (any(valid1)) {
        vbool<K> valid2;
        const int itime = next_unique(valid1, itime_k, valid2);

        space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+0)), space0);
        space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+1)), space1);
      }

      return rcp(lerp(space0, space1, ftime));
    }
  }

private:
  /*! decodes the raw transform buffer entry for instance i at time step itime
   *  into an AffineSpace3ff; supported buffer formats are 4x4 column-major,
   *  quaternion decomposition, and 3x4 column-/row-major matrices */
  __forceinline AffineSpace3ff l2w(size_t i, size_t itime) const {
    if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR) {
      return *(AffineSpace3ff*)(l2w_buf[itime].getPtr(i));
    }
    else if(l2w_buf[itime].getFormat() == RTC_FORMAT_QUATERNION_DECOMPOSITION) {
      AffineSpace3ff transform;
      QuaternionDecomposition* qd = (QuaternionDecomposition*)l2w_buf[itime].getPtr(i);
      transform.l.vx.x = qd->scale_x;
      transform.l.vy.y = qd->scale_y;
      transform.l.vz.z = qd->scale_z;
      transform.l.vy.x = qd->skew_xy;
      transform.l.vz.x = qd->skew_xz;
      transform.l.vz.y = qd->skew_yz;
      // NOTE(review): translation is packed into otherwise-unused matrix slots;
      // quaternionDecompositionToAffineSpace is expected to unpack this layout
      transform.l.vx.y = qd->translation_x;
      transform.l.vx.z = qd->translation_y;
      transform.l.vy.z = qd->translation_z;
      transform.p.x = qd->shift_x;
      transform.p.y = qd->shift_y;
      transform.p.z = qd->shift_z;

      // normalize quaternion
      Quaternion3f q(qd->quaternion_r, qd->quaternion_i, qd->quaternion_j, qd->quaternion_k);
      q = normalize(q);
      transform.l.vx.w = q.i;
      transform.l.vy.w = q.j;
      transform.l.vz.w = q.k;
      transform.p.w = q.r;

      return transform;
    }
    else if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR) {
      AffineSpace3f* l2w = reinterpret_cast<AffineSpace3f*>(l2w_buf[itime].getPtr(i));
      return AffineSpace3ff(*l2w);
    }
    else if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT3X4_ROW_MAJOR) {
      // transpose row-major storage into column vectors
      float* data = reinterpret_cast<float*>(l2w_buf[itime].getPtr(i));
      AffineSpace3f l2w;
      l2w.l.vx.x = data[0]; l2w.l.vy.x = data[1]; l2w.l.vz.x = data[2]; l2w.p.x = data[3];
      l2w.l.vx.y = data[4]; l2w.l.vy.y = data[5]; l2w.l.vz.y = data[6]; l2w.p.y = data[7];
      l2w.l.vx.z = data[8]; l2w.l.vy.z = data[9]; l2w.l.vz.z = data[10]; l2w.p.z = data[11];
      return l2w;
    }
    assert(false); // unsupported transform buffer format
    return AffineSpace3ff();
  }

  inline AffineSpace3ff l2w(size_t i) const {
    return l2w(i, 0);
  }

private:
  Accel* object;                 //!< fast path if only one scene is instanced
  Accel** objects;
  uint32_t numObjects;
  Device::vector<RawBufferView> l2w_buf = device; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
  BufferView<uint32_t> object_ids;  //!< array of scene ids per instance array primitive
};
namespace isa
{
/*! ISA-specialized instance array providing the PrimRef creation entry
 *  points used by the BVH builders. */
struct InstanceArrayISA : public InstanceArray
{
  InstanceArrayISA (Device* device)
    : InstanceArray(device) {}

  /*! linear motion bounds of one primitive over a time range */
  LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
    return linearBounds(primID,time_range);
  }

  /*! fills prims[k..] with one PrimRef per valid instance in r */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      BBox3fa box = empty;
      // buildBounds is evaluated first, matching the builder contract
      if (buildBounds(primID, &box) && valid(primID)) {
        const PrimRef ref(box, geomID, unsigned(primID));
        info.add_center2(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }

  /*! fills prims[k..] with one PrimRef per instance valid at time step itime */
  PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      BBox3fa box = empty;
      if (buildBounds(primID, itime, box)) {
        const PrimRef ref(box, geomID, unsigned(primID));
        info.add_center2(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }

  /*! fills prims[k..] with PrimRefs bounding each instance over the clipped time range */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    // restrict to the overlap of geometry time range and requested range
    const BBox1f clipped = BBox1f::intersect(getTimeRange(), time_range);
    if (clipped.empty()) return info;

    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      const LBBox3fa lbox = linearBounds(primID, clipped);
      if (isvalid(lbox.bounds())) {
        const PrimRef ref(lbox.bounds(), geomID, unsigned(primID));
        info.add_center2(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }

  /*! fills prims[k..] with motion-blur PrimRefMBs for each valid instance in r */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfoMB info(empty);
    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      if (valid(primID, timeSegmentRange(t0t1))) {
        const PrimRefMB ref(linearBounds(primID, t0t1), this->numTimeSegments(), this->time_range, this->numTimeSegments(), geomID, unsigned(primID));
        info.add_primref(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }
};
}
DECLARE_ISA_FUNCTION(InstanceArray*, createInstanceArray, Device*);
}

View file

@ -0,0 +1,634 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "geometry.h"
#include "buffer.h"
namespace embree
{
/*! represents an array of line segments */
/*! represents an array of line segments */
struct LineSegments : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE2;

public:

  /*! line segments construction */
  LineSegments (Device* device, Geometry::GType gtype);

public:
  void setMask (unsigned mask);
  void setNumTimeSteps (unsigned int numTimeSteps);
  void setVertexAttributeCount (unsigned int N);
  void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify ();
  void interpolate(const RTCInterpolateArguments* const args);
  void setTessellationRate(float N);
  void setMaxRadiusScale(float s);
  void addElementsToCount (GeometryCounts & counts) const;

  /*! interpolates vertex data (or a vertex attribute) at parameter u of
   *  primitive primID, writing up to valueCount floats each into the
   *  optional output arrays P (position), dPdu (first derivative) and
   *  ddPdudu (second derivative, identically zero for linear segments). */
  template<int N>
  void interpolate_impl(const RTCInterpolateArguments* const args)
  {
    unsigned int primID = args->primID;
    float u = args->u;
    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* ddPdudu = args->ddPdudu;
    unsigned int valueCount = args->valueCount;

    /* calculate base pointer and stride */
    assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
           (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
    const char* src = nullptr;
    size_t stride = 0;
    if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
      src = vertexAttribs[bufferSlot].getPtr();
      stride = vertexAttribs[bufferSlot].getStride();
    } else {
      src = vertices[bufferSlot].getPtr();
      stride = vertices[bufferSlot].getStride();
    }

    for (unsigned int i=0; i<valueCount; i+=N)
    {
      const size_t ofs = i*sizeof(float);
      const size_t segment = segments[primID];
      const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
      const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+0)*stride+ofs]);
      const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+1)*stride+ofs]);
      if (P      ) mem<vfloat<N>>::storeu(valid,P+i,lerp(p0,p1,u));
      if (dPdu   ) mem<vfloat<N>>::storeu(valid,dPdu+i,p1-p0);
      // FIX: the zero second derivative must be written to ddPdudu, not dPdu;
      // the old code clobbered the first derivative and left ddPdudu unwritten
      if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
    }
  }

public:

  /*! returns the number of vertices */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns the i'th segment */
  __forceinline const unsigned int& segment(size_t i) const {
    return segments[i];
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  /*! returns the i'th segment */
  template<int M>
  __forceinline const vuint<M> vsegment(const vuint<M>& i) const {
    return segments[i.v];
  }
#endif

  /*! returns the segment to the left of the i'th segment */
  __forceinline bool segmentLeftExists(size_t i) const {
    assert (flags);
    return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_LEFT) != 0;
  }

  /*! returns the segment to the right of the i'th segment */
  __forceinline bool segmentRightExists(size_t i) const {
    assert (flags);
    return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_RIGHT) != 0;
  }

  /*! returns i'th vertex of the first time step */
  __forceinline Vec3ff vertex(size_t i) const {
    return vertices0[i];
  }

  /*! returns i'th vertex of the first time step */
  __forceinline const char* vertexPtr(size_t i) const {
    return vertices0.getPtr(i);
  }

  /*! returns i'th normal of the first time step */
  __forceinline Vec3fa normal(size_t i) const {
    return normals0[i];
  }

  /*! returns i'th radius of the first time step */
  __forceinline float radius(size_t i) const {
    return vertices0[i].w;
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline Vec3ff vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline const char* vertexPtr(size_t i, size_t itime) const {
    return vertices[itime].getPtr(i);
  }

  /*! returns i'th normal of itime'th timestep */
  __forceinline Vec3fa normal(size_t i, size_t itime) const {
    return normals[itime][i];
  }

  /*! returns i'th radius of itime'th timestep */
  __forceinline float radius(size_t i, size_t itime) const {
    return vertices[itime][i].w;
  }

  /*! gathers the curve starting with i'th vertex */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid) const
  {
    p0 = vertex(vid+0);
    p1 = vertex(vid+1);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid) const
  {
    p0 = vertex(vid.v+0);
    p1 = vertex(vid.v+1);
  }
#endif

  /*! gathers the curve starting with i'th vertex of itime'th timestep */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid, size_t itime) const
  {
    p0 = vertex(vid+0,itime);
    p1 = vertex(vid+1,itime);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid, const vint<M>& itime) const
  {
    p0 = vertex(vid.v+0,itime.v);
    p1 = vertex(vid.v+1,itime.v);
  }
#endif

  /*! loads curve vertices for specified time */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3ff a0,a1; gather(a0,a1,vid,itime);
    Vec3ff b0,b1; gather(b0,b1,vid,itime+1);
    p0 = madd(Vec3ff(t0),a0,t1*b0);
    p1 = madd(Vec3ff(t0),a1,t1*b1);
  }

  /*! loads curve vertices for specified time for mblur and non-mblur case */
  __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, unsigned int vid, float time) const
  {
    if (hasMotionBlur()) gather(p0,p1,vid,time);
    else                 gather(p0,p1,vid);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid, const vfloat<M>& time) const
  {
    vfloat<M> ftime;
    const vint<M> itime = timeSegment<M>(time, ftime);
    const vfloat<M> t0 = 1.0f - ftime;
    const vfloat<M> t1 = ftime;
    Vec4vf<M> a0,a1; vgather<M>(a0,a1,vid,itime);
    Vec4vf<M> b0,b1; vgather<M>(b0,b1,vid,itime+1);
    p0 = madd(Vec4vf<M>(t0),a0,t1*b0);
    p1 = madd(Vec4vf<M>(t0),a1,t1*b1);
  }
#endif

  /*! gathers the cone curve starting with i'th vertex */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, unsigned int vid) const
  {
    gather(p0,p1,vid);
    cL = !segmentLeftExists (primID);
    cR = !segmentRightExists(primID);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, vbool<M>& cL, vbool<M>& cR, const vuint<M>& primID, const vuint<M>& vid) const
  {
    vgather<M>(p0,p1,vid);
    cL = !segmentLeftExists (primID.v);
    cR = !segmentRightExists(primID.v);
  }
#endif

  /*! gathers the cone curve starting with i'th vertex of itime'th timestep */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, size_t itime) const
  {
    gather(p0,p1,vid,itime);
    cL = !segmentLeftExists (primID);
    cR = !segmentRightExists(primID);
  }

  /*! loads cone curve vertices for specified time */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, float time) const
  {
    gather(p0,p1,vid,time);
    cL = !segmentLeftExists (primID);
    cR = !segmentRightExists(primID);
  }

  /*! loads cone curve vertices for specified time for mblur and non-mblur geometry */
  __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, float time) const
  {
    if (hasMotionBlur()) gather(p0,p1,cL,cR,primID,vid,time);
    else                 gather(p0,p1,cL,cR,primID,vid);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, vbool<M>& cL, vbool<M>& cR, const vuint<M>& primID, const vuint<M>& vid, const vfloat<M>& time) const
  {
    vgather<M>(p0,p1,vid,time);
    cL = !segmentLeftExists (primID.v);
    cR = !segmentRightExists(primID.v);
  }
#endif

  /*! gathers the curve starting with i'th vertex (neighbors set to inf when absent) */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid) const
  {
    p0 = vertex(vid+0);
    p1 = vertex(vid+1);
    p2 = segmentLeftExists (primID) ? vertex(vid-1) : Vec3ff(inf);
    p3 = segmentRightExists(primID) ? vertex(vid+2) : Vec3ff(inf);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid) const
  {
    p0 = vertex(vid.v+0);
    p1 = vertex(vid.v+1);
    vbool<M> left  = segmentLeftExists (primID.v);
    vbool<M> right = segmentRightExists(primID.v);
    // clamp neighbor indices so lanes without neighbors still load in-bounds
    vuint<M> i2 = select(left, vid-1,vid+0);
    vuint<M> i3 = select(right,vid+2,vid+1);
    p2 = vertex(i2.v);
    p3 = vertex(i3.v);
    p2 = select(left, p2,Vec4vf<M>(inf));
    p3 = select(right,p3,Vec4vf<M>(inf));
  }
#endif

  /*! gathers the curve starting with i'th vertex of itime'th timestep */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, size_t itime) const
  {
    p0 = vertex(vid+0,itime);
    p1 = vertex(vid+1,itime);
    p2 = segmentLeftExists (primID) ? vertex(vid-1,itime) : Vec3ff(inf);
    p3 = segmentRightExists(primID) ? vertex(vid+2,itime) : Vec3ff(inf);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid, const vint<M>& itime) const
  {
    p0 = vertex(vid.v+0, itime.v);
    p1 = vertex(vid.v+1, itime.v);
    vbool<M> left  = segmentLeftExists (primID.v);
    vbool<M> right = segmentRightExists(primID.v);
    vuint<M> i2 = select(left, vid-1,vid+0);
    vuint<M> i3 = select(right,vid+2,vid+1);
    p2 = vertex(i2.v, itime.v);
    p3 = vertex(i3.v, itime.v);
    p2 = select(left, p2,Vec4vf<M>(inf));
    p3 = select(right,p3,Vec4vf<M>(inf));
  }
#endif

  /*! loads curve vertices for specified time */
  __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3ff a0,a1,a2,a3; gather(a0,a1,a2,a3,primID,vid,itime);
    Vec3ff b0,b1,b2,b3; gather(b0,b1,b2,b3,primID,vid,itime+1);
    p0 = madd(Vec3ff(t0),a0,t1*b0);
    p1 = madd(Vec3ff(t0),a1,t1*b1);
    p2 = madd(Vec3ff(t0),a2,t1*b2);
    p3 = madd(Vec3ff(t0),a3,t1*b3);
  }

  /*! loads curve vertices for specified time for mblur and non-mblur geometry */
  __forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, float time) const
  {
    if (hasMotionBlur()) gather(p0,p1,p2,p3,primID,vid,time);
    else                 gather(p0,p1,p2,p3,primID,vid);
  }

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  template<int M>
  __forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid, const vfloat<M>& time) const
  {
    vfloat<M> ftime;
    const vint<M> itime = timeSegment<M>(time, ftime);
    const vfloat<M> t0 = 1.0f - ftime;
    const vfloat<M> t1 = ftime;
    Vec4vf<M> a0,a1,a2,a3; vgather<M>(a0,a1,a2,a3,primID,vid,itime);
    Vec4vf<M> b0,b1,b2,b3; vgather<M>(b0,b1,b2,b3,primID,vid,itime+1);
    p0 = madd(Vec4vf<M>(t0),a0,t1*b0);
    p1 = madd(Vec4vf<M>(t0),a1,t1*b1);
    p2 = madd(Vec4vf<M>(t0),a2,t1*b2);
    p3 = madd(Vec4vf<M>(t0),a3,t1*b3);
  }
#endif

  /*! calculates bounding box of a line segment between two endpoints,
   *  enlarged by the (scaled) maximum radius */
  __forceinline BBox3fa bounds(const Vec3ff& v0, const Vec3ff& v1) const
  {
    const BBox3ff b = merge(BBox3ff(v0),BBox3ff(v1));
    return enlarge((BBox3fa)b,maxRadiusScale*Vec3fa(max(v0.w,v1.w)));
  }

  /*! calculates bounding box of i'th line segment */
  __forceinline BBox3fa bounds(size_t i) const
  {
    const unsigned int index = segment(i);
    const Vec3ff v0 = vertex(index+0);
    const Vec3ff v1 = vertex(index+1);
    return bounds(v0,v1);
  }

  /*! calculates bounding box of i'th line segment for the itime'th time step */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const
  {
    const unsigned int index = segment(i);
    const Vec3ff v0 = vertex(index+0,itime);
    const Vec3ff v1 = vertex(index+1,itime);
    return bounds(v0,v1);
  }

  /*! calculates bounding box of i'th line segment in the given space */
  __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
  {
    const unsigned int index = segment(i);
    const Vec3ff v0 = vertex(index+0);
    const Vec3ff v1 = vertex(index+1);
    const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
    const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
    return bounds(w0,w1);
  }

  /*! calculates bounding box of i'th line segment in the given space for the itime'th time step */
  __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
  {
    const unsigned int index = segment(i);
    const Vec3ff v0 = vertex(index+0,itime);
    const Vec3ff v1 = vertex(index+1,itime);
    const Vec3ff w0(xfmVector(space,(Vec3fa)v0),v0.w);
    const Vec3ff w1(xfmVector(space,(Vec3fa)v1),v1.w);
    return bounds(w0,w1);
  }

  /*! calculates bounding box of i'th segment after offset/scale into the given space */
  __forceinline BBox3fa bounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
  {
    const float r_scale = r_scale0*scale;
    const unsigned int index = segment(i);
    const Vec3ff v0 = vertex(index+0,itime);
    const Vec3ff v1 = vertex(index+1,itime);
    const Vec3ff w0(xfmVector(space,(v0-ofs)*Vec3fa(scale)),maxRadiusScale*v0.w*r_scale);
    const Vec3ff w1(xfmVector(space,(v1-ofs)*Vec3fa(scale)),maxRadiusScale*v1.w*r_scale);
    return bounds(w0,w1);
  }

  /*! check if the i'th primitive is valid at the itime'th timestep */
  __forceinline bool valid(size_t i, size_t itime) const {
    return valid(i, make_range(itime, itime));
  }

  /*! check if the i'th primitive is valid between the specified time range
   *  (finite vertices and non-negative radii at every time step) */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    const unsigned int index = segment(i);
    if (index+1 >= numVertices()) return false;

#if !defined(__SYCL_DEVICE_ONLY__)
    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
    {
      const Vec3ff v0 = vertex(index+0,itime); if (unlikely(!isvalid4(v0))) return false;
      const Vec3ff v1 = vertex(index+1,itime); if (unlikely(!isvalid4(v1))) return false;
      if (min(v0.w,v1.w) < 0.0f) return false;
    }
#endif
    return true;
  }

  /*! calculates the linear bounds of the i'th primitive at the itimeGlobal'th time segment */
  __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
    return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
  }

  /*! calculates the build bounds of the i'th primitive, if it's valid */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
  {
    if (!valid(i,0)) return false;
    *bbox = bounds(i);
    return true;
  }

  /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    if (!valid(i,itime+0) || !valid(i,itime+1)) return false;
    bbox = bounds(i,itime); // use bounds of first time step in builder
    return true;
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&] (size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&] (size_t itime) { return bounds(ofs, scale, r_scale0, space, primID, itime); }, dt, this->time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
  {
    if (!valid(i, timeSegmentRange(time_range))) return false;
    bbox = linearBounds(i, time_range);
    return true;
  }

  /*! get fast access to first vertex buffer */
  __forceinline float * getCompactVertexArray () const {
    return (float*) vertices0.getPtr();
  }

public:
  BufferView<unsigned int> segments;      //!< array of line segment indices
  BufferView<Vec3ff> vertices0;           //!< fast access to first vertex buffer
  BufferView<Vec3fa> normals0;            //!< fast access to first normal buffer
  BufferView<char> flags;                 //!< start, end flag per segment
  Device::vector<BufferView<Vec3ff>> vertices = device;     //!< vertex array for each timestep
  Device::vector<BufferView<Vec3fa>> normals = device;      //!< normal array for each timestep
  Device::vector<BufferView<char>> vertexAttribs = device;  //!< user buffers
  int tessellationRate;                   //!< tessellation rate for bezier curve
  float maxRadiusScale = 1.0;             //!< maximal min-width scaling of curve radii
};
namespace isa
{
/*! ISA-specialized line segments providing builder entry points
 *  (PrimRef creation and virtual bounds dispatch helpers). */
struct LineSegmentsISA : public LineSegments
{
  LineSegmentsISA (Device* device, Geometry::GType gtype)
    : LineSegments(device,gtype) {}

  /*! aligned space whose z-axis follows the segment direction */
  LinearSpace3fa computeAlignedSpace(const size_t primID) const
  {
    const Vec3fa axis = normalize(computeDirection(primID));
    return is_finite(axis) ? frame(axis) : LinearSpace3fa(one);
  }

  /*! aligned space for motion blur, derived from the middle time segment */
  LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const
  {
    const Vec3fa axisz(0,0,1);
    const range<int> tbounds = this->timeSegmentRange(time_range);
    if (tbounds.size() == 0) return frame(axisz);

    const size_t itime = (tbounds.begin()+tbounds.end())/2;
    const Vec3fa axis = normalize(computeDirection(primID,itime));
    return is_finite(axis) ? frame(axis) : LinearSpace3fa(one);
  }

  /*! direction vector of a segment at the first time step */
  Vec3fa computeDirection(unsigned int primID) const
  {
    const unsigned base = segment(primID);
    const Vec3fa a = vertex(base+0);
    const Vec3fa b = vertex(base+1);
    return b-a;
  }

  /*! direction vector of a segment at a given time step */
  Vec3fa computeDirection(unsigned int primID, size_t time) const
  {
    const unsigned base = segment(primID);
    const Vec3fa a = vertex(base+0,time);
    const Vec3fa b = vertex(base+1,time);
    return b-a;
  }

  /*! fills prims[k..] with one PrimRef per valid segment in r */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      BBox3fa box = empty;
      if (buildBounds(primID,&box)) {
        const PrimRef ref(box,geomID,unsigned(primID));
        info.add_center2(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }

  /*! fills prims[k..] with one PrimRef per segment valid at time step itime */
  PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      BBox3fa box = empty;
      if (buildBounds(primID,itime,box)) {
        const PrimRef ref(box,geomID,unsigned(primID));
        info.add_center2(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }

  /*! fills prims[k..] with PrimRefs bounding each segment over the clipped time range */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    // restrict to the overlap of geometry time range and requested range
    const BBox1f clipped = BBox1f::intersect(getTimeRange(), time_range);
    if (clipped.empty()) return info;

    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      LBBox3fa lbox = empty;
      if (linearBounds(primID, clipped, lbox)) {
        const PrimRef ref(lbox.bounds(), geomID, unsigned(primID));
        info.add_center2(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }

  /*! fills prims[k..] with motion-blur PrimRefMBs for each valid segment in r */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfoMB info(empty);
    for (size_t primID = r.begin(); primID != r.end(); ++primID)
    {
      if (valid(primID, timeSegmentRange(t0t1))) {
        const PrimRefMB ref(linearBounds(primID,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(primID));
        info.add_primref(ref);
        prims[k] = ref;
        ++k;
      }
    }
    return info;
  }

  /* virtual bounds dispatch helpers forwarding to the inline implementations */
  BBox3fa vbounds(size_t i) const {
    return bounds(i);
  }

  BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const {
    return bounds(space,i);
  }

  BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
    return bounds(ofs,scale,r_scale0,space,i,itime);
  }

  LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
    return linearBounds(primID,time_range);
  }

  LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
    return linearBounds(space,primID,time_range);
  }

  LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
    return linearBounds(ofs,scale,r_scale0,space,primID,time_range);
  }
};
}
DECLARE_ISA_FUNCTION(LineSegments*, createLineSegments, Device* COMMA Geometry::GType);
}

View file

@ -0,0 +1,361 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "buffer.h"
#include "default.h"
#include "geometry.h"
namespace embree
{
/*! represents an array of points */
struct Points : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_POINTS;

public:
  /*! point geometry construction (gtype selects sphere/disc/oriented-disc variant) */
  Points(Device* device, Geometry::GType gtype);

public:
  /* geometry interface (implemented in the corresponding .cpp) */
  void setMask(unsigned mask);
  void setNumTimeSteps(unsigned int numTimeSteps);
  void setVertexAttributeCount(unsigned int N);
  void setBuffer(RTCBufferType type,
                 unsigned int slot,
                 RTCFormat format,
                 const Ref<Buffer>& buffer,
                 size_t offset,
                 size_t stride,
                 unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify();
  void setMaxRadiusScale(float s);
  void addElementsToCount (GeometryCounts & counts) const;

public:
  /*! returns the number of vertices */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns i'th vertex of the first time step */
  __forceinline Vec3ff vertex(size_t i) const {
    return vertices0[i];
  }

  /*! returns pointer to the i'th vertex of the first time step */
  __forceinline const char* vertexPtr(size_t i) const {
    return vertices0.getPtr(i);
  }

  /*! returns i'th normal of the first time step */
  __forceinline Vec3fa normal(size_t i) const {
    return normals0[i];
  }

  /*! returns i'th radius of the first time step (stored in the vertex w component) */
  __forceinline float radius(size_t i) const {
    return vertices0[i].w;
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline Vec3ff vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns i'th vertex linearly interpolated at the specified time */
  __forceinline Vec3ff vertex(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);  // segment index + local fraction
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3ff v0 = vertex(i, itime+0);
    Vec3ff v1 = vertex(i, itime+1);
    return madd(Vec3ff(t0),v0,t1*v1);
  }

  /*! returns i'th vertex at the specified time; falls back to the static
   *  vertex when the geometry has no motion blur */
  __forceinline Vec3ff vertex_safe(size_t i, float time) const
  {
    if (hasMotionBlur()) return vertex(i,time);
    else                 return vertex(i);
  }

  /*! returns pointer to the i'th vertex of itime'th timestep */
  __forceinline const char* vertexPtr(size_t i, size_t itime) const {
    return vertices[itime].getPtr(i);
  }

  /*! returns i'th normal of itime'th timestep */
  __forceinline Vec3fa normal(size_t i, size_t itime) const {
    return normals[itime][i];
  }

  /*! returns i'th normal linearly interpolated at the specified time
   *  (note: not renormalized after interpolation) */
  __forceinline Vec3fa normal(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3fa n0 = normal(i, itime+0);
    Vec3fa n1 = normal(i, itime+1);
    return madd(Vec3fa(t0),n0,t1*n1);
  }

  /*! returns i'th normal at the specified time; falls back to the static
   *  normal when the geometry has no motion blur */
  __forceinline Vec3fa normal_safe(size_t i, float time) const
  {
    if (hasMotionBlur()) return normal(i,time);
    else                 return normal(i);
  }

  /*! returns i'th radius of itime'th timestep */
  __forceinline float radius(size_t i, size_t itime) const {
    return vertices[itime][i].w;
  }

  /*! returns i'th radius linearly interpolated at the specified time */
  __forceinline float radius(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    float r0 = radius(i, itime+0);
    float r1 = radius(i, itime+1);
    return madd(t0,r0,t1*r1);
  }

  /*! returns i'th radius at the specified time; falls back to the static
   *  radius when the geometry has no motion blur */
  __forceinline float radius_safe(size_t i, float time) const
  {
    if (hasMotionBlur()) return radius(i,time);
    else                 return radius(i);
  }

  /*! calculates bounding box of a point: center enlarged by its radius,
   *  scaled by maxRadiusScale (min-width curve/point support) */
  __forceinline BBox3fa bounds(const Vec3ff& v0) const {
    return enlarge(BBox3fa(v0), maxRadiusScale*Vec3fa(v0.w));
  }

  /*! calculates bounding box of the i'th point */
  __forceinline BBox3fa bounds(size_t i) const
  {
    const Vec3ff v0 = vertex(i);
    return bounds(v0);
  }

  /*! calculates bounding box of the i'th point for the itime'th time step */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const
  {
    const Vec3ff v0 = vertex(i, itime);
    return bounds(v0);
  }

  /*! calculates bounding box of the i'th point in the given coordinate space */
  __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
  {
    const Vec3ff v0 = vertex(i);
    const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);  // transform position, keep radius
    return bounds(w0);
  }

  /*! calculates bounding box of the i'th point in the given space for the itime'th time step */
  __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
  {
    const Vec3ff v0 = vertex(i, itime);
    const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
    return bounds(w0);
  }

  /*! check if the i'th primitive is valid at the itime'th timestep */
  __forceinline bool valid(size_t i, size_t itime) const {
    return valid(i, make_range(itime, itime));
  }

  /*! check if the i'th primitive is valid between the specified time range:
   *  index in bounds, vertex finite, and radius non-negative */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    const unsigned int index = (unsigned int)i;
    if (index >= numVertices())
      return false;

    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) {
      const Vec3ff v0 = vertex(index + 0, itime);
      if (unlikely(!isvalid4(v0)))
        return false;
      if (v0.w < 0.0f)  // negative radius is invalid
        return false;
    }
    return true;
  }

  /*! calculates the linear bounds of the i'th primitive at the itime'th time segment */
  __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
    return LBBox3fa(bounds(i, itime + 0), bounds(i, itime + 1));
  }

  /*! calculates the build bounds of the i'th primitive, if it's valid */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
  {
    if (!valid(i, 0))
      return false;
    *bbox = bounds(i);
    return true;
  }

  /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    if (!valid(i, itime + 0) || !valid(i, itime + 1))
      return false;
    bbox = bounds(i, itime); // use bounds of first time step in builder
    return true;
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&](size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range, in the given space */
  __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&](size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range, if valid */
  __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
  {
    if (!valid(i, timeSegmentRange(time_range))) return false;
    bbox = linearBounds(i, time_range);
    return true;
  }

  /*! get fast access to first vertex buffer */
  __forceinline float * getCompactVertexArray () const {
    return (float*) vertices0.getPtr();
  }

  /*! NOTE(review): heuristic area estimate — the leading 1 looks like a
   *  constant floor term rather than a geometric area; confirm against the
   *  projectedPrimitiveArea implementations of the other geometries */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    const float R = radius(i);
    return 1 + 2*M_PI*R*R;
  }

public:
  BufferView<Vec3ff> vertices0;                          //!< fast access to first vertex buffer
  BufferView<Vec3fa> normals0;                           //!< fast access to first normal buffer
  Device::vector<BufferView<Vec3ff>> vertices = device;  //!< vertex array for each timestep
  Device::vector<BufferView<Vec3fa>> normals = device;   //!< normal array for each timestep
  Device::vector<BufferView<char>> vertexAttribs = device; //!< user buffers
  float maxRadiusScale = 1.0;                            //!< maximal min-width scaling of curve radii
};
namespace isa
{
/*! ISA-specialized point geometry: implements the builder-facing
 *  prim-ref creation and bounds queries */
struct PointsISA : public Points
{
  PointsISA(Device* device, Geometry::GType gtype) : Points(device, gtype) {}

  /*! points have no intrinsic direction; return a fixed axis */
  Vec3fa computeDirection(unsigned int primID) const
  {
    return Vec3fa(1, 0, 0);
  }

  /*! same as above, time parameter is ignored */
  Vec3fa computeDirection(unsigned int primID, size_t time) const
  {
    return Vec3fa(1, 0, 0);
  }

  /*! creates prim refs for static builds; invalid primitives are skipped */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo pinfo(empty);
    for (size_t j = r.begin(); j < r.end(); j++) {
      BBox3fa bounds = empty;
      if (!buildBounds(j, &bounds))
        continue;
      const PrimRef prim(bounds, geomID, unsigned(j));
      pinfo.add_center2(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }

  /*! creates prim refs for one time segment of a motion-blurred build */
  PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo pinfo(empty);
    for (size_t j = r.begin(); j < r.end(); j++) {
      BBox3fa bounds = empty;
      if (!buildBounds(j, itime, bounds))
        continue;
      const PrimRef prim(bounds, geomID, unsigned(j));
      pinfo.add_center2(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }

  /*! creates prim refs over a time range, clipped to this geometry's time range */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo pinfo(empty);
    const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
    if (t0t1.empty()) return pinfo;

    for (size_t j = r.begin(); j < r.end(); j++) {
      LBBox3fa lbounds = empty;
      if (!linearBounds(j, t0t1, lbounds))
        continue;
      const PrimRef prim(lbounds.bounds(), geomID, unsigned(j));
      pinfo.add_center2(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }

  /*! creates motion-blur prim refs (PrimRefMB) over the given time range */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims,
                                  const BBox1f& t0t1,
                                  const range<size_t>& r,
                                  size_t k,
                                  unsigned int geomID) const
  {
    PrimInfoMB pinfo(empty);
    for (size_t j = r.begin(); j < r.end(); j++) {
      if (!valid(j, timeSegmentRange(t0t1)))
        continue;
      const PrimRefMB prim(linearBounds(j, t0t1), this->numTimeSegments(), this->time_range, this->numTimeSegments(), geomID, unsigned(j));
      pinfo.add_primref(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }

  /*! virtual-interface adaptors forwarding to the concrete bounds()/linearBounds() */
  BBox3fa vbounds(size_t i) const
  {
    return bounds(i);
  }

  BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const
  {
    return bounds(space, i);
  }

  LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const
  {
    return linearBounds(primID, time_range);
  }

  LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const
  {
    return linearBounds(space, primID, time_range);
  }
};
} // namespace isa
DECLARE_ISA_FUNCTION(Points*, createPoints, Device* COMMA Geometry::GType);
} // namespace embree

View file

@ -0,0 +1,376 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "buffer.h"
namespace embree
{
/*! Quad Mesh */
/*! Quad Mesh */
struct QuadMesh : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_QUAD_MESH;

  /*! quad vertex indices */
  struct Quad
  {
    Quad() {}

    Quad (uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) {
      v[0] = v0; v[1] = v1; v[2] = v2; v[3] = v3;
    }

    /*! outputs quad indices */
    __forceinline friend embree_ostream operator<<(embree_ostream cout, const Quad& q) {
      return cout << "Quad {" << q.v[0] << ", " << q.v[1] << ", " << q.v[2] << ", " << q.v[3] << " }";
    }

    uint32_t v[4];
  };

public:
  /*! quad mesh construction */
  QuadMesh (Device* device);

  /* geometry interface */
public:
  void setMask(unsigned mask);
  void setNumTimeSteps (unsigned int numTimeSteps);
  void setVertexAttributeCount (unsigned int N);
  void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify();
  void interpolate(const RTCInterpolateArguments* const args);
  void addElementsToCount (GeometryCounts & counts) const;

  /*! SIMD interpolation kernel (N lanes at a time): the quad is split along
   *  the diagonal v1-v3 into two triangles and evaluated barycentrically;
   *  u+v <= 1 selects the (v0,v1,v3) triangle, otherwise the mirrored
   *  (v2,v3,v1) triangle with flipped parameters */
  template<int N>
  void interpolate_impl(const RTCInterpolateArguments* const args)
  {
    unsigned int primID = args->primID;
    float u = args->u;
    float v = args->v;
    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* dPdv = args->dPdv;
    float* ddPdudu = args->ddPdudu;
    float* ddPdvdv = args->ddPdvdv;
    float* ddPdudv = args->ddPdudv;
    unsigned int valueCount = args->valueCount;

    /* calculate base pointer and stride */
    /* NOTE(review): the attribute arm uses <= rather than < — looks like an
     * off-by-one in the assert (vertexAttribs[bufferSlot] below requires
     * bufferSlot < size); confirm against the other mesh geometries */
    assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
           (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
    const char* src = nullptr;
    size_t stride = 0;
    if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
      src = vertexAttribs[bufferSlot].getPtr();
      stride = vertexAttribs[bufferSlot].getStride();
    } else {
      src = vertices[bufferSlot].getPtr();
      stride = vertices[bufferSlot].getStride();
    }

    for (unsigned int i=0; i<valueCount; i+=N)
    {
      /* mask off lanes beyond valueCount */
      const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
      const size_t ofs = i*sizeof(float);
      const Quad& tri = quad(primID);
      const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]);
      const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]);
      const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]);
      const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[3]*stride+ofs]);

      /* select triangle of the quad and remap (u,v) into it */
      const vbool<N> left = u+v <= 1.0f;
      const vfloat<N> Q0 = select(left,p0,p2);
      const vfloat<N> Q1 = select(left,p1,p3);
      const vfloat<N> Q2 = select(left,p3,p1);
      const vfloat<N> U  = select(left,u,vfloat<N>(1.0f)-u);
      const vfloat<N> V  = select(left,v,vfloat<N>(1.0f)-v);
      const vfloat<N> W  = 1.0f-U-V;

      if (P) {
        mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
      }
      if (dPdu) {
        /* derivative sign flips on the second triangle */
        assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1));
        assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2));
      }
      if (ddPdudu) {
        /* bilinear patch per triangle: second derivatives are zero */
        assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
        assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
        assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
      }
    }
  }

public:
  /*! returns number of vertices */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns i'th quad */
  __forceinline const Quad& quad(size_t i) const {
    return quads[i];
  }

  /*! returns i'th vertex of the first timestep */
  __forceinline const Vec3fa vertex(size_t i) const {
    return vertices0[i];
  }

  /*! returns pointer to the i'th vertex of the first timestep */
  __forceinline const char* vertexPtr(size_t i) const {
    return vertices0.getPtr(i);
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns pointer to the i'th vertex of itime'th timestep */
  __forceinline const char* vertexPtr(size_t i, size_t itime) const {
    return vertices[itime].getPtr(i);
  }

  /*! returns i'th vertex linearly interpolated at the specified time */
  __forceinline Vec3fa vertex(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3fa v0 = vertex(i, itime+0);
    Vec3fa v1 = vertex(i, itime+1);
    return madd(Vec3fa(t0),v0,t1*v1);
  }

  /*! calculates the bounds of the i'th quad */
  __forceinline BBox3fa bounds(size_t i) const
  {
    const Quad& q = quad(i);
    const Vec3fa v0 = vertex(q.v[0]);
    const Vec3fa v1 = vertex(q.v[1]);
    const Vec3fa v2 = vertex(q.v[2]);
    const Vec3fa v3 = vertex(q.v[3]);
    return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
  }

  /*! calculates the bounds of the i'th quad at the itime'th timestep */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const
  {
    const Quad& q = quad(i);
    const Vec3fa v0 = vertex(q.v[0],itime);
    const Vec3fa v1 = vertex(q.v[1],itime);
    const Vec3fa v2 = vertex(q.v[2],itime);
    const Vec3fa v3 = vertex(q.v[3],itime);
    return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
  }

  /*! check if the i'th primitive is valid at the itime'th timestep */
  __forceinline bool valid(size_t i, size_t itime) const {
    return valid(i, make_range(itime, itime));
  }

  /*! check if the i'th primitive is valid between the specified time range:
   *  all indices in bounds and all referenced vertices finite */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    const Quad& q = quad(i);
    if (unlikely(q.v[0] >= numVertices())) return false;
    if (unlikely(q.v[1] >= numVertices())) return false;
    if (unlikely(q.v[2] >= numVertices())) return false;
    if (unlikely(q.v[3] >= numVertices())) return false;

    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
    {
      if (!isvalid(vertex(q.v[0],itime))) return false;
      if (!isvalid(vertex(q.v[1],itime))) return false;
      if (!isvalid(vertex(q.v[2],itime))) return false;
      if (!isvalid(vertex(q.v[3],itime))) return false;
    }
    return true;
  }

  /*! calculates the linear bounds of the i'th quad at the itime'th time segment */
  __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
    return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
  }

  /*! calculates the build bounds of the i'th primitive, if it's valid
   *  (checks all time steps, but returns first-timestep bounds) */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
  {
    const Quad& q = quad(i);
    if (q.v[0] >= numVertices()) return false;
    if (q.v[1] >= numVertices()) return false;
    if (q.v[2] >= numVertices()) return false;
    if (q.v[3] >= numVertices()) return false;

    for (size_t t=0; t<numTimeSteps; t++)
    {
      const Vec3fa v0 = vertex(q.v[0],t);
      const Vec3fa v1 = vertex(q.v[1],t);
      const Vec3fa v2 = vertex(q.v[2],t);
      const Vec3fa v3 = vertex(q.v[3],t);
      if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3)))
        return false;
    }

    if (bbox)
      *bbox = bounds(i);
    return true;
  }

  /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    const Quad& q = quad(i);
    if (unlikely(q.v[0] >= numVertices())) return false;
    if (unlikely(q.v[1] >= numVertices())) return false;
    if (unlikely(q.v[2] >= numVertices())) return false;
    if (unlikely(q.v[3] >= numVertices())) return false;

    assert(itime+1 < numTimeSteps);
    const Vec3fa a0 = vertex(q.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
    const Vec3fa a1 = vertex(q.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
    const Vec3fa a2 = vertex(q.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
    const Vec3fa a3 = vertex(q.v[3],itime+0); if (unlikely(!isvalid(a3))) return false;
    const Vec3fa b0 = vertex(q.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
    const Vec3fa b1 = vertex(q.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
    const Vec3fa b2 = vertex(q.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;
    const Vec3fa b3 = vertex(q.v[3],itime+1); if (unlikely(!isvalid(b3))) return false;

    /* use bounds of first time step in builder */
    bbox = BBox3fa(min(a0,a1,a2,a3),max(a0,a1,a2,a3));
    return true;
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified time range, if valid */
  __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const
  {
    if (!valid(i, timeSegmentRange(dt))) return false;
    bbox = linearBounds(i, dt);
    return true;
  }

  /*! get fast access to first vertex buffer */
  __forceinline float * getCompactVertexArray () const {
    return (float*) vertices0.getPtr();
  }

  /* gets version info of topology */
  unsigned int getTopologyVersion() const {
    return quads.modCounter;
  }

  /* returns true if topology changed */
  bool topologyChanged(unsigned int otherVersion) const {
    return quads.isModified(otherVersion); // || numPrimitivesChanged;
  }

  /* returns the projected area (quad split into two triangles along v1-v3) */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    const Quad& q = quad(i);
    const Vec3fa v0 = vertex(q.v[0]);
    const Vec3fa v1 = vertex(q.v[1]);
    const Vec3fa v2 = vertex(q.v[2]);
    const Vec3fa v3 = vertex(q.v[3]);
    return areaProjectedTriangle(v0,v1,v3) +
           areaProjectedTriangle(v1,v2,v3);
  }

public:
  BufferView<Quad> quads;                                //!< array of quads
  BufferView<Vec3fa> vertices0;                          //!< fast access to first vertex buffer
  Device::vector<BufferView<Vec3fa>> vertices = device;  //!< vertex array for each timestep
  Device::vector<RawBufferView> vertexAttribs = device;  //!< vertex attribute buffers
};
namespace isa
{
/*! ISA-specialized quad mesh: implements the builder-facing
 *  prim-ref creation and bounds queries */
struct QuadMeshISA : public QuadMesh
{
  QuadMeshISA (Device* device)
    : QuadMesh(device) {}

  /*! virtual-interface adaptor forwarding to linearBounds() */
  LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
    return linearBounds(primID,time_range);
  }

  /*! creates prim refs for static builds; invalid quads are skipped */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo pinfo(empty);
    for (size_t j=r.begin(); j<r.end(); j++)
    {
      BBox3fa bounds = empty;
      if (!buildBounds(j,&bounds)) continue;
      const PrimRef prim(bounds,geomID,unsigned(j));
      pinfo.add_center2(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }

  /*! creates prim refs for one time segment of a motion-blurred build */
  PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo pinfo(empty);
    for (size_t j=r.begin(); j<r.end(); j++)
    {
      BBox3fa bounds = empty;
      if (!buildBounds(j,itime,bounds)) continue;
      const PrimRef prim(bounds,geomID,unsigned(j));
      pinfo.add_center2(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }

  /*! creates prim refs over a time range, clipped to this geometry's time range */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo pinfo(empty);
    const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
    if (t0t1.empty()) return pinfo;

    for (size_t j = r.begin(); j < r.end(); j++) {
      LBBox3fa lbounds = empty;
      if (!linearBounds(j, t0t1, lbounds))
        continue;
      const PrimRef prim(lbounds.bounds(), geomID, unsigned(j));
      pinfo.add_center2(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }

  /*! creates motion-blur prim refs (PrimRefMB) over the given time range */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfoMB pinfo(empty);
    for (size_t j=r.begin(); j<r.end(); j++)
    {
      if (!valid(j, timeSegmentRange(t0t1))) continue;
      const PrimRefMB prim(linearBounds(j,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(j));
      pinfo.add_primref(prim);
      prims[k++] = prim;
    }
    return pinfo;
  }
};
}
DECLARE_ISA_FUNCTION(QuadMesh*, createQuadMesh, Device*);
}

View file

@ -0,0 +1,329 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "buffer.h"
#include "../subdiv/half_edge.h"
#include "../subdiv/tessellation_cache.h"
#include "../subdiv/catmullclark_coefficients.h"
#include "../subdiv/patch.h"
namespace embree
{
struct HoleSet;
struct VertexCreaseMap;
struct EdgeCreaseMap;
/*! Catmull-Clark subdivision surface mesh: faces of arbitrary valence with
 *  optional creases, holes, and per-edge tessellation levels. The user
 *  buffers are converted into a half-edge structure at commit time. */
class SubdivMesh : public Geometry
{
  ALIGNED_CLASS_(16);
public:

  typedef HalfEdge::Edge Edge;

  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_SUBDIV_MESH;

  /*! structure used to sort half edges using radix sort by their key */
  struct KeyHalfEdge
  {
    KeyHalfEdge() {}

    KeyHalfEdge (uint64_t key, HalfEdge* edge)
      : key(key), edge(edge) {}

    /*! implicit conversion to the sort key, used by the radix sort */
    __forceinline operator uint64_t() const {
      return key;
    }

    friend __forceinline bool operator<(const KeyHalfEdge& e0, const KeyHalfEdge& e1) {
      return e0.key < e1.key;
    }

  public:
    uint64_t key;
    HalfEdge* edge;
  };

public:

  /*! subdiv mesh construction */
  SubdivMesh(Device* device);
  ~SubdivMesh();

public:
  /* geometry interface (implemented in the corresponding .cpp) */
  void setMask (unsigned mask);
  void setSubdivisionMode (unsigned int topologyID, RTCSubdivisionMode mode);
  void setVertexAttributeTopology(unsigned int vertexAttribID, unsigned int topologyID);
  void setNumTimeSteps (unsigned int numTimeSteps);
  void setVertexAttributeCount (unsigned int N);
  void setTopologyCount (unsigned int N);
  void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void setTessellationRate(float N);
  bool verify();
  void commit();
  void addElementsToCount (GeometryCounts & counts) const;
  void setDisplacementFunction (RTCDisplacementFunctionN func);
  unsigned int getFirstHalfEdge(unsigned int faceID);
  unsigned int getFace(unsigned int edgeID);
  unsigned int getNextHalfEdge(unsigned int edgeID);
  unsigned int getPreviousHalfEdge(unsigned int edgeID);
  unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID);

public:

  /*! return the number of faces */
  size_t numFaces() const {
    return faceVertices.size();
  }

  /*! return the number of edges */
  size_t numEdges() const {
    return topology[0].vertexIndices.size();
  }

  /*! return the number of vertices */
  size_t numVertices() const {
    return vertices[0].size();
  }

  /*! calculates the bounds of the i'th subdivision patch at the j'th timestep */
  __forceinline BBox3fa bounds(size_t i, size_t j = 0) const {
    return topology[0].getHalfEdge(i)->bounds(vertices[j]);
  }

  /*! check if the i'th primitive is valid */
  __forceinline bool valid(size_t i) const {
    return topology[0].valid(i) && !invalidFace(i);
  }

  /*! check if the i'th primitive is valid for the j'th time range */
  __forceinline bool valid(size_t i, size_t j) const {
    return topology[0].valid(i) && !invalidFace(i,j);
  }

  /*! prints some statistics */
  void printStatistics();

  /*! initializes the half edge data structure */
  void initializeHalfEdgeStructures ();

public:

  /*! returns the vertex buffer for some time step */
  __forceinline const BufferView<Vec3fa>& getVertexBuffer( const size_t t = 0 ) const {
    return vertices[t];
  }

  /* returns tessellation level of edge, falling back to the constant
   * tessellation rate when no per-edge levels buffer is set */
  __forceinline float getEdgeLevel(const size_t i) const
  {
    if (levels) return clamp(levels[i],1.0f,4096.0f); // FIXME: do we want to limit edge level?
    else        return clamp(tessellationRate,1.0f,4096.0f); // FIXME: do we want to limit edge level?
  }

public:
  RTCDisplacementFunctionN displFunc;    //!< displacement function

  /*! all buffers in this section are provided by the application */
public:

  /*! the topology contains all data that may differ when
   *  interpolating different user data buffers */
  struct Topology
  {
  public:

    /*! Default topology construction */
    Topology () : halfEdges(nullptr,0) {}

    /*! Topology initialization */
    Topology (SubdivMesh* mesh);

    /*! make the class movable */
  public:
    Topology (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
      : mesh(std::move(other.mesh)),
        vertexIndices(std::move(other.vertexIndices)),
        subdiv_mode(std::move(other.subdiv_mode)),
        halfEdges(std::move(other.halfEdges)),
        halfEdges0(std::move(other.halfEdges0)),
        halfEdges1(std::move(other.halfEdges1)) {}

    Topology& operator= (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
    {
      mesh = std::move(other.mesh);
      vertexIndices = std::move(other.vertexIndices);
      subdiv_mode = std::move(other.subdiv_mode);
      halfEdges = std::move(other.halfEdges);
      halfEdges0 = std::move(other.halfEdges0);
      halfEdges1 = std::move(other.halfEdges1);
      return *this;
    }

  public:
    /*! check if the i'th primitive is valid in this topology */
    __forceinline bool valid(size_t i) const
    {
      /* in NO_BOUNDARY mode faces touching the mesh border are dropped */
      if (unlikely(subdiv_mode == RTC_SUBDIVISION_MODE_NO_BOUNDARY)) {
        if (getHalfEdge(i)->faceHasBorder()) return false;
      }
      return true;
    }

    /*! updates the interpolation mode for the topology */
    void setSubdivisionMode (RTCSubdivisionMode mode);

    /*! marks all buffers as modified */
    void update ();

    /*! verifies index array */
    bool verify (size_t numVertices);

    /*! initializes the half edge data structure */
    void initializeHalfEdgeStructures ();

  private:

    /*! recalculates the half edges */
    void calculateHalfEdges();

    /*! updates half edges when recalculation is not necessary */
    void updateHalfEdges();

    /*! user input data */
  public:

    SubdivMesh* mesh;

    /*! indices of the vertices composing each face */
    BufferView<unsigned int> vertexIndices;

    /*! subdiv interpolation mode */
    RTCSubdivisionMode subdiv_mode;

    /*! generated data */
  public:

    /*! returns the start half edge for face f */
    __forceinline const HalfEdge* getHalfEdge ( const size_t f ) const {
      return &halfEdges[mesh->faceStartEdge[f]];
    }

    /*! Half edge structure, generated by initHalfEdgeStructures */
    mvector<HalfEdge> halfEdges;

    /*! the following data is only required during construction of the
     *  half edge structure and can be cleared for static scenes */
  private:

    /*! two arrays used to sort the half edges */
    std::vector<KeyHalfEdge> halfEdges0;
    std::vector<KeyHalfEdge> halfEdges1;
  };

  /*! returns the start half edge for topology t and face f */
  __forceinline const HalfEdge* getHalfEdge ( const size_t t , const size_t f ) const {
    return topology[t].getHalfEdge(f);
  }

  /*! buffer containing the number of vertices for each face */
  BufferView<unsigned int> faceVertices;

  /*! array of topologies */
  vector<Topology> topology;

  /*! vertex buffer (one buffer for each time step) */
  vector<BufferView<Vec3fa>> vertices;

  /*! user data buffers */
  vector<RawBufferView> vertexAttribs;

  /*! edge crease buffer containing edges (pairs of vertices) that carry edge crease weights */
  BufferView<Edge> edge_creases;

  /*! edge crease weights for each edge of the edge_creases buffer */
  BufferView<float> edge_crease_weights;

  /*! vertex crease buffer containing all vertices that carry vertex crease weights */
  BufferView<unsigned int> vertex_creases;

  /*! vertex crease weights for each vertex of the vertex_creases buffer */
  BufferView<float> vertex_crease_weights;

  /*! subdivision level for each half edge of the vertexIndices buffer */
  BufferView<float> levels;
  float tessellationRate;  // constant rate that is used when levels is not set

  /*! buffer that marks specific faces as holes */
  BufferView<unsigned> holes;

  /*! all data in this section is generated by initializeHalfEdgeStructures function */
private:

  /*! number of half edges used by faces */
  size_t numHalfEdges;

  /*! fast lookup table to find the first half edge for some face */
  mvector<uint32_t> faceStartEdge;

  /*! fast lookup table to find the face for some half edge */
  mvector<uint32_t> halfEdgeFace;

  /*! set with all holes */
  std::unique_ptr<HoleSet> holeSet;

  /*! fast lookup table to detect invalid faces */
  mvector<char> invalid_face;

  /*! test if face i is invalid in timestep j (flattened faces x timesteps table) */
  __forceinline       char& invalidFace(size_t i, size_t j = 0)       { return invalid_face[i*numTimeSteps+j]; }
  __forceinline const char& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; }

  /*! interpolation cache */
public:
  /* 16-byte (4-float) resp. 32-byte (8-float) cache slots per primitive */
  static __forceinline size_t numInterpolationSlots4(size_t stride) { return (stride+15)/16; }
  static __forceinline size_t numInterpolationSlots8(size_t stride) { return (stride+31)/32; }
  static __forceinline size_t interpolationSlot(size_t prim, size_t slot, size_t stride) {
    const size_t slots = numInterpolationSlots4(stride);
    assert(slot < slots);
    return slots*prim+slot;
  }
  std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_buffer_tags;
  std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_attrib_buffer_tags;
  std::vector<Patch3fa::Ref> patch_eval_trees;

  /*! the following data is only required during construction of the
   *  half edge structure and can be cleared for static scenes */
private:

  /*! map with all vertex creases */
  std::unique_ptr<VertexCreaseMap> vertexCreaseMap;

  /*! map with all edge creases */
  std::unique_ptr<EdgeCreaseMap> edgeCreaseMap;

protected:

  /*! counts number of geometry commits */
  size_t commitCounter;
};
namespace isa
{
/*! ISA-specialized subdivision mesh: provides the vectorized
 *  interpolation entry points */
struct SubdivMeshISA : public SubdivMesh
{
  SubdivMeshISA (Device* device)
    : SubdivMesh(device) {}

  void interpolate(const RTCInterpolateArguments* const args);
  void interpolateN(const RTCInterpolateNArguments* const args);
};
}
DECLARE_ISA_FUNCTION(SubdivMesh*, createSubdivMesh, Device*);
};

Some files were not shown because too many files have changed in this diff Show more