feat: godot-engine-source-4.3-stable

This commit is contained in:
Jan van der Weide 2025-01-17 16:36:38 +01:00
parent c59a7dcade
commit 7125d019b5
11149 changed files with 5070401 additions and 0 deletions

View file

@ -0,0 +1,474 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "ray.h"
#include "point_query.h"
#include "context.h"
namespace embree
{
class Scene;
/*! Common base of all acceleration structure data. Stores the linear
 *  bounds and a type tag, and declares the geometry-deletion and clear
 *  hooks that concrete structures implement. */
class AccelData : public RefCount
{
  ALIGNED_CLASS_(16);

public:
  /*! Tag identifying the kind of acceleration structure. */
  enum Type {
    TY_UNKNOWN = 0,
    TY_ACCELN = 1,
    TY_ACCEL_INSTANCE = 2,
    TY_BVH4 = 3,
    TY_BVH8 = 4,
    TY_GPU = 5
  };

  /*! Creates data of the given type; the bounds start out empty. */
  AccelData (const Type type)
    : bounds(empty), type(type) {}

  /*! Notification that some geometry was deleted; does nothing by default. */
  virtual void deleteGeometry(size_t geomID) {}

  /*! Clears the acceleration structure data. */
  virtual void clear() = 0;

  /*! Returns the plain bounding box derived from the linear bounds. */
  __forceinline BBox3fa getBounds() const { return bounds.bounds(); }

  /*! Returns the linear bounds interpolated at time t. */
  __forceinline BBox3fa getBounds(float t) const { return bounds.interpolate(t); }

  /*! Returns the linear bounds as stored. */
  __forceinline LBBox3fa getLinearBounds() const { return bounds; }

  /*! Reports emptiness: true when lower.x of the first bounding box is +inf. */
  __forceinline bool isEmpty() const {
    return bounds.bounds0.lower.x == float(pos_inf);
  }

public:
  LBBox3fa bounds; // linear bounds
  Type type;       // kind of acceleration structure
};
/*! Base class for all intersectable and buildable acceleration structures. */
class Accel : public AccelData
{
ALIGNED_CLASS_(16);
public:
struct Intersectors;

/*! Signature of the collide entry point (two opaque BVH pointers, user callback and user pointer). */
using CollideFunc = void (*)(void* bvh0, void* bvh1, RTCCollideFunc callback, void* userPtr);

/*! Signature of the point query entry point. */
using PointQueryFunc = bool (*)(Intersectors* This,          /*!< this pointer to accel */
                                PointQuery* query,           /*!< point query for lookup */
                                PointQueryContext* context); /*!< point query context */

/*! Signature of the intersect entry point for single rays. */
using IntersectFunc = void (*)(Intersectors* This, /*!< this pointer to accel */
                               RTCRayHit& ray,     /*!< ray to intersect */
                               RayQueryContext* context);

/*! Signature of the intersect entry point for ray packets of size 4. */
using IntersectFunc4 = void (*)(const void* valid,  /*!< pointer to valid mask */
                                Intersectors* This, /*!< this pointer to accel */
                                RTCRayHit4& ray,    /*!< ray packet to intersect */
                                RayQueryContext* context);

/*! Signature of the intersect entry point for ray packets of size 8. */
using IntersectFunc8 = void (*)(const void* valid,  /*!< pointer to valid mask */
                                Intersectors* This, /*!< this pointer to accel */
                                RTCRayHit8& ray,    /*!< ray packet to intersect */
                                RayQueryContext* context);

/*! Signature of the intersect entry point for ray packets of size 16. */
using IntersectFunc16 = void (*)(const void* valid,  /*!< pointer to valid mask */
                                 Intersectors* This, /*!< this pointer to accel */
                                 RTCRayHit16& ray,   /*!< ray packet to intersect */
                                 RayQueryContext* context);

/*! Signature of the occlusion entry point for single rays. */
using OccludedFunc = void (*)(Intersectors* This, /*!< this pointer to accel */
                              RTCRay& ray,        /*!< ray to test occlusion */
                              RayQueryContext* context);

/*! Signature of the occlusion entry point for ray packets of size 4. */
using OccludedFunc4 = void (*)(const void* valid,  /*!< pointer to valid mask */
                               Intersectors* This, /*!< this pointer to accel */
                               RTCRay4& ray,       /*!< ray packet to test occlusion. */
                               RayQueryContext* context);

/*! Signature of the occlusion entry point for ray packets of size 8. */
using OccludedFunc8 = void (*)(const void* valid,  /*!< pointer to valid mask */
                               Intersectors* This, /*!< this pointer to accel */
                               RTCRay8& ray,       /*!< ray packet to test occlusion. */
                               RayQueryContext* context);

/*! Signature of the occlusion entry point for ray packets of size 16. */
using OccludedFunc16 = void (*)(const void* valid,  /*!< pointer to valid mask */
                                Intersectors* This, /*!< this pointer to accel */
                                RTCRay16& ray,      /*!< ray packet to test occlusion. */
                                RayQueryContext* context);

/*! Signature of the parameterless function the intersector structs accept as a placeholder entry point. */
using ErrorFunc = void (*)();
/*! Collide entry point paired with a descriptive name. */
struct Collider
{
  /*! Unnamed collider whose entry point is the given error function (or null). */
  Collider (ErrorFunc error = nullptr)
    : collide(reinterpret_cast<CollideFunc>(error)), name(nullptr) {}

  /*! Named collider using the given collide function. */
  Collider (CollideFunc collide, const char* name)
    : collide(collide), name(name) {}

  /*! True only when a name has been assigned. */
  operator bool() const { return name != nullptr; }

public:
  CollideFunc collide; // collide entry point
  const char* name;    // descriptive name, null when unset
};
/*! Single-ray entry points (intersect, occluded, point query) paired
 *  with a descriptive name. */
struct Intersector1
{
  /*! Unnamed intersector whose intersect/occluded entry points are the
   *  given error function (or null). The point query entry point is set
   *  to null, the same "absent" value the three-argument constructor
   *  below uses; previously it was left uninitialized. */
  Intersector1 (ErrorFunc error = nullptr)
    : intersect((IntersectFunc)error), occluded((OccludedFunc)error), pointQuery(nullptr), name(nullptr) {}

  /*! Named intersector without point query support. */
  Intersector1 (IntersectFunc intersect, OccludedFunc occluded, const char* name)
    : intersect(intersect), occluded(occluded), pointQuery(nullptr), name(name) {}

  /*! Named intersector with point query support. */
  Intersector1 (IntersectFunc intersect, OccludedFunc occluded, PointQueryFunc pointQuery, const char* name)
    : intersect(intersect), occluded(occluded), pointQuery(pointQuery), name(name) {}

  /*! True only when a name has been assigned. */
  operator bool() const { return name; }

public:
  static const char* type;
  IntersectFunc intersect;   // single-ray intersect entry point
  OccludedFunc occluded;     // single-ray occlusion entry point
  PointQueryFunc pointQuery; // point query entry point, null when unsupported
  const char* name;          // descriptive name, null when unset
};
/*! Entry points for ray packets of size 4, paired with a descriptive name. */
struct Intersector4
{
  /*! Unnamed intersector whose entry points are the given error function (or null). */
  Intersector4 (ErrorFunc error = nullptr)
    : intersect(reinterpret_cast<IntersectFunc4>(error)),
      occluded(reinterpret_cast<OccludedFunc4>(error)),
      name(nullptr) {}

  /*! Named intersector using the given entry points. */
  Intersector4 (IntersectFunc4 intersect, OccludedFunc4 occluded, const char* name)
    : intersect(intersect), occluded(occluded), name(name) {}

  /*! True only when a name has been assigned. */
  operator bool() const { return name != nullptr; }

public:
  static const char* type;
  IntersectFunc4 intersect; // packet intersect entry point
  OccludedFunc4 occluded;   // packet occlusion entry point
  const char* name;         // descriptive name, null when unset
};
/*! Entry points for ray packets of size 8, paired with a descriptive name. */
struct Intersector8
{
  /*! Unnamed intersector whose entry points are the given error function (or null). */
  Intersector8 (ErrorFunc error = nullptr)
    : intersect(reinterpret_cast<IntersectFunc8>(error)),
      occluded(reinterpret_cast<OccludedFunc8>(error)),
      name(nullptr) {}

  /*! Named intersector using the given entry points. */
  Intersector8 (IntersectFunc8 intersect, OccludedFunc8 occluded, const char* name)
    : intersect(intersect), occluded(occluded), name(name) {}

  /*! True only when a name has been assigned. */
  operator bool() const { return name != nullptr; }

public:
  static const char* type;
  IntersectFunc8 intersect; // packet intersect entry point
  OccludedFunc8 occluded;   // packet occlusion entry point
  const char* name;         // descriptive name, null when unset
};
/*! Entry points for ray packets of size 16, paired with a descriptive name. */
struct Intersector16
{
  /*! Unnamed intersector whose entry points are the given error function (or null). */
  Intersector16 (ErrorFunc error = nullptr)
    : intersect(reinterpret_cast<IntersectFunc16>(error)),
      occluded(reinterpret_cast<OccludedFunc16>(error)),
      name(nullptr) {}

  /*! Named intersector using the given entry points. */
  Intersector16 (IntersectFunc16 intersect, OccludedFunc16 occluded, const char* name)
    : intersect(intersect), occluded(occluded), name(name) {}

  /*! True only when a name has been assigned. */
  operator bool() const { return name != nullptr; }

public:
  static const char* type;
  IntersectFunc16 intersect; // packet intersect entry point
  OccludedFunc16 occluded;   // packet occlusion entry point
  const char* name;          // descriptive name, null when unset
};
/*! Bundle of all traversal entry points of one acceleration structure,
 *  plus the data pointer that is handed to the collide entry point. */
struct Intersectors
{
  /*! Leaves every entry point unset and both pointers null. */
  Intersectors()
    : ptr(nullptr), leafIntersector(nullptr), collider(nullptr), intersector1(nullptr), intersector4(nullptr), intersector8(nullptr), intersector16(nullptr) {}

  /*! Routes the collider and the active intersectors to the given error function. */
  Intersectors (ErrorFunc error)
    : ptr(nullptr), leafIntersector(nullptr), collider(error), intersector1(error), intersector4(error), intersector8(error), intersector16(error) {}

  /*! Prints the name of every named entry, each line prefixed by `ident` spaces. */
  void print(size_t ident)
  {
    /* emits one indented line; entries without a name are skipped */
    auto emit = [ident] (const char* label, const char* entryName) {
      if (!entryName) return;
      for (size_t i=0; i<ident; i++) std::cout << " ";
      std::cout << label << entryName << std::endl;
    };
    emit("collider = ", collider.name);
    emit("intersector1 = ", intersector1.name);
    emit("intersector4 = ", intersector4.name);
    emit("intersector8 = ", intersector8.name);
    emit("intersector16 = ", intersector16.name);
  }

  /*! Activates the filter or no-filter variant of each packet intersector.
   *  A packet width is only switched when its filter variant is named. */
  void select(bool filter)
  {
    if (intersector4_filter)
      intersector4 = filter ? intersector4_filter : intersector4_nofilter;
    if (intersector8_filter)
      intersector8 = filter ? intersector8_filter : intersector8_nofilter;
    if (intersector16_filter)
      intersector16 = filter ? intersector16_filter : intersector16_nofilter;
  }

  /*! Runs a point query through intersector1 and returns its result. */
  __forceinline bool pointQuery (PointQuery* query, PointQueryContext* context) {
    assert(intersector1.pointQuery);
    return intersector1.pointQuery(this,query,context);
  }

  /*! Collides two scenes; the collide function receives each scene's intersectors.ptr. */
  __forceinline void collide (Accel* scene0, Accel* scene1, RTCCollideFunc callback, void* userPtr) {
    assert(collider.collide);
    collider.collide(scene0->intersectors.ptr,scene1->intersectors.ptr,callback,userPtr);
  }

  /*! Intersects a single ray with the scene. */
  __forceinline void intersect (RTCRayHit& ray, RayQueryContext* context) {
    assert(intersector1.intersect);
    intersector1.intersect(this,ray,context);
  }

  /*! Intersects a packet of 4 rays with the scene. */
  __forceinline void intersect4 (const void* valid, RTCRayHit4& ray, RayQueryContext* context) {
    assert(intersector4.intersect);
    intersector4.intersect(valid,this,ray,context);
  }

  /*! Intersects a packet of 8 rays with the scene. */
  __forceinline void intersect8 (const void* valid, RTCRayHit8& ray, RayQueryContext* context) {
    assert(intersector8.intersect);
    intersector8.intersect(valid,this,ray,context);
  }

  /*! Intersects a packet of 16 rays with the scene. */
  __forceinline void intersect16 (const void* valid, RTCRayHit16& ray, RayQueryContext* context) {
    assert(intersector16.intersect);
    intersector16.intersect(valid,this,ray,context);
  }

  /*! Overload of intersect for a packet of 4 rays; same as intersect4. */
  __forceinline void intersect (const void* valid, RTCRayHit4& ray, RayQueryContext* context) {
    intersect4(valid,ray,context);
  }

  /*! Overload of intersect for a packet of 8 rays; same as intersect8. */
  __forceinline void intersect (const void* valid, RTCRayHit8& ray, RayQueryContext* context) {
    intersect8(valid,ray,context);
  }

  /*! Overload of intersect for a packet of 16 rays; same as intersect16. */
  __forceinline void intersect (const void* valid, RTCRayHit16& ray, RayQueryContext* context) {
    intersect16(valid,ray,context);
  }

#if defined(__SSE__) || defined(__ARM_NEON)
  /*! SIMD overload: turns the boolean mask into a 32-bit lane mask and calls intersect4. */
  __forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, RayQueryContext* context) {
    const vint<4> lanes = valid.mask32();
    intersect4(&lanes,(RTCRayHit4&)ray,context);
  }
#endif

#if defined(__AVX__)
  /*! SIMD overload: turns the boolean mask into a 32-bit lane mask and calls intersect8. */
  __forceinline void intersect(const vbool8& valid, RayHitK<8>& ray, RayQueryContext* context) {
    const vint<8> lanes = valid.mask32();
    intersect8(&lanes,(RTCRayHit8&)ray,context);
  }
#endif

#if defined(__AVX512F__)
  /*! SIMD overload: turns the boolean mask into a 32-bit lane mask and calls intersect16. */
  __forceinline void intersect(const vbool16& valid, RayHitK<16>& ray, RayQueryContext* context) {
    const vint<16> lanes = valid.mask32();
    intersect16(&lanes,(RTCRayHit16&)ray,context);
  }
#endif

  /*! Tests if a single ray is occluded by the scene. */
  __forceinline void occluded (RTCRay& ray, RayQueryContext* context) {
    assert(intersector1.occluded);
    intersector1.occluded(this,ray,context);
  }

  /*! Tests if a packet of 4 rays is occluded by the scene. */
  __forceinline void occluded4 (const void* valid, RTCRay4& ray, RayQueryContext* context) {
    assert(intersector4.occluded);
    intersector4.occluded(valid,this,ray,context);
  }

  /*! Tests if a packet of 8 rays is occluded by the scene. */
  __forceinline void occluded8 (const void* valid, RTCRay8& ray, RayQueryContext* context) {
    assert(intersector8.occluded);
    intersector8.occluded(valid,this,ray,context);
  }

  /*! Tests if a packet of 16 rays is occluded by the scene. */
  __forceinline void occluded16 (const void* valid, RTCRay16& ray, RayQueryContext* context) {
    assert(intersector16.occluded);
    intersector16.occluded(valid,this,ray,context);
  }

  /*! Overload of occluded for a packet of 4 rays; same as occluded4. */
  __forceinline void occluded (const void* valid, RTCRay4& ray, RayQueryContext* context) {
    occluded4(valid,ray,context);
  }

  /*! Overload of occluded for a packet of 8 rays; same as occluded8. */
  __forceinline void occluded (const void* valid, RTCRay8& ray, RayQueryContext* context) {
    occluded8(valid,ray,context);
  }

  /*! Overload of occluded for a packet of 16 rays; same as occluded16. */
  __forceinline void occluded (const void* valid, RTCRay16& ray, RayQueryContext* context) {
    occluded16(valid,ray,context);
  }

#if defined(__SSE__) || defined(__ARM_NEON)
  /*! SIMD overload: turns the boolean mask into a 32-bit lane mask and calls occluded4. */
  __forceinline void occluded(const vbool4& valid, RayK<4>& ray, RayQueryContext* context) {
    const vint<4> lanes = valid.mask32();
    occluded4(&lanes,(RTCRay4&)ray,context);
  }
#endif

#if defined(__AVX__)
  /*! SIMD overload: turns the boolean mask into a 32-bit lane mask and calls occluded8. */
  __forceinline void occluded(const vbool8& valid, RayK<8>& ray, RayQueryContext* context) {
    const vint<8> lanes = valid.mask32();
    occluded8(&lanes,(RTCRay8&)ray,context);
  }
#endif

#if defined(__AVX512F__)
  /*! SIMD overload: turns the boolean mask into a 32-bit lane mask and calls occluded16. */
  __forceinline void occluded(const vbool16& valid, RayK<16>& ray, RayQueryContext* context) {
    const vint<16> lanes = valid.mask32();
    occluded16(&lanes,(RTCRay16&)ray,context);
  }
#endif

  /*! intersect() spelled for a plain ray (no hit record): forwards to occluded(). */
  __forceinline void intersect(RTCRay& ray, RayQueryContext* context) {
    occluded(ray, context);
  }

  /*! intersect() spelled for a packet of K plain rays: forwards to occluded(). */
  template<int K>
  __forceinline void intersect(const vbool<K>& valid, RayK<K>& ray, RayQueryContext* context) {
    occluded(valid, ray, context);
  }

public:
  AccelData* ptr;        // data pointer passed to the collide entry point
  void* leafIntersector; // opaque pointer, not used inside this struct
  Collider collider;
  Intersector1 intersector1;
  Intersector4 intersector4;           // active variant, see select()
  Intersector4 intersector4_filter;
  Intersector4 intersector4_nofilter;
  Intersector8 intersector8;           // active variant, see select()
  Intersector8 intersector8_filter;
  Intersector8 intersector8_nofilter;
  Intersector16 intersector16;         // active variant, see select()
  Intersector16 intersector16_filter;
  Intersector16 intersector16_nofilter;
};
public:
/*! Construction */
Accel (const AccelData::Type type)
: AccelData(type) {}
/*! Construction */
Accel (const AccelData::Type type, const Intersectors& intersectors)
: AccelData(type), intersectors(intersectors) {}
/*! Virtual destructor */
virtual ~Accel() {}
/*! makes the acceleration structure immutable */
virtual void immutable () {}
/*! build acceleration structure */
virtual void build () = 0;
public:
Intersectors intersectors;
};
/*! Defines a function `symbol()` that returns an Accel::Collider whose
 *  collide pointer is `collider::collide` cast to Accel::CollideFunc and
 *  whose name is the stringified `isa` token, "::" and the stringified
 *  symbol. NOTE(review): `isa` is presumably a macro naming the target
 *  instruction set; confirm against the build setup. */
#define DEFINE_COLLIDER(symbol,collider) \
Accel::Collider symbol() { \
return Accel::Collider((Accel::CollideFunc)collider::collide, \
TOSTRING(isa) "::" TOSTRING(symbol)); \
}
/*! Defines a function `symbol()` that returns an Accel::Intersector1
 *  built from `intersector::intersect`, `intersector::occluded` and
 *  `intersector::pointQuery` (each cast to the matching Accel function
 *  type), named by the stringified `isa` token, "::" and the symbol. */
#define DEFINE_INTERSECTOR1(symbol,intersector) \
Accel::Intersector1 symbol() { \
return Accel::Intersector1((Accel::IntersectFunc )intersector::intersect, \
(Accel::OccludedFunc )intersector::occluded, \
(Accel::PointQueryFunc)intersector::pointQuery,\
TOSTRING(isa) "::" TOSTRING(symbol)); \
}
/*! Defines a function `symbol()` that returns an Accel::Intersector4
 *  built from `intersector::intersect` and `intersector::occluded` (cast
 *  to the size-4 packet function types), named by the stringified `isa`
 *  token, "::" and the symbol. */
#define DEFINE_INTERSECTOR4(symbol,intersector) \
Accel::Intersector4 symbol() { \
return Accel::Intersector4((Accel::IntersectFunc4)intersector::intersect, \
(Accel::OccludedFunc4)intersector::occluded, \
TOSTRING(isa) "::" TOSTRING(symbol)); \
}
/*! Defines a function `symbol()` that returns an Accel::Intersector8
 *  built from `intersector::intersect` and `intersector::occluded` (cast
 *  to the size-8 packet function types), named by the stringified `isa`
 *  token, "::" and the symbol. */
#define DEFINE_INTERSECTOR8(symbol,intersector) \
Accel::Intersector8 symbol() { \
return Accel::Intersector8((Accel::IntersectFunc8)intersector::intersect, \
(Accel::OccludedFunc8)intersector::occluded, \
TOSTRING(isa) "::" TOSTRING(symbol)); \
}
/*! Defines a function `symbol()` that returns an Accel::Intersector16
 *  built from `intersector::intersect` and `intersector::occluded` (cast
 *  to the size-16 packet function types), named by the stringified `isa`
 *  token, "::" and the symbol. */
#define DEFINE_INTERSECTOR16(symbol,intersector) \
Accel::Intersector16 symbol() { \
return Accel::Intersector16((Accel::IntersectFunc16)intersector::intersect, \
(Accel::OccludedFunc16)intersector::occluded, \
TOSTRING(isa) "::" TOSTRING(symbol)); \
}
}

View file

@ -0,0 +1,41 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "accel.h"
#include "builder.h"
namespace embree
{
/*! Accel that pairs acceleration structure data with an optional
 *  builder. Both raw pointers passed to the constructor are stored in
 *  std::unique_ptr members, so this object takes ownership of them. */
class AccelInstance : public Accel
{
public:
  /*! Takes ownership of `accel` and `builder` and starts with a copy of `intersectors`. */
  AccelInstance (AccelData* accel, Builder* builder, Intersectors& intersectors)
    : Accel(AccelData::TY_ACCEL_INSTANCE,intersectors), accel(accel), builder(builder) {}

  /*! Makes the structure immutable by releasing the builder. */
  void immutable () {
    builder.reset(nullptr);
  }

public:
  /*! Runs the builder (if still present) and refreshes the bounds from
   *  the data. The data pointer is checked first, matching the guards in
   *  deleteGeometry() and clear(); without data the bounds are left as
   *  they were. */
  void build () {
    if (builder) builder->build();
    if (accel) bounds = accel->bounds;
  }

  /*! Forwards the geometry deletion to the data and to the builder, where present. */
  void deleteGeometry(size_t geomID) {
    if (accel ) accel->deleteGeometry(geomID);
    if (builder) builder->deleteGeometry(geomID);
  }

  /*! Clears the data and the builder, where present. */
  void clear() {
    if (accel) accel->clear();
    if (builder) builder->clear();
  }

private:
  std::unique_ptr<AccelData> accel; // owned acceleration structure data
  std::unique_ptr<Builder> builder; // owned builder, released by immutable()
};
}

View file

@ -0,0 +1,214 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "acceln.h"
#include "ray.h"
#include "../../include/embree4/rtcore_ray.h"
#include "../../common/algorithms/parallel_for.h"
namespace embree
{
/*! Creates an AccelN of type TY_ACCELN with an empty child list. */
AccelN::AccelN()
  : Accel(AccelData::TY_ACCELN),
    accels()
{
}
/*! Deletes every child acceleration structure held in the list. */
AccelN::~AccelN()
{
  for (Accel* child : accels)
    delete child;
}
/*! Appends a child acceleration structure; the pointer must not be null.
 *  Children in the list are deleted by accels_init() and by the destructor. */
void AccelN::accels_add(Accel* accel)
{
  assert(accel != nullptr);
  accels.push_back(accel);
}
void AccelN::accels_init()
{
for (size_t i=0; i<accels.size(); i++)
delete accels[i];
accels.clear();
}
/*! Point query entry point: runs the query on every non-empty child in
 *  order and returns true if any child reported a change. */
bool AccelN::pointQuery (Accel::Intersectors* This_in, PointQuery* query, PointQueryContext* context)
{
  AccelN* self = static_cast<AccelN*>(This_in->ptr);
  bool changed = false;
  for (Accel* child : self->accels) {
    if (child->isEmpty()) continue;
    changed |= child->intersectors.pointQuery(query,context);
  }
  return changed;
}
/*! Single-ray intersect entry point: intersects the ray with every non-empty child in order. */
void AccelN::intersect (Accel::Intersectors* This_in, RTCRayHit& ray, RayQueryContext* context)
{
  AccelN* self = static_cast<AccelN*>(This_in->ptr);
  for (Accel* child : self->accels) {
    if (child->isEmpty()) continue;
    child->intersectors.intersect(ray,context);
  }
}
void AccelN::intersect4 (const void* valid, Accel::Intersectors* This_in, RTCRayHit4& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++)
if (!This->accels[i]->isEmpty())
This->accels[i]->intersectors.intersect4(valid,ray,context);
}
void AccelN::intersect8 (const void* valid, Accel::Intersectors* This_in, RTCRayHit8& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++)
if (!This->accels[i]->isEmpty())
This->accels[i]->intersectors.intersect8(valid,ray,context);
}
void AccelN::intersect16 (const void* valid, Accel::Intersectors* This_in, RTCRayHit16& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++)
if (!This->accels[i]->isEmpty())
This->accels[i]->intersectors.intersect16(valid,ray,context);
}
/*! Single-ray occlusion entry point: tests the ray against every
 *  non-empty child in order and stops as soon as ray.tfar is negative. */
void AccelN::occluded (Accel::Intersectors* This_in, RTCRay& ray, RayQueryContext* context)
{
  AccelN* self = static_cast<AccelN*>(This_in->ptr);
  for (Accel* child : self->accels)
  {
    if (!child->isEmpty())
    {
      child->intersectors.occluded(ray,context);
      /* negative tfar ends the search (presumably marks the ray as occluded) */
      if (ray.tfar < 0.0f) break;
    }
  }
}
void AccelN::occluded4 (const void* valid, Accel::Intersectors* This_in, RTCRay4& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++) {
if (This->accels[i]->isEmpty()) continue;
This->accels[i]->intersectors.occluded4(valid,ray,context);
#if defined(__SSE2__) || defined(__ARM_NEON)
vbool4 valid0 = asBool(((vint4*)valid)[0]);
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
if (unlikely(none(valid0 & hit0))) break;
#endif
}
}
void AccelN::occluded8 (const void* valid, Accel::Intersectors* This_in, RTCRay8& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++) {
if (This->accels[i]->isEmpty()) continue;
This->accels[i]->intersectors.occluded8(valid,ray,context);
#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
vbool4 valid0 = asBool(((vint4*)valid)[0]);
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
vbool4 valid1 = asBool(((vint4*)valid)[1]);
vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
if (unlikely((none((valid0 & hit0) | (valid1 & hit1))))) break;
#endif
}
}
void AccelN::occluded16 (const void* valid, Accel::Intersectors* This_in, RTCRay16& ray, RayQueryContext* context)
{
AccelN* This = (AccelN*)This_in->ptr;
for (size_t i=0; i<This->accels.size(); i++) {
if (This->accels[i]->isEmpty()) continue;
This->accels[i]->intersectors.occluded16(valid,ray,context);
#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA
vbool4 valid0 = asBool(((vint4*)valid)[0]);
vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero);
vbool4 valid1 = asBool(((vint4*)valid)[1]);
vbool4 hit1 = ((vfloat4*)ray.tfar)[1] >= vfloat4(zero);
vbool4 valid2 = asBool(((vint4*)valid)[2]);
vbool4 hit2 = ((vfloat4*)ray.tfar)[2] >= vfloat4(zero);
vbool4 valid3 = asBool(((vint4*)valid)[3]);
vbool4 hit3 = ((vfloat4*)ray.tfar)[3] >= vfloat4(zero);
if (unlikely((none((valid0 & hit0) | (valid1 & hit1) | (valid2 & hit2) | (valid3 & hit3))))) break;
#endif
}
}
void AccelN::accels_print(size_t ident)
{
for (size_t i=0; i<accels.size(); i++)
{
for (size_t j=0; j<ident; j++) std::cout << " ";
std::cout << "accels[" << i << "]" << std::endl;
accels[i]->intersectors.print(ident+2);
}
}
void AccelN::accels_immutable()
{
for (size_t i=0; i<accels.size(); i++)
accels[i]->immutable();
}
void AccelN::accels_build ()
{
/* reduce memory consumption */
accels.shrink_to_fit();
/* build all acceleration structures in parallel */
parallel_for (accels.size(), [&] (size_t i) {
accels[i]->build();
});
/* create list of non-empty acceleration structures */
bool valid1 = true;
bool valid4 = true;
bool valid8 = true;
bool valid16 = true;
for (size_t i=0; i<accels.size(); i++) {
valid1 &= (bool) accels[i]->intersectors.intersector1;
valid4 &= (bool) accels[i]->intersectors.intersector4;
valid8 &= (bool) accels[i]->intersectors.intersector8;
valid16 &= (bool) accels[i]->intersectors.intersector16;
}
if (accels.size() == 1) {
type = accels[0]->type; // FIXME: should just assign entire Accel
bounds = accels[0]->bounds;
intersectors = accels[0]->intersectors;
}
else
{
type = AccelData::TY_ACCELN;
intersectors.ptr = this;
intersectors.intersector1 = Intersector1(&intersect,&occluded,&pointQuery,valid1 ? "AccelN::intersector1": nullptr);
intersectors.intersector4 = Intersector4(&intersect4,&occluded4,valid4 ? "AccelN::intersector4" : nullptr);
intersectors.intersector8 = Intersector8(&intersect8,&occluded8,valid8 ? "AccelN::intersector8" : nullptr);
intersectors.intersector16 = Intersector16(&intersect16,&occluded16,valid16 ? "AccelN::intersector16": nullptr);
/*! calculate bounds */
bounds = empty;
for (size_t i=0; i<accels.size(); i++)
bounds.extend(accels[i]->bounds);
}
}
void AccelN::accels_select(bool filter)
{
for (size_t i=0; i<accels.size(); i++)
accels[i]->intersectors.select(filter);
}
void AccelN::accels_deleteGeometry(size_t geomID)
{
for (size_t i=0; i<accels.size(); i++)
accels[i]->deleteGeometry(geomID);
}
void AccelN::accels_clear()
{
for (size_t i=0; i<accels.size(); i++) {
accels[i]->clear();
}
}
}

View file

@ -0,0 +1,47 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "accel.h"
namespace embree
{
/*! Combines N acceleration structures into one by processing them in
 *  list order. */
class AccelN : public Accel
{
public:
  AccelN ();
  ~AccelN();

  /* child list management */
  void accels_add(Accel* accel);
  void accels_init();

  /* static entry points matching the Accel function pointer types */
  static bool pointQuery (Accel::Intersectors* This, PointQuery* query, PointQueryContext* context);

  static void intersect (Accel::Intersectors* This, RTCRayHit& ray, RayQueryContext* context);
  static void intersect4 (const void* valid, Accel::Intersectors* This, RTCRayHit4& ray, RayQueryContext* context);
  static void intersect8 (const void* valid, Accel::Intersectors* This, RTCRayHit8& ray, RayQueryContext* context);
  static void intersect16 (const void* valid, Accel::Intersectors* This, RTCRayHit16& ray, RayQueryContext* context);

  static void occluded (Accel::Intersectors* This, RTCRay& ray, RayQueryContext* context);
  static void occluded4 (const void* valid, Accel::Intersectors* This, RTCRay4& ray, RayQueryContext* context);
  static void occluded8 (const void* valid, Accel::Intersectors* This, RTCRay8& ray, RayQueryContext* context);
  static void occluded16 (const void* valid, Accel::Intersectors* This, RTCRay16& ray, RayQueryContext* context);

  /* operations applied to the child list */
  void accels_print(size_t ident);
  void accels_immutable();
  void accels_build ();
  void accels_select(bool filter);
  void accels_deleteGeometry(size_t geomID);
  void accels_clear ();

public:
  std::vector<Accel*> accels; // child acceleration structures, stored as raw pointers
};
}

View file

@ -0,0 +1,17 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "accelset.h"
#include "scene.h"
namespace embree
{
/*! Creates the geometry with the given item and time step counts (both
 *  narrowed to unsigned int); no bounds function is set yet. */
AccelSet::AccelSet (Device* device, Geometry::GType gtype, size_t numItems, size_t numTimeSteps)
  : Geometry(device,gtype,static_cast<unsigned int>(numItems),static_cast<unsigned int>(numTimeSteps)),
    boundsFunc(nullptr)
{
}
/*! Unnamed intersector whose entry points are the given error function. */
AccelSet::IntersectorN::IntersectorN (ErrorFunc error)
  : intersect(reinterpret_cast<IntersectFuncN>(error)),
    occluded(reinterpret_cast<OccludedFuncN>(error)),
    name(nullptr)
{
}
/*! Named intersector using the given intersect and occluded functions. */
AccelSet::IntersectorN::IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name)
  : intersect(intersect),
    occluded(occluded),
    name(name)
{
}
}

View file

@ -0,0 +1,347 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "builder.h"
#include "geometry.h"
#include "ray.h"
#include "hit.h"
namespace embree
{
struct IntersectFunctionNArguments;
struct OccludedFunctionNArguments;
/*! RTCIntersectFunctionNArguments extended by three internal fields. */
struct IntersectFunctionNArguments : public RTCIntersectFunctionNArguments
{
  Geometry* geometry;          // geometry the callback is invoked for
  RTCScene forward_scene;      // scene handle stored alongside the arguments
  RTCIntersectArguments* args; // intersect arguments of the query, may be null
};
/*! RTCOccludedFunctionNArguments extended by three internal fields. */
struct OccludedFunctionNArguments : public RTCOccludedFunctionNArguments
{
  Geometry* geometry;          // geometry the callback is invoked for
  RTCScene forward_scene;      // scene handle stored alongside the arguments
  RTCIntersectArguments* args; // intersect arguments of the query, may be null
};
/*! Base class for set of acceleration structures. */
class AccelSet : public Geometry
{
public:
typedef RTCIntersectFunctionN IntersectFuncN;
typedef RTCOccludedFunctionN OccludedFuncN;
typedef void (*ErrorFunc) ();
struct IntersectorN
{
IntersectorN (ErrorFunc error = nullptr) ;
IntersectorN (IntersectFuncN intersect, OccludedFuncN occluded, const char* name);
operator bool() const { return name; }
public:
static const char* type;
IntersectFuncN intersect;
OccludedFuncN occluded;
const char* name;
};
public:
/*! construction */
AccelSet (Device* device, Geometry::GType gtype, size_t items, size_t numTimeSteps);
/*! makes the acceleration structure immutable */
virtual void immutable () {}
/*! build accel */
virtual void build () = 0;
/*! check if the i'th primitive is valid between the specified time range */
__forceinline bool valid(size_t i, const range<size_t>& itime_range) const
{
for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
if (!isvalid_non_empty(bounds(i,itime))) return false;
return true;
}
/*! Calculates the bounds of an item */
__forceinline BBox3fa bounds(size_t i, size_t itime = 0) const
{
BBox3fa box;
assert(i < size());
RTCBoundsFunctionArguments args;
args.geometryUserPtr = userPtr;
args.primID = (unsigned int)i;
args.timeStep = (unsigned int)itime;
args.bounds_o = (RTCBounds*)&box;
boundsFunc(&args);
return box;
}
/*! calculates the linear bounds of the i'th item at the itime'th time segment */
__forceinline LBBox3fa linearBounds(size_t i, size_t itime) const
{
BBox3fa box[2];
assert(i < size());
RTCBoundsFunctionArguments args;
args.geometryUserPtr = userPtr;
args.primID = (unsigned int)i;
args.timeStep = (unsigned int)(itime+0);
args.bounds_o = (RTCBounds*)&box[0];
boundsFunc(&args);
args.timeStep = (unsigned int)(itime+1);
args.bounds_o = (RTCBounds*)&box[1];
boundsFunc(&args);
return LBBox3fa(box[0],box[1]);
}
/*! calculates the build bounds of the i'th item, if it's valid */
__forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
{
const BBox3fa b = bounds(i);
if (bbox) *bbox = b;
return isvalid_non_empty(b);
}
/*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
__forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
{
const LBBox3fa bounds = linearBounds(i,itime);
bbox = bounds.bounds0; // use bounding box of first timestep to build BVH
return isvalid_non_empty(bounds);
}
/*! calculates the linear bounds of the i'th primitive for the specified time range */
__forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
}
/*! calculates the linear bounds of the i'th primitive for the specified time range */
__forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const {
if (!valid(i, timeSegmentRange(time_range))) return false;
bbox = linearBounds(i, time_range);
return true;
}
/* gets version info of topology */
unsigned int getTopologyVersion() const {
return numPrimitives;
}
/* returns true if topology changed */
bool topologyChanged(unsigned int otherVersion) const {
return numPrimitives != otherVersion;
}
public:
/*! Intersects a single ray with the scene. */
__forceinline bool intersect (RayHit& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
assert(primID < size());
int mask = -1;
IntersectFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.rayhit = (RTCRayHitN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = context->args;
IntersectFuncN intersectFunc = nullptr;
intersectFunc = intersectorN.intersect;
if (context->getIntersectFunction())
intersectFunc = context->getIntersectFunction();
assert(intersectFunc);
intersectFunc(&args);
return mask != 0;
}
/*! Tests if single ray is occluded by the scene. */
__forceinline bool occluded (Ray& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
assert(primID < size());
int mask = -1;
OccludedFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.ray = (RTCRayN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = context->args;
OccludedFuncN occludedFunc = nullptr;
occludedFunc = intersectorN.occluded;
if (context->getOccludedFunction())
occludedFunc = context->getOccludedFunction();
assert(occludedFunc);
occludedFunc(&args);
return mask != 0;
}
/*! Intersects a single ray with the scene. */
__forceinline bool intersect (RayHit& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context, RTCScene& forward_scene)
{
assert(primID < size());
int mask = -1;
IntersectFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.rayhit = (RTCRayHitN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = nullptr;
typedef void (*RTCIntersectFunctionSYCL)(const void* args);
RTCIntersectFunctionSYCL intersectFunc = nullptr;
#if EMBREE_SYCL_GEOMETRY_CALLBACK
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY)
intersectFunc = (RTCIntersectFunctionSYCL) intersectorN.intersect;
#endif
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS)
if (context->getIntersectFunction())
intersectFunc = (RTCIntersectFunctionSYCL) context->getIntersectFunction();
if (intersectFunc)
intersectFunc(&args);
forward_scene = args.forward_scene;
return mask != 0;
}
/*! Tests if single ray is occluded by the scene. */
__forceinline bool occluded (Ray& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context, RTCScene& forward_scene)
{
assert(primID < size());
int mask = -1;
OccludedFunctionNArguments args;
args.valid = &mask;
args.geometryUserPtr = userPtr;
args.context = context->user;
args.ray = (RTCRayN*)&ray;
args.N = 1;
args.geomID = geomID;
args.primID = primID;
args.geometry = this;
args.forward_scene = nullptr;
args.args = nullptr;
typedef void (*RTCOccludedFunctionSYCL)(const void* args);
RTCOccludedFunctionSYCL occludedFunc = nullptr;
#if EMBREE_SYCL_GEOMETRY_CALLBACK
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY)
occludedFunc = (RTCOccludedFunctionSYCL) intersectorN.occluded;
#endif
if (context->args->feature_mask & RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS)
if (context->getOccludedFunction())
occludedFunc = (RTCOccludedFunctionSYCL) context->getOccludedFunction();
if (occludedFunc)
occludedFunc(&args);
forward_scene = args.forward_scene;
return mask != 0;
}
/*! Intersects a packet of K rays with primitive primID of this set.
 *  The callback stored in the query context is used when it is set,
 *  otherwise the one registered in intersectorN. */
template<int K>
__forceinline void intersect (const vbool<K>& valid, RayHitK<K>& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
  assert(primID < size());

  /* per-lane 32 bit validity mask handed to the callback */
  vint<K> lane_mask = valid.mask32();

  IntersectFunctionNArguments cb_args;
  cb_args.valid = (int*)&lane_mask;
  cb_args.geometryUserPtr = userPtr;
  cb_args.context = context->user;
  cb_args.rayhit = (RTCRayHitN*)&ray;
  cb_args.N = K;
  cb_args.geomID = geomID;
  cb_args.primID = primID;
  cb_args.geometry = this;
  cb_args.forward_scene = nullptr;
  cb_args.args = context->args;

  /* start with the callback of the geometry, let the context override it */
  IntersectFuncN callback = intersectorN.intersect;
  const auto from_context = context->getIntersectFunction();
  if (from_context)
    callback = from_context;

  assert(callback);
  callback(&cb_args);
}
/*! Occlusion test of a packet of K rays against primitive primID of this
 *  set. The callback stored in the query context is used when it is set,
 *  otherwise the one registered in intersectorN. */
template<int K>
__forceinline void occluded (const vbool<K>& valid, RayK<K>& ray, unsigned int geomID, unsigned int primID, RayQueryContext* context)
{
  assert(primID < size());

  /* per-lane 32 bit validity mask handed to the callback */
  vint<K> lane_mask = valid.mask32();

  OccludedFunctionNArguments cb_args;
  cb_args.valid = (int*)&lane_mask;
  cb_args.geometryUserPtr = userPtr;
  cb_args.context = context->user;
  cb_args.ray = (RTCRayN*)&ray;
  cb_args.N = K;
  cb_args.geomID = geomID;
  cb_args.primID = primID;
  cb_args.geometry = this;
  cb_args.forward_scene = nullptr;
  cb_args.args = context->args;

  /* start with the callback of the geometry, let the context override it */
  OccludedFuncN callback = intersectorN.occluded;
  const auto from_context = context->getOccludedFunction();
  if (from_context)
    callback = from_context;

  assert(callback);
  callback(&cb_args);
}
public:
RTCBoundsFunction boundsFunc;
IntersectorN intersectorN;
};
/*! Defines a function symbol() that returns an AccelSet::IntersectorN built
 *  from intersector::intersect and intersector::occluded. The third
 *  constructor argument is a string literal assembled from TOSTRING(isa),
 *  "::" and TOSTRING(symbol); presumably TOSTRING stringifies its argument
 *  and isa names the ISA namespace being compiled - TODO confirm. */
#define DEFINE_SET_INTERSECTORN(symbol,intersector) \
AccelSet::IntersectorN symbol() { \
return AccelSet::IntersectorN(intersector::intersect, \
intersector::occluded, \
TOSTRING(isa) "::" TOSTRING(symbol)); \
}
}

View file

@ -0,0 +1,82 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "alloc.h"
#include "../../common/sys/thread.h"
#if defined(APPLE) && defined(__aarch64__)
#include "../../common/sys/barrier.h"
#endif
namespace embree
{
/*! per-thread pointer to a ThreadLocal2 object, initially null */
__thread FastAllocator::ThreadLocal2* FastAllocator::thread_local_allocator2 = nullptr;
/*! process-wide lock; presumably protects s_thread_local_allocators - TODO confirm in alloc.h */
MutexSys FastAllocator::s_thread_local_allocators_lock;
/*! process-wide list that owns ThreadLocal2 objects through unique_ptr */
std::vector<std::unique_ptr<FastAllocator::ThreadLocal2>> FastAllocator::s_thread_local_allocators;
struct fast_allocator_regression_test : public RegressionTest
{
BarrierSys barrier;
std::atomic<size_t> numFailed;
std::unique_ptr<FastAllocator> alloc;
fast_allocator_regression_test()
: RegressionTest("fast_allocator_regression_test"), numFailed(0)
{
registerRegressionTest(this);
}
static void thread_alloc(fast_allocator_regression_test* This)
{
FastAllocator::CachedAllocator threadalloc = This->alloc->getCachedAllocator();
size_t* ptrs[1000];
for (size_t j=0; j<1000; j++)
{
This->barrier.wait();
for (size_t i=0; i<1000; i++) {
ptrs[i] = (size_t*) threadalloc.malloc0(sizeof(size_t)+(i%32));
*ptrs[i] = size_t(threadalloc.talloc0) + i;
}
for (size_t i=0; i<1000; i++) {
if (*ptrs[i] != size_t(threadalloc.talloc0) + i)
This->numFailed++;
}
This->barrier.wait();
}
}
bool run ()
{
alloc = make_unique(new FastAllocator(nullptr,false));
numFailed.store(0);
size_t numThreads = getNumberOfLogicalThreads();
barrier.init(numThreads+1);
/* create threads */
std::vector<thread_t> threads;
for (size_t i=0; i<numThreads; i++)
threads.push_back(createThread((thread_func)thread_alloc,this));
/* run test */
for (size_t i=0; i<1000; i++)
{
alloc->reset();
barrier.wait();
barrier.wait();
}
/* destroy threads */
for (size_t i=0; i<numThreads; i++)
join(threads[i]);
alloc = nullptr;
return numFailed == 0;
}
};
fast_allocator_regression_test fast_allocator_regression;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,280 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "device.h"
namespace embree
{
/*! Implements an API data buffer object. This class may or may not own the data. */
class Buffer : public RefCount
{
public:
  /*! Buffer construction */
  //Buffer()
  //: device(nullptr), ptr(nullptr), numBytes(0), shared(false) {}

  /*! Creates a buffer of numBytes_in bytes. When ptr_in is non-null the
   *  buffer wraps that memory without owning it (shared), otherwise it
   *  allocates its own storage through the device. A reference on the
   *  device is taken here and dropped again in the destructor. */
  Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr)
    : device(device), numBytes(numBytes_in)
  {
    device->refInc();

    if (ptr_in)
    {
      shared = true;
      ptr = (char*)ptr_in;
    }
    else
    {
      shared = false;
      alloc();
    }
  }

  /*! Buffer destruction, releases owned storage and the device reference */
  ~Buffer() {
    free();
    device->refDec();
  }

  /*! this class is not copyable */
private:
  Buffer(const Buffer& other) DELETED; // do not implement
  Buffer& operator =(const Buffer& other) DELETED; // do not implement

public:
  /*! inits and allocates the buffer */
  void create(Device* device_in, size_t numBytes_in)
  {
    init(device_in, numBytes_in);
    alloc();
  }

  /*! inits the buffer: releases owned storage and leaves the buffer
   *  unallocated with the new device and size */
  void init(Device* device_in, size_t numBytes_in)
  {
    free();
    device = device_in;
    ptr = nullptr;
    numBytes = numBytes_in;
    shared = false;
  }

  /*! sets shared buffer; passing (size_t)-1 as numBytes_in keeps the
   *  current size */
  void set(Device* device_in, void* ptr_in, size_t numBytes_in)
  {
    free();
    device = device_in;
    ptr = (char*)ptr_in;
    if (numBytes_in != (size_t)-1)
      numBytes = numBytes_in;
    shared = true;
  }

  /*! allocates the buffer; the size is reported to the memory monitor
   *  first and the allocation is rounded up to a multiple of 16 bytes */
  void alloc()
  {
    device->memoryMonitor(this->bytes(), false);
    size_t b = (this->bytes()+15) & ssize_t(-16);
    ptr = (char*)device->malloc(b,16);
  }

  /*! frees the buffer; does nothing for shared memory or when no storage
   *  is currently owned */
  void free()
  {
    if (shared) return;

    /* Without this guard a buffer that was only init()-ed, or that was
       already freed, would report a negative size for memory that was
       never allocated and skew the memory monitor accounting. */
    if (ptr == nullptr) return;

    device->free(ptr);
    device->memoryMonitor(-ssize_t(this->bytes()), true);
    ptr = nullptr;
  }

  /*! gets buffer pointer */
  void* data()
  {
    /* report error if buffer does not exist */
    if (!device)
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");

    /* return buffer */
    return ptr;
  }

  /*! returns pointer to first element */
  __forceinline char* getPtr() const {
    return ptr;
  }

  /*! returns the number of bytes of the buffer */
  __forceinline size_t bytes() const {
    return numBytes;
  }

  /*! returns true if the buffer is not empty, i.e. has a non-null pointer */
  __forceinline operator bool() const {
    return ptr != nullptr;
  }

public:
  Device* device; //!< device to report memory usage to
  char* ptr; //!< pointer to buffer data
  size_t numBytes; //!< number of bytes in the buffer
  bool shared; //!< set if memory is shared with application
};
/*! An untyped contiguous range of a buffer. This class does not own the buffer content. */
class RawBufferView
{
public:
/*! Buffer construction */
RawBufferView()
: ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
public:
/*! sets the buffer view */
void set(const Ref<Buffer>& buffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in)
{
if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");
ptr_ofs = buffer_in->ptr + offset_in;
stride = stride_in;
num = num_in;
format = format_in;
modCounter++;
modified = true;
buffer = buffer_in;
}
/*! returns pointer to the first element */
__forceinline char* getPtr() const {
return ptr_ofs;
}
/*! returns pointer to the i'th element */
__forceinline char* getPtr(size_t i) const
{
assert(i<num);
return ptr_ofs + i*stride;
}
/*! returns the number of elements of the buffer */
__forceinline size_t size() const {
return num;
}
/*! returns the number of bytes of the buffer */
__forceinline size_t bytes() const {
return num*stride;
}
/*! returns the buffer stride */
__forceinline unsigned getStride() const
{
assert(stride <= unsigned(inf));
return unsigned(stride);
}
/*! return the buffer format */
__forceinline RTCFormat getFormat() const {
return format;
}
/*! mark buffer as modified or unmodified */
__forceinline void setModified() {
modCounter++;
modified = true;
}
/*! mark buffer as modified or unmodified */
__forceinline bool isModified(unsigned int otherModCounter) const {
return modCounter > otherModCounter;
}
/*! mark buffer as modified or unmodified */
__forceinline bool isLocalModified() const {
return modified;
}
/*! clear local modified flag */
__forceinline void clearLocalModified() {
modified = false;
}
/*! returns true of the buffer is not empty */
__forceinline operator bool() const {
return ptr_ofs;
}
/*! checks padding to 16 byte check, fails hard */
__forceinline void checkPadding16() const
{
if (ptr_ofs && num)
volatile int MAYBE_UNUSED w = *((int*)getPtr(size()-1)+3); // FIXME: is failing hard avoidable?
}
public:
char* ptr_ofs; //!< base pointer plus offset
size_t stride; //!< stride of the buffer in bytes
size_t num; //!< number of elements in the buffer
RTCFormat format; //!< format of the buffer
unsigned int modCounter; //!< version ID of this buffer
bool modified; //!< local modified data
int userData; //!< special data
Ref<Buffer> buffer; //!< reference to the parent buffer
};
/*! A typed contiguous range of a buffer. This class does not own the buffer content. */
template<typename T>
class BufferView : public RawBufferView
{
public:
  using value_type = T;

  /*! mutable access to the i'th element, located i*stride bytes after the view start */
  __forceinline T& operator [](size_t i)
  {
    assert(i<num);
    char* const element = ptr_ofs + i*stride;
    return *(T*)element;
  }

  /*! read access to the i'th element */
  __forceinline const T& operator [](size_t i) const
  {
    assert(i<num);
    char* const element = ptr_ofs + i*stride;
    return *(T*)element;
  }
};
/*! Specialization for Vec3fa elements: elements are returned by value and
 *  written through store(), both using unaligned loads and stores. */
template<>
class BufferView<Vec3fa> : public RawBufferView
{
public:
  using value_type = Vec3fa;

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)

  /*! returns a copy of the i'th element */
  __forceinline const Vec3fa operator [](size_t i) const
  {
    assert(i<num);
    char* const element = ptr_ofs + i*stride;
    return Vec3fa::loadu(element);
  }

  /*! writes the i'th element */
  __forceinline void store(size_t i, const Vec3fa& v)
  {
    assert(i<num);
    char* const element = ptr_ofs + i*stride;
    Vec3fa::storeu(element, v);
  }

#else

  /*! returns a copy of the i'th element, loaded as four floats */
  __forceinline const Vec3fa operator [](size_t i) const
  {
    assert(i<num);
    float* const element = (float*)(ptr_ofs + i*stride);
    return Vec3fa(vfloat4::loadu(element));
  }

  /*! writes the i'th element as four floats */
  __forceinline void store(size_t i, const Vec3fa& v)
  {
    assert(i<num);
    float* const element = (float*)(ptr_ofs + i*stride);
    vfloat4::storeu(element, (vfloat4)v);
  }

#endif
};
}

View file

@ -0,0 +1,60 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "accel.h"
namespace embree
{
/*! flag with only bit 8 set; presumably OR-ed into a builder mode value to request a high quality build - TODO confirm against its users */
#define MODE_HIGH_QUALITY (1<<8)
/*! virtual interface for all hierarchy builders */
class Builder : public RefCount {
public:
/*! threshold constant shared by builders; presumably the primitive count below which a build runs on a single thread - TODO confirm against its users */
static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;
/*! initiates the hierarchy builder */
virtual void build() = 0;
/*! notifies the builder about the deletion of some geometry; the default implementation does nothing */
virtual void deleteGeometry(size_t geomID) {};
/*! clears internal builder state */
virtual void clear() = 0;
};
/*! virtual interface for progress monitor class; implementations receive a value dn on every call */
struct BuildProgressMonitor
{
  virtual void operator() (size_t dn) const = 0;
};

/*! adapts any callable taking a size_t to the progress monitor interface;
 *  the callable is copied into the adapter */
template<typename Closure>
struct ProgressMonitorClosure : BuildProgressMonitor
{
public:
  ProgressMonitorClosure (const Closure& closure)
    : callback(closure) {}

  /*! forwards dn to the stored callable */
  void operator() (size_t dn) const override
  {
    callback(dn);
  }

private:
  const Closure callback; //!< private copy of the wrapped callable
};
/*! wraps closure into a ProgressMonitorClosure and returns the wrapper by value */
template<typename Closure>
__forceinline const ProgressMonitorClosure<Closure> BuildProgressMonitorFromClosure(const Closure& closure)
{
  typedef ProgressMonitorClosure<Closure> Monitor;
  return Monitor(closure);
}
/* geometry and scene types referenced by the factory signatures below */
struct LineSegments;
struct TriangleMesh;
struct QuadMesh;
struct UserGeometry;
class Scene;

/* Factory function pointer types. Each function receives the scene plus
   either the mesh or its geometry ID, and hands back an acceleration
   structure and a builder through the two reference parameters. */
using createLineSegmentsAccelTy = void (*)(Scene* scene, LineSegments* mesh, AccelData*& accel, Builder*& builder);
using createTriangleMeshAccelTy = void (*)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
using createQuadMeshAccelTy = void (*)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
using createUserGeometryAccelTy = void (*)(Scene* scene, unsigned int geomID, AccelData*& accel, Builder*& builder);
}

View file

@ -0,0 +1,173 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "rtcore.h"
#include "point_query.h"
namespace embree
{
class Scene;
/*! Bundles the scene, the user supplied ray query context and the query
 *  arguments of one intersect or occluded call. Occluded arguments are
 *  stored through a pointer cast to RTCIntersectArguments, so every
 *  accessor reads them through that type. */
struct RayQueryContext
{
public:
  /*! context for an intersect query */
  __forceinline RayQueryContext(Scene* scene, RTCRayQueryContext* user_context, RTCIntersectArguments* args)
    : scene(scene), user(user_context), args(args) {}

  /*! context for an occluded query, the arguments are reinterpreted as intersect arguments */
  __forceinline RayQueryContext(Scene* scene, RTCRayQueryContext* user_context, RTCOccludedArguments* args)
    : scene(scene), user(user_context), args((RTCIntersectArguments*)args) {}

  /*! true when the arguments carry a filter function */
  __forceinline bool hasContextFilter() const {
    return getFilter() != nullptr;
  }

  /*! filter function of the arguments, may be null */
  RTCFilterFunctionN getFilter() const {
    return args->filter;
  }

  /*! intersect callback of the arguments, may be null */
  RTCIntersectFunctionN getIntersectFunction() const {
    return args->intersect;
  }

  /*! occluded callback of the arguments; read from the intersect slot and
   *  cast, since the arguments are held as RTCIntersectArguments */
  RTCOccludedFunctionN getOccludedFunction() const {
    return (RTCOccludedFunctionN) args->intersect;
  }

  /*! evaluates embree::isCoherent on the query flags */
  __forceinline bool isCoherent() const {
    return embree::isCoherent(args->flags);
  }

  /*! evaluates embree::isIncoherent on the query flags */
  __forceinline bool isIncoherent() const {
    return embree::isIncoherent(args->flags);
  }

  /*! true when RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER is set in the query flags */
  __forceinline bool enforceArgumentFilterFunction() const {
    return (args->flags & RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER) != 0;
  }

#if RTC_MIN_WIDTH
  /*! min-width distance factor of the arguments */
  __forceinline float getMinWidthDistanceFactor() const {
    return args->minWidthDistanceFactor;
  }
#endif

public:
  Scene* scene = nullptr;                //!< scene being queried
  RTCRayQueryContext* user = nullptr;    //!< user supplied query context
  RTCIntersectArguments* args = nullptr; //!< query arguments, possibly cast from RTCOccludedArguments
};
/*! SIMD variant. With RTC_MIN_WIDTH enabled, replaces the radius v.w by
 *  the min-width factor of the context times the distance between v and
 *  ray_org, clamped to the range [v.w, geom->maxRadiusScale*v.w]. Without
 *  RTC_MIN_WIDTH v is returned unchanged. */
template<int M, typename Geometry>
__forceinline Vec4vf<M> enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3vf<M>& ray_org, const Vec4vf<M>& v)
{
#if RTC_MIN_WIDTH
  const vfloat<M> dist = length(Vec3vf<M>(v) - ray_org);
  const auto wanted = context->getMinWidthDistanceFactor()*dist;
  const auto upper = geom->maxRadiusScale*v.w;
  const vfloat<M> radius = clamp(wanted, v.w, upper);
  return Vec4vf<M>(v.x,v.y,v.z,radius);
#else
  return v;
#endif
}
/*! Scalar variant. With RTC_MIN_WIDTH enabled, replaces the radius v.w by
 *  the min-width factor of the context times the distance between v and
 *  ray_org, clamped to the range [v.w, geom->maxRadiusScale*v.w]. Without
 *  RTC_MIN_WIDTH v is returned unchanged. */
template<typename Geometry>
__forceinline Vec3ff enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec3ff& v)
{
#if RTC_MIN_WIDTH
  const float dist = length(Vec3fa(v) - ray_org);
  const auto wanted = context->getMinWidthDistanceFactor()*dist;
  const auto upper = geom->maxRadiusScale*v.w;
  const float radius = clamp(wanted, v.w, upper);
  return Vec3ff(v.x,v.y,v.z,radius);
#else
  return v;
#endif
}
/*! Vec4f convenience overload, repacks v as a Vec3ff and forwards to the Vec3ff variant */
template<typename Geometry>
__forceinline Vec3ff enlargeRadiusToMinWidth(const RayQueryContext* context, const Geometry* geom, const Vec3fa& ray_org, const Vec4f& v)
{
  const Vec3ff packed(v.x,v.y,v.z,v.w);
  return enlargeRadiusToMinWidth(context,geom,ray_org,packed);
}
/*! kind of query volume a point query context works with */
enum PointQueryType
{
POINT_QUERY_TYPE_UNDEFINED = 0, // no query type selected
POINT_QUERY_TYPE_SPHERE = 1, // presumably a spherical query region - TODO confirm
POINT_QUERY_TYPE_AABB = 2, // presumably an axis aligned box query region - TODO confirm
};
/*! function pointer type of a point query callback: receives a pointer to RTCPointQueryFunctionArguments and returns a bool */
typedef bool (*PointQueryFunction)(struct RTCPointQueryFunctionArguments* args);
/*! State of one point query: the scene, the original world space query,
 *  the query type, the callback with its user data, and the query radius
 *  derived from them. */
struct PointQueryContext
{
public:
  /*! stores all arguments, starts with invalid primID/geomID and a null
   *  tstate, then derives the query radius through update() */
  __forceinline PointQueryContext(Scene* scene,
                                  PointQuery* query_ws,
                                  PointQueryType query_type,
                                  PointQueryFunction func,
                                  RTCPointQueryContext* userContext,
                                  float similarityScale,
                                  void* userPtr)
    : scene(scene)
    , tstate(nullptr)
    , query_ws(query_ws)
    , query_type(query_type)
    , func(func)
    , userContext(userContext)
    , similarityScale(similarityScale)
    , userPtr(userPtr)
    , primID(RTC_INVALID_GEOMETRY_ID)
    , geomID(RTC_INVALID_GEOMETRY_ID)
    , query_radius(query_ws->radius)
  {
    update();
  }

public:
  /*! Recomputes query_radius. AABB queries delegate to updateAABB(), all
   *  other types scale the world space radius by similarityScale. */
  __forceinline void update()
  {
    if (query_type != POINT_QUERY_TYPE_AABB) {
      query_radius = Vec3fa(query_ws->radius * similarityScale);
    }
    else {
      assert(similarityScale == 0.f);
      updateAABB();
    }

    /* with an empty instance stack the scale has to be exactly one */
    assert(userContext->instStackSize != 0 || similarityScale == 1.f);
  }

  /*! Recomputes query_radius for AABB queries. An infinite radius or an
   *  empty instance stack keeps the world space radius. Otherwise the box
   *  [-radius, radius] is transformed by the top world2inst matrix of the
   *  instance stack and half the extent of the result is used. */
  __forceinline void updateAABB()
  {
    const bool keep_world_radius = query_ws->radius == (float)inf || userContext->instStackSize == 0;
    if (likely(keep_world_radius)) {
      query_radius = Vec3fa(query_ws->radius);
      return;
    }

    const AffineSpace3fa m = AffineSpace3fa_load_unaligned((AffineSpace3fa*)userContext->world2inst[userContext->instStackSize-1]);
    const BBox3fa world_box(Vec3fa(-query_ws->radius), Vec3fa(query_ws->radius));
    const BBox3fa local_box = xfmBounds(m, world_box);
    query_radius = 0.5f * (local_box.upper - local_box.lower);
  }

public:
  Scene* scene;
  void* tstate;
  PointQuery* query_ws; // the original world space point query
  PointQueryType query_type;
  PointQueryFunction func;
  RTCPointQueryContext* userContext;
  float similarityScale;
  void* userPtr;
  unsigned int primID;
  unsigned int geomID;
  Vec3fa query_radius; // used if the query is converted to an AABB internally
};
}

View file

@ -0,0 +1,266 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/sys/platform.h"
#include "../../common/sys/sysinfo.h"
#include "../../common/sys/thread.h"
#include "../../common/sys/alloc.h"
#include "../../common/sys/ref.h"
#include "../../common/sys/intrinsics.h"
#include "../../common/sys/atomic.h"
#include "../../common/sys/mutex.h"
#include "../../common/sys/vector.h"
#include "../../common/sys/array.h"
#include "../../common/sys/estring.h"
#include "../../common/sys/regression.h"
#include "../../common/sys/vector.h"
#include "../../common/math/emath.h"
#include "../../common/math/transcendental.h"
#include "../../common/simd/simd.h"
#include "../../common/math/vec2.h"
#include "../../common/math/vec3.h"
#include "../../common/math/vec4.h"
#include "../../common/math/vec2fa.h"
#include "../../common/math/vec3fa.h"
#include "../../common/math/interval.h"
#include "../../common/math/bbox.h"
#include "../../common/math/obbox.h"
#include "../../common/math/lbbox.h"
#include "../../common/math/linearspace2.h"
#include "../../common/math/linearspace3.h"
#include "../../common/math/affinespace.h"
#include "../../common/math/range.h"
#include "../../common/lexers/tokenstream.h"
/* expands to a single comma; presumably used to pass a comma inside a macro argument without splitting it - TODO confirm against its users */
#define COMMA ,
#include "../config.h"
#include "isa.h"
#include "stat.h"
#include "profile.h"
#include "rtcore.h"
#include "vector.h"
#include "state.h"
#include "instance_stack.h"
#include <vector>
#include <map>
#include <algorithm>
#include <functional>
#include <utility>
#include <sstream>
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// Vec2 shortcuts
////////////////////////////////////////////////////////////////////////////////

/* generic width N */
template<int N> using Vec2vf = Vec2<vfloat<N>>;
template<int N> using Vec2vd = Vec2<vdouble<N>>;
template<int N> using Vec2vr = Vec2<vreal<N>>;
template<int N> using Vec2vi = Vec2<vint<N>>;
template<int N> using Vec2vl = Vec2<vllong<N>>;
template<int N> using Vec2vb = Vec2<vbool<N>>;
template<int N> using Vec2vbf = Vec2<vboolf<N>>;
template<int N> using Vec2vbd = Vec2<vboold<N>>;

/* width 4 */
using Vec2vf4 = Vec2<vfloat4>;
using Vec2vd4 = Vec2<vdouble4>;
using Vec2vr4 = Vec2<vreal4>;
using Vec2vi4 = Vec2<vint4>;
using Vec2vl4 = Vec2<vllong4>;
using Vec2vb4 = Vec2<vbool4>;
using Vec2vbf4 = Vec2<vboolf4>;
using Vec2vbd4 = Vec2<vboold4>;

/* width 8 */
using Vec2vf8 = Vec2<vfloat8>;
using Vec2vd8 = Vec2<vdouble8>;
using Vec2vr8 = Vec2<vreal8>;
using Vec2vi8 = Vec2<vint8>;
using Vec2vl8 = Vec2<vllong8>;
using Vec2vb8 = Vec2<vbool8>;
using Vec2vbf8 = Vec2<vboolf8>;
using Vec2vbd8 = Vec2<vboold8>;

/* width 16 */
using Vec2vf16 = Vec2<vfloat16>;
using Vec2vd16 = Vec2<vdouble16>;
using Vec2vr16 = Vec2<vreal16>;
using Vec2vi16 = Vec2<vint16>;
using Vec2vl16 = Vec2<vllong16>;
using Vec2vb16 = Vec2<vbool16>;
using Vec2vbf16 = Vec2<vboolf16>;
using Vec2vbd16 = Vec2<vboold16>;

/* the "x" SIMD types */
using Vec2vfx = Vec2<vfloatx>;
using Vec2vdx = Vec2<vdoublex>;
using Vec2vrx = Vec2<vrealx>;
using Vec2vix = Vec2<vintx>;
using Vec2vlx = Vec2<vllongx>;
using Vec2vbx = Vec2<vboolx>;
using Vec2vbfx = Vec2<vboolfx>;
using Vec2vbdx = Vec2<vbooldx>;
////////////////////////////////////////////////////////////////////////////////
/// Vec3 shortcuts
////////////////////////////////////////////////////////////////////////////////

/* generic width N */
template<int N> using Vec3vf = Vec3<vfloat<N>>;
template<int N> using Vec3vd = Vec3<vdouble<N>>;
template<int N> using Vec3vr = Vec3<vreal<N>>;
template<int N> using Vec3vi = Vec3<vint<N>>;
template<int N> using Vec3vl = Vec3<vllong<N>>;
template<int N> using Vec3vb = Vec3<vbool<N>>;
template<int N> using Vec3vbf = Vec3<vboolf<N>>;
template<int N> using Vec3vbd = Vec3<vboold<N>>;

/* width 4 */
using Vec3vf4 = Vec3<vfloat4>;
using Vec3vd4 = Vec3<vdouble4>;
using Vec3vr4 = Vec3<vreal4>;
using Vec3vi4 = Vec3<vint4>;
using Vec3vl4 = Vec3<vllong4>;
using Vec3vb4 = Vec3<vbool4>;
using Vec3vbf4 = Vec3<vboolf4>;
using Vec3vbd4 = Vec3<vboold4>;

/* width 8 */
using Vec3vf8 = Vec3<vfloat8>;
using Vec3vd8 = Vec3<vdouble8>;
using Vec3vr8 = Vec3<vreal8>;
using Vec3vi8 = Vec3<vint8>;
using Vec3vl8 = Vec3<vllong8>;
using Vec3vb8 = Vec3<vbool8>;
using Vec3vbf8 = Vec3<vboolf8>;
using Vec3vbd8 = Vec3<vboold8>;

/* width 16 */
using Vec3vf16 = Vec3<vfloat16>;
using Vec3vd16 = Vec3<vdouble16>;
using Vec3vr16 = Vec3<vreal16>;
using Vec3vi16 = Vec3<vint16>;
using Vec3vl16 = Vec3<vllong16>;
using Vec3vb16 = Vec3<vbool16>;
using Vec3vbf16 = Vec3<vboolf16>;
using Vec3vbd16 = Vec3<vboold16>;

/* the "x" SIMD types */
using Vec3vfx = Vec3<vfloatx>;
using Vec3vdx = Vec3<vdoublex>;
using Vec3vrx = Vec3<vrealx>;
using Vec3vix = Vec3<vintx>;
using Vec3vlx = Vec3<vllongx>;
using Vec3vbx = Vec3<vboolx>;
using Vec3vbfx = Vec3<vboolfx>;
using Vec3vbdx = Vec3<vbooldx>;
////////////////////////////////////////////////////////////////////////////////
/// Vec4 shortcuts
////////////////////////////////////////////////////////////////////////////////

/* generic width N */
template<int N> using Vec4vf = Vec4<vfloat<N>>;
template<int N> using Vec4vd = Vec4<vdouble<N>>;
template<int N> using Vec4vr = Vec4<vreal<N>>;
template<int N> using Vec4vi = Vec4<vint<N>>;
template<int N> using Vec4vl = Vec4<vllong<N>>;
template<int N> using Vec4vb = Vec4<vbool<N>>;
template<int N> using Vec4vbf = Vec4<vboolf<N>>;
template<int N> using Vec4vbd = Vec4<vboold<N>>;

/* width 4 */
using Vec4vf4 = Vec4<vfloat4>;
using Vec4vd4 = Vec4<vdouble4>;
using Vec4vr4 = Vec4<vreal4>;
using Vec4vi4 = Vec4<vint4>;
using Vec4vl4 = Vec4<vllong4>;
using Vec4vb4 = Vec4<vbool4>;
using Vec4vbf4 = Vec4<vboolf4>;
using Vec4vbd4 = Vec4<vboold4>;

/* width 8 */
using Vec4vf8 = Vec4<vfloat8>;
using Vec4vd8 = Vec4<vdouble8>;
using Vec4vr8 = Vec4<vreal8>;
using Vec4vi8 = Vec4<vint8>;
using Vec4vl8 = Vec4<vllong8>;
using Vec4vb8 = Vec4<vbool8>;
using Vec4vbf8 = Vec4<vboolf8>;
using Vec4vbd8 = Vec4<vboold8>;

/* width 16 */
using Vec4vf16 = Vec4<vfloat16>;
using Vec4vd16 = Vec4<vdouble16>;
using Vec4vr16 = Vec4<vreal16>;
using Vec4vi16 = Vec4<vint16>;
using Vec4vl16 = Vec4<vllong16>;
using Vec4vb16 = Vec4<vbool16>;
using Vec4vbf16 = Vec4<vboolf16>;
using Vec4vbd16 = Vec4<vboold16>;

/* the "x" SIMD types */
using Vec4vfx = Vec4<vfloatx>;
using Vec4vdx = Vec4<vdoublex>;
using Vec4vrx = Vec4<vrealx>;
using Vec4vix = Vec4<vintx>;
using Vec4vlx = Vec4<vllongx>;
using Vec4vbx = Vec4<vboolx>;
using Vec4vbfx = Vec4<vboolfx>;
using Vec4vbdx = Vec4<vbooldx>;
////////////////////////////////////////////////////////////////////////////////
/// Other shortcuts
////////////////////////////////////////////////////////////////////////////////

/* bounding boxes over SIMD float vectors */
template<int N> using BBox3vf = BBox<Vec3vf<N>>;
using BBox3vf4 = BBox<Vec3vf4>;
using BBox3vf8 = BBox<Vec3vf8>;
using BBox3vf16 = BBox<Vec3vf16>;
/* Calculates the time segment index for a time, and stores the fractional
   position inside that segment in ftime. The index is clamped to the range
   [0, numTimeSegments-1]; ftime itself is not clamped. */
__forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime)
{
  const float scaled = time * numTimeSegments;
  const float last_segment = numTimeSegments-1.0f;
  const float segment = clamp(floor(scaled), 0.0f, last_segment);
  ftime = scaled - segment;
  return int(segment);
}
/* Same as above for a time given relative to the range
   [start_time, end_time]: the time is first mapped to that range, then
   scaled by the number of segments. */
__forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime)
{
  const float scaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
  const float last_segment = numTimeSegments-1.0f;
  const float segment = clamp(floor(scaled), 0.0f, last_segment);
  ftime = scaled - segment;
  return int(segment);
}
/* SIMD variant: calculates the time segment index per lane and stores the
   fractional position inside the segment in ftime. The index is clamped to
   [0, numTimeSegments-1] per lane. */
template<int N>
__forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
{
  const vfloat<N> scaled = time * numTimeSegments;
  const vfloat<N> segment = clamp(floor(scaled), vfloat<N>(zero), numTimeSegments-1.0f);
  ftime = scaled - segment;
  return vint<N>(segment);
}
/* SIMD variant for times given relative to [start_time, end_time] per
   lane: the time is first mapped to that range, then scaled by the number
   of segments. */
template<int N>
__forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
{
  const vfloat<N> scaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
  const vfloat<N> segment = clamp(floor(scaled), vfloat<N>(zero), numTimeSegments-1.0f);
  ftime = scaled - segment;
  return vint<N>(segment);
}
/* Calculates the range of time segments overlapped by time_range. The
   lower bound is rounded down and clamped to 0, the upper bound is rounded
   up and clamped to numTimeSegments. */
__forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments)
{
  /* nudge both ends by two ulp to correct inaccuracies and precisely match time steps */
  const float nudge_up = 1.0f+2.0f*float(ulp);
  const float nudge_down = 1.0f-2.0f*float(ulp);

  const float first = floor(nudge_up *time_range.lower*numTimeSegments);
  const float last = ceil (nudge_down*time_range.upper*numTimeSegments);

  const int segment_begin = (int)max(first, 0.0f);
  const int segment_end = (int)min(last, numTimeSegments);
  return make_range(segment_begin, segment_end);
}
/* Calculates the overlapped time segment range for a range given relative
   to time_range: both ends are first mapped to time_range, then passed to
   the variant above. */
__forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments)
{
  const float duration = time_range.size();
  const float rel_lower = (range.lower-time_range.lower)/duration;
  const float rel_upper = (range.upper-time_range.lower)/duration;
  const BBox1f relative(rel_lower,rel_upper);
  return getTimeSegmentRange(relative,numTimeSegments);
}
}

View file

@ -0,0 +1,730 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "device.h"
#include "../../common/tasking/taskscheduler.h"
#include "../hash.h"
#include "scene_triangle_mesh.h"
#include "scene_user_geometry.h"
#include "scene_instance.h"
#include "scene_curves.h"
#include "scene_subdiv_mesh.h"
#include "../subdiv/tessellation_cache.h"
#include "acceln.h"
#include "geometry.h"
#include "../geometry/cylinder.h"
#include "../bvh/bvh4_factory.h"
#include "../bvh/bvh8_factory.h"
#include "../../common/sys/alloc.h"
#if defined(EMBREE_SYCL_SUPPORT)
# include "../level_zero/ze_wrapper.h"
#endif
namespace embree
{
/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
ssize_t Device::debug_int0 = 0;
ssize_t Device::debug_int1 = 0;
ssize_t Device::debug_int2 = 0;
ssize_t Device::debug_int3 = 0;
/*! file-local lock; presumably guards the two maps below - TODO confirm against their users */
static MutexSys g_mutex;
/*! file-local map from device to a size value; presumably the cache size requested by each device - TODO confirm */
static std::map<Device*,size_t> g_cache_size_map;
/*! file-local map from device to a size value; presumably the thread count requested by each device - TODO confirm */
static std::map<Device*,size_t> g_num_threads_map;
/*! Holder type for the task arena of a device. It only contains a
 *  tbb::task_arena when USE_TASK_ARENA is enabled, otherwise it is empty. */
struct TaskArena
{
#if USE_TASK_ARENA
std::unique_ptr<tbb::task_arena> arena;
#endif
};
/*! Constructs a device from the configuration string cfg. In order: checks
 *  that the CPU supports the lowest compiled ISA, selects a default
 *  frequency level for the detected CPU model, parses and verifies the
 *  configuration, checks the selected ISA, initializes huge page support,
 *  sets the tessellation cache size, optionally unmasks some floating point
 *  exceptions, prints diagnostics depending on verbosity, creates the BVH
 *  factories and starts the tasking system. Reports
 *  RTC_ERROR_UNSUPPORTED_CPU through throw_RTCError when the CPU lacks the
 *  lowest or the selected ISA. */
Device::Device (const char* cfg) : arena(new TaskArena())
{
/* check that CPU supports lowest ISA */
if (!hasISA(ISA)) {
throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
}
/* set default frequency level for detected CPU */
switch (getCPUModel()) {
case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;
case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
}
/* initialize global state */
#if defined(EMBREE_CONFIG)
/* the compile time configuration is parsed first, the cfg argument is parsed after it */
State::parseString(EMBREE_CONFIG);
#endif
State::parseString(cfg);
State::verify();
/* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */
if (!checkISASupport()) {
throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
}
/*! do some internal tests */
assert(isa::Cylinder::verify());
/*! enable huge page support if desired */
#if defined(__WIN32__)
if (State::enable_selockmemoryprivilege)
State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
#endif
State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));
/*! set tessellation cache size */
setCacheSize( State::tessellation_cache_size );
/*! enable some floating point exceptions to catch bugs */
if (State::float_exceptions)
{
/* start with all exceptions masked, then unmask denormal and divide-by-zero */
int exceptions = _MM_MASK_MASK;
//exceptions &= ~_MM_MASK_INVALID;
exceptions &= ~_MM_MASK_DENORM;
exceptions &= ~_MM_MASK_DIV_ZERO;
//exceptions &= ~_MM_MASK_OVERFLOW;
//exceptions &= ~_MM_MASK_UNDERFLOW;
//exceptions &= ~_MM_MASK_INEXACT;
_MM_SET_EXCEPTION_MASK(exceptions);
}
/* print info header */
if (State::verbosity(1))
print();
if (State::verbosity(2))
State::print();
/* register all algorithms */
bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
#if defined(EMBREE_TARGET_SIMD8)
bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
#endif
/* setup tasking system */
initTaskingSystem(numThreads);
}
/*! Destroys the device: sets its cache size to 0, then shuts down the tasking system. */
Device::~Device ()
{
setCacheSize(0);
exitTaskingSystem();
}
/*! Returns the names of all ISA targets enabled at compile time. Every
 *  name is followed by a space; the string is empty when no
 *  EMBREE_TARGET_* macro is defined. */
std::string getEnabledTargets()
{
  std::string targets;
#if defined(EMBREE_TARGET_SSE2)
  targets.append("SSE2 ");
#endif
#if defined(EMBREE_TARGET_SSE42)
  targets.append("SSE4.2 ");
#endif
#if defined(EMBREE_TARGET_AVX)
  targets.append("AVX ");
#endif
#if defined(EMBREE_TARGET_AVX2)
  targets.append("AVX2 ");
#endif
#if defined(EMBREE_TARGET_AVX512)
  targets.append("AVX512 ");
#endif
  return targets;
}
/*! Returns the names of all optional features enabled at compile time.
 *  Every name is followed by a space; the string is empty when none of the
 *  feature macros is defined. */
std::string getEmbreeFeatures()
{
  std::string features;
#if defined(EMBREE_RAY_MASK)
  features.append("raymasks ");
#endif
#if defined (EMBREE_BACKFACE_CULLING)
  features.append("backfaceculling ");
#endif
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
  features.append("backfacecullingcurves ");
#endif
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
  features.append("backfacecullingspheres ");
#endif
#if defined(EMBREE_FILTER_FUNCTION)
  features.append("intersection_filter ");
#endif
#if defined (EMBREE_COMPACT_POLYS)
  features.append("compact_polys ");
#endif
  return features;
}
/*! Prints a description of the build, the host CPU and the device
 *  configuration to std::cout. When the FTZ or DAZ bit is not set in MXCSR
 *  a warning is printed as well; in builds without _DEBUG the warning is
 *  only printed at verbosity level 1. */
void Device::print()
{
const int cpu_features = getCPUFeatures();
std::cout << std::endl;
std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
std::cout << " Compiler : " << getCompilerName() << std::endl;
std::cout << " Build : ";
#if defined(DEBUG)
std::cout << "Debug " << std::endl;
#else
std::cout << "Release " << std::endl;
#endif
std::cout << " Platform : " << getPlatformName() << std::endl;
std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
/* read the flush-to-zero and denormals-are-zero bits of the MXCSR register */
const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
std::cout << " Config" << std::endl;
/* a thread count of 0 is printed as "default" */
std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
std::cout << " Features: " << getEmbreeFeatures() << std::endl;
std::cout << " Tasking : ";
#if defined(TASKING_TBB)
std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
/* the runtime interface version function is called unqualified for TBB interface versions >= 12002 and through namespace tbb otherwise */
#if TBB_INTERFACE_VERSION >= 12002
std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
#else
std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
#endif
#endif
#if defined(TASKING_INTERNAL)
std::cout << "internal_tasking_system ";
#endif
#if defined(TASKING_PPL)
std::cout << "PPL ";
#endif
std::cout << std::endl;
/* check if FTZ and DAZ flags are set in CSR */
if (!hasFTZ || !hasDAZ)
{
/* the verbosity check below is compiled out when _DEBUG is defined, so the warning is then always printed */
#if !defined(_DEBUG)
if (State::verbosity(1))
#endif
{
std::cout << std::endl;
std::cout << "================================================================================" << std::endl;
std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
<< " in the MXCSR control and status register. This can have a severe " << std::endl
<< " performance impact. Please enable these modes for each application " << std::endl
<< " thread the following way:" << std::endl
<< std::endl
<< " #include \"xmmintrin.h\"" << std::endl
<< " #include \"pmmintrin.h\"" << std::endl
<< std::endl
<< " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
<< " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
std::cout << "================================================================================" << std::endl;
std::cout << std::endl;
}
}
std::cout << std::endl;
}
/*! Records an error code in the slot returned by errorHandler.error().
 *  A code that is already stored there is kept, so the first error
 *  reported since the last query wins. */
void Device::setDeviceErrorCode(RTCError error)
{
  RTCError& slot = *errorHandler.error();
  if (slot != RTC_ERROR_NONE)
    return; // keep the earlier error
  slot = error;
}
/*! Returns the error code held in the slot returned by
 *  errorHandler.error() and resets that slot to RTC_ERROR_NONE. */
RTCError Device::getDeviceErrorCode()
{
  RTCError& slot = *errorHandler.error();
  const RTCError pending = slot;
  slot = RTC_ERROR_NONE;
  return pending;
}
/*! Records an error code in the slot returned by the global
 *  g_errorHandler.error(). An error that is already stored is not
 *  overwritten. */
void Device::setThreadErrorCode(RTCError error)
{
  RTCError& slot = *g_errorHandler.error();
  if (slot != RTC_ERROR_NONE)
    return; // keep the earlier error
  slot = error;
}
/*! Returns the error code held in the slot returned by the global
 *  g_errorHandler.error() and resets that slot to RTC_ERROR_NONE. */
RTCError Device::getThreadErrorCode()
{
  RTCError& slot = *g_errorHandler.error();
  const RTCError pending = slot;
  slot = RTC_ERROR_NONE;
  return pending;
}
/*! Central error sink.
 *
 *  \param device device the error belongs to; when null the error is only
 *                stored through setThreadErrorCode()
 *  \param error  error code to report
 *  \param str    optional detail message, may be null
 *
 *  With a device the error is printed to std::cerr if device->verbosity(1)
 *  holds, then passed to the user error callback (if one is set) and
 *  finally stored through setDeviceErrorCode(). */
void Device::process_error(Device* device, RTCError error, const char* str)
{
  /* store global error code when device construction failed */
  if (device == nullptr) {
    setThreadErrorCode(error);
    return;
  }

  /* print error when in verbose mode */
  if (device->verbosity(1))
  {
    const char* description = "Embree: Invalid error code";
    switch (error) {
    case RTC_ERROR_NONE             : description = "Embree: No error"; break;
    case RTC_ERROR_UNKNOWN          : description = "Embree: Unknown error"; break;
    case RTC_ERROR_INVALID_ARGUMENT : description = "Embree: Invalid argument"; break;
    case RTC_ERROR_INVALID_OPERATION: description = "Embree: Invalid operation"; break;
    case RTC_ERROR_OUT_OF_MEMORY    : description = "Embree: Out of memory"; break;
    case RTC_ERROR_UNSUPPORTED_CPU  : description = "Embree: Unsupported CPU"; break;
    default                         : break;
    };
    std::cerr << description;
    if (str != nullptr)
      std::cerr << ", (" << str << ")";
    std::cerr << std::endl;
  }

  /* call user specified error callback */
  if (device->error_function) {
    device->error_function(device->error_function_userptr,error,str);
  }

  /* record error code */
  device->setDeviceErrorCode(error);
}
/*! Forwards a memory event to the user supplied memory monitor callback.
 *
 *  Nothing happens when no callback is set or when bytes is 0. When the
 *  callback returns false and bytes is positive, RTC_ERROR_OUT_OF_MEMORY
 *  is raised through throw_RTCError. */
void Device::memoryMonitor(ssize_t bytes, bool post)
{
  if (bytes == 0 || !State::memory_monitor_function)
    return;

  if (State::memory_monitor_function(State::memory_monitor_userptr,bytes,post))
    return;

  /* only throw exception when we allocate memory to never throw inside a destructor */
  if (bytes > 0) {
    throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
  }
}
size_t getMaxNumThreads()
{
size_t maxNumThreads = 0;
for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
maxNumThreads = max(maxNumThreads, (*i).second);
if (maxNumThreads == 0)
maxNumThreads = std::numeric_limits<size_t>::max();
return maxNumThreads;
}
size_t getMaxCacheSize()
{
size_t maxCacheSize = 0;
for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
maxCacheSize = max(maxCacheSize, (*i).second);
return maxCacheSize;
}
/*! Registers the cache size requested by this device (0 removes the
 *  registration) and resizes the tessellation cache to the largest size
 *  requested by any device. Only has an effect when
 *  EMBREE_GEOMETRY_SUBDIVISION is defined. */
void Device::setCacheSize(size_t bytes)
{
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
  Lock<MutexSys> lock(g_mutex);

  if (bytes != 0)
    g_cache_size_map[this] = bytes;
  else
    g_cache_size_map.erase(this);

  resizeTessellationCache(getMaxCacheSize());
#endif
}
void Device::initTaskingSystem(size_t numThreads)
{
Lock<MutexSys> lock(g_mutex);
if (numThreads == 0)
g_num_threads_map[this] = std::numeric_limits<size_t>::max();
else
g_num_threads_map[this] = numThreads;
/* create task scheduler */
size_t maxNumThreads = getMaxNumThreads();
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
#if USE_TASK_ARENA
const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
#endif
}
void Device::exitTaskingSystem()
{
Lock<MutexSys> lock(g_mutex);
g_num_threads_map.erase(this);
/* terminate tasking system */
if (g_num_threads_map.size() == 0) {
TaskScheduler::destroy();
}
/* or configure new number of threads */
else {
size_t maxNumThreads = getMaxNumThreads();
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
}
#if USE_TASK_ARENA
arena->arena.reset();
#endif
}
/*! Runs func. With USE_TASK_ARENA and join set, func is executed through
 *  the task arena of this device; in every other case it is called
 *  directly. */
void Device::execute(bool join, const std::function<void()>& func)
{
#if USE_TASK_ARENA
  if (join) {
    arena->arena->execute(func);
    return;
  }
#endif
  func();
}
/*! Sets a writable device property. Only the hidden internal ids
 *  1000000..1000003 are accepted, they map to debug_int0..debug_int3.
 *  Every other id raises RTC_ERROR_INVALID_ARGUMENT. */
void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
{
  /* hidden internal properties */
  const size_t iprop = (size_t)prop;
  if (iprop == 1000000) { debug_int0 = val; return; }
  if (iprop == 1000001) { debug_int1 = val; return; }
  if (iprop == 1000002) { debug_int2 = val; return; }
  if (iprop == 1000003) { debug_int3 = val; return; }

  throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
}
/*! Returns the value of a readable device property.
 *
 *  Property ids in [2000000,3000000) and [3000000,4000000) are hidden hooks
 *  for the internal regression tests; all other ids are handled by the
 *  switch over the documented RTC_DEVICE_PROPERTY_* values. Most of those
 *  values are fixed at compile time by the EMBREE_* / TASKING_* macros.
 *  An unknown id raises RTC_ERROR_INVALID_ARGUMENT. */
ssize_t Device::getProperty(const RTCDeviceProperty prop)
{
size_t iprop = (size_t)prop;
/* get name of internal regression test */
if (iprop >= 2000000 && iprop < 3000000)
{
RegressionTest* test = getRegressionTest(iprop-2000000);
/* the pointer to the name string is returned cast to ssize_t; 0 if there is no such test */
if (test) return (ssize_t) test->name.c_str();
else return 0;
}
/* run internal regression test */
if (iprop >= 3000000 && iprop < 4000000)
{
RegressionTest* test = getRegressionTest(iprop-3000000);
/* result of test->run() is passed through; 0 if there is no such test */
if (test) return test->run();
else return 0;
}
/* documented properties */
switch (prop)
{
case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;
/* native ray packet support needs the SIMD target and ray packets compiled in, plus the ISA check at runtime */
#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);
#else
case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;
#endif
#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);
#else
case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;
#endif
#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512);
#else
case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
#endif
/* compile time feature switches: 1 if the macro is defined, 0 otherwise */
#if defined(EMBREE_RAY_MASK)
case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;
#endif
#if defined(EMBREE_BACKFACE_CULLING)
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;
#else
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;
#endif
#if defined(EMBREE_BACKFACE_CULLING_CURVES)
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;
#else
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
#endif
#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
#else
case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
#endif
#if defined(EMBREE_COMPACT_POLYS)
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
#else
case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0;
#endif
#if defined(EMBREE_FILTER_FUNCTION)
case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;
#endif
#if defined(EMBREE_IGNORE_INVALID_RAYS)
case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;
#else
case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;
#endif
/* tasking system id: 0 = internal, 1 = TBB, 2 = PPL */
#if defined(TASKING_INTERNAL)
case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
#endif
#if defined(TASKING_TBB)
case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
#endif
#if defined(TASKING_PPL)
case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
#endif
/* geometry types compiled into this build */
#if defined(EMBREE_GEOMETRY_TRIANGLE)
case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;
#endif
#if defined(EMBREE_GEOMETRY_CURVE)
case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;
#endif
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;
#endif
#if defined(EMBREE_GEOMETRY_USER)
case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;
#endif
#if defined(EMBREE_GEOMETRY_POINT)
case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;
#endif
/* join commit is reported as unsupported for PPL and for TBB with interface major version below 8 */
#if defined(TASKING_PPL)
case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
#else
case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;
#endif
/* parallel commit is reported only for TBB with task isolation enabled */
#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;
#else
case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
#endif
default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
};
}
/*! Allocates size bytes with the requested alignment through alignedMalloc(). */
void* Device::malloc(size_t size, size_t align)
{
  void* const memory = alignedMalloc(size,align);
  return memory;
}
/*! Releases a pointer through alignedFree(). */
void Device::free(void* ptr)
{
  alignedFree(ptr);
}
#if defined(EMBREE_SYCL_SUPPORT)
/*! Constructs a GPU device on top of a SYCL context.
 *
 *  Steps: initialize ZeWrapper, pick the first device of the context,
 *  query the Level Zero driver extensions, initialize the RTAS builder
 *  (Level Zero extension or internal, depending on
 *  EMBREE_SYCL_L0_RTAS_BUILDER), probe that a parallel operation can be
 *  created, cache two device limits and finally call zeRTASInitExp().
 *  Failures are reported through throw_RTCError(RTC_ERROR_UNKNOWN, ...). */
DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
: Device(cfg), gpu_context(sycl_context)
{
/* initialize ZeWrapper */
if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
/* take first device as default device */
auto devices = gpu_context.get_devices();
if (devices.size() == 0)
throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
gpu_device = devices[0];
/* check if RTAS build extension is available */
sycl::platform platform = gpu_device.get_platform();
ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
/* two-pass query: the first call runs with count == 0 and an empty vector, the vector is then resized to count and the call repeated */
uint32_t count = 0;
std::vector<ze_driver_extension_properties_t> extensions;
ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
extensions.resize(count);
result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
/* the Level Zero builder requires the ZE_experimental_rtas_builder driver extension */
bool ze_rtas_builder = false;
for (uint32_t i=0; i<extensions.size(); i++)
{
if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
ze_rtas_builder = true;
}
if (!ze_rtas_builder)
throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
#else
/* NOTE(review): the result of initRTASBuilder is not checked on this path */
ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
#endif
if (State::verbosity(1))
{
if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
std::cout << " Internal RTAS Builder" << std::endl;
else
std::cout << " Level Zero RTAS Builder" << std::endl;
}
/* check if extension library can get loaded */
ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
/* the parallel operation was only created as a probe, destroy it again; other error results are ignored here */
if (result == ZE_RESULT_SUCCESS)
ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
/* cache device limits */
gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();
if (State::verbosity(1))
{
/* this local shadows the platform variable declared above */
sycl::platform platform = gpu_context.get_platform();
std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
std::cout << std::endl;
}
dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
}
/*! Passes dispatchGlobalsPtr and the SYCL context to rthwifCleanup(). */
DeviceGPU::~DeviceGPU()
{
  rthwifCleanup(this, dispatchGlobalsPtr, gpu_context);
}
/*! Calls enableUSMAllocEmbree() with the context and device of this object. */
void DeviceGPU::enter()
{
  enableUSMAllocEmbree(&gpu_context, &gpu_device);
}
/*! Calls disableUSMAllocEmbree(), the counterpart of the call made in enter(). */
void DeviceGPU::leave()
{
  disableUSMAllocEmbree();
}
/*! Allocates size bytes with the requested alignment through
 *  alignedSYCLMalloc(), using the EMBREE_USM_SHARED_DEVICE_READ_ONLY mode. */
void* DeviceGPU::malloc(size_t size, size_t align)
{
  void* const memory = alignedSYCLMalloc(&gpu_context, &gpu_device, size, align, EMBREE_USM_SHARED_DEVICE_READ_ONLY);
  return memory;
}
/*! Releases a pointer through alignedSYCLFree() on the context of this device. */
void DeviceGPU::free(void* ptr)
{
  alignedSYCLFree(&gpu_context, ptr);
}
/*! Replaces the SYCL device stored in this object. */
void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in)
{
  gpu_device = sycl_device_in;
}
#endif
/*! Enters the device behind an RTCDevice handle: takes a reference on it
 *  and calls enter(). The destructor undoes both. */
DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
  : device((Device*)hdevice)
{
  assert(device);
  device->refInc(); // reference is dropped again in the destructor
  device->enter();
}
/*! Enters the device that owns the scene behind an RTCScene handle: takes
 *  a reference on it and calls enter(). The destructor undoes both. */
DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
  : device(((Scene*)hscene)->device)
{
  assert(device);
  device->refInc(); // reference is dropped again in the destructor
  device->enter();
}
/*! Enters the device that owns the geometry behind an RTCGeometry handle:
 *  takes a reference on it and calls enter(). The destructor undoes both. */
DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
  : device(((Geometry*)hgeometry)->device)
{
  assert(device);
  device->refInc(); // reference is dropped again in the destructor
  device->enter();
}
/*! Enters the device that owns the buffer behind an RTCBuffer handle:
 *  takes a reference on it and calls enter(). The destructor undoes both. */
DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
  : device(((Buffer*)hbuffer)->device)
{
  assert(device);
  device->refInc(); // reference is dropped again in the destructor
  device->enter();
}
/*! Leaves the device and drops the reference taken by the constructor,
 *  in that order. */
DeviceEnterLeave::~DeviceEnterLeave()
{
  device->leave();
  device->refDec();
}
}

View file

@ -0,0 +1,194 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "state.h"
#include "accel.h"
namespace embree
{
class BVH4Factory;
class BVH8Factory;
struct TaskArena;
/*! Embree device: combines the State configuration with error handling,
 *  memory monitoring, property access, buffer allocation and ownership
 *  of the BVH factories. */
class Device : public State, public MemoryMonitorInterface
{
ALIGNED_CLASS_(16);
public:
/*! allocator that performs unified shared memory allocations */
/*  All memory is obtained from and returned to a Device through its
    virtual malloc()/free(); T is the element type, alignment the
    alignment passed to Device::malloc(). */
template<typename T, size_t alignment>
struct allocator
{
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
/*! creates an allocator without device; allocate() asserts on such an allocator */
allocator() {}
/*! creates an allocator bound to the given device */
allocator(Device* device)
: device(device) {}
/*! allocates uninitialized storage for n elements of type T */
__forceinline pointer allocate( size_type n ) {
assert(device);
return (pointer) device->malloc(n*sizeof(T),alignment);
}
/*! returns storage to the device; does nothing when no device is bound, n is unused */
__forceinline void deallocate( pointer p, size_type n ) {
if (device) device->free(p);
}
/*! copy constructs a T at p using placement new */
__forceinline void construct( pointer p, const_reference val ) {
new (p) T(val);
}
/*! runs the destructor of the object at p without releasing its storage */
__forceinline void destroy( pointer p ) {
p->~T();
}
/*! device used for allocation, nullptr for a default constructed allocator */
Device* device = nullptr;
};
/*! vector class that performs aligned allocations from Device object */
template<typename T>
using vector = vector_t<T,allocator<T,std::alignment_of<T>::value>>;
/*! same as vector, but with an explicitly specified alignment */
template<typename T, size_t alignment>
using avector = vector_t<T,allocator<T,alignment>>;
public:
/*! Device construction */
Device (const char* cfg);
/*! Device destruction */
virtual ~Device ();
/*! prints info about the device */
void print();
/*! sets the error code */
void setDeviceErrorCode(RTCError error);
/*! returns and clears the error code */
RTCError getDeviceErrorCode();
/*! sets the error code */
static void setThreadErrorCode(RTCError error);
/*! returns and clears the error code */
static RTCError getThreadErrorCode();
/*! processes error codes, do not call directly */
static void process_error(Device* device, RTCError error, const char* str);
/*! invokes the memory monitor callback */
void memoryMonitor(ssize_t bytes, bool post);
/*! sets the size of the software cache. */
void setCacheSize(size_t bytes);
/*! sets a property */
void setProperty(const RTCDeviceProperty prop, ssize_t val);
/*! gets a property */
ssize_t getProperty(const RTCDeviceProperty prop);
/*! enter device by setting up some global state */
virtual void enter() {}
/*! leave device by setting up some global state */
virtual void leave() {}
/*! buffer allocation */
virtual void* malloc(size_t size, size_t align);
/*! buffer deallocation */
virtual void free(void* ptr);
private:
/*! initializes the tasking system */
void initTaskingSystem(size_t numThreads);
/*! shuts down the tasking system */
void exitTaskingSystem();
/*! holder for the task arena; TaskArena is only forward declared in this header */
std::unique_ptr<TaskArena> arena;
public:
// use tasking system arena to execute func
void execute(bool join, const std::function<void()>& func);
/*! debug variables shared by all devices (static); Device::setProperty writes them for the hidden property ids 1000000..1000003 */
public:
static ssize_t debug_int0;
static ssize_t debug_int1;
static ssize_t debug_int2;
static ssize_t debug_int3;
public:
/*! BVH factories owned by this device; the BVH8 factory only exists when EMBREE_TARGET_SIMD8 is defined */
std::unique_ptr<BVH4Factory> bvh4_factory;
#if defined(EMBREE_TARGET_SIMD8)
std::unique_ptr<BVH8Factory> bvh8_factory;
#endif
};
#if defined(EMBREE_SYCL_SUPPORT)
/*! Device specialization for SYCL. Stores the SYCL context and device and
 *  overrides the enter/leave and malloc/free hooks of Device. */
class DeviceGPU : public Device
{
public:
/*! creates the device from a SYCL context and a configuration string */
DeviceGPU(sycl::context sycl_context, const char* cfg);
~DeviceGPU();
virtual void enter() override;
virtual void leave() override;
virtual void* malloc(size_t size, size_t align) override;
virtual void free(void* ptr) override;
/* set SYCL device */
void setSYCLDevice(const sycl::device sycl_device);
private:
sycl::context gpu_context; //!< SYCL context passed to the constructor
sycl::device gpu_device; //!< SYCL device currently in use
unsigned int gpu_maxWorkGroupSize; //!< cached device limit
unsigned int gpu_maxComputeUnits; //!< cached device limit
public:
/*! opaque pointer, nullptr until it is assigned */
void* dispatchGlobalsPtr = nullptr;
public:
/*! accessors; device and context are returned by mutable reference */
inline sycl::device &getGPUDevice() { return gpu_device; }
inline sycl::context &getGPUContext() { return gpu_context; }
inline unsigned int getGPUMaxWorkGroupSize() { return gpu_maxWorkGroupSize; }
/* NOTE(review): no definitions of the two functions below are visible next to the other DeviceGPU members - confirm they are still needed */
void init_rthw_level_zero();
void init_rthw_opencl();
};
#endif
/*! Scope guard around a Device. Each constructor resolves the device from
 *  the given API handle; the destructor is the counterpart of the
 *  constructors. */
struct DeviceEnterLeave
{
DeviceEnterLeave (RTCDevice hdevice);
DeviceEnterLeave (RTCScene hscene);
DeviceEnterLeave (RTCGeometry hgeometry);
DeviceEnterLeave (RTCBuffer hbuffer);
~DeviceEnterLeave();
private:
/*! device this guard operates on */
Device* device;
};
}

View file

@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "geometry.h"
#include "scene.h"
namespace embree
{
/*! Names of the geometry types, indexed by the numeric GType value.
 *  Slots without a name hold an empty string. */
const char* Geometry::gtype_names[Geometry::GTY_END] =
{
"flat_linear_curve", // 0
"round_linear_curve", // 1
"oriented_linear_curve", // 2
"", // 3 (GTY_CONE_LINEAR_CURVE, no name assigned)
"flat_bezier_curve", // 4
"round_bezier_curve", // 5
"oriented_bezier_curve", // 6
"", // 7
"flat_bspline_curve", // 8
"round_bspline_curve", // 9
"oriented_bspline_curve", // 10
"", // 11
"flat_hermite_curve", // 12
"round_hermite_curve", // 13
"oriented_hermite_curve", // 14
"", // 15
"flat_catmull_rom_curve", // 16
"round_catmull_rom_curve", // 17
"oriented_catmull_rom_curve", // 18
"", // 19
"triangles", // 20
"quads", // 21
"grid", // 22
"subdivs", // 23
"", // 24 (GTY_INSTANCE_ARRAY, no name assigned)
"sphere", // 25
"disc", // 26
"oriented_disc", // 27
"", // 28
"usergeom", // 29
"instance_cheap", // 30
"instance_expensive", // 31
};
/*! Creates a geometry of the given type for a device.
 *
 *  The geometry starts enabled, in the MODIFIED state, with mask 1,
 *  medium build quality, time range [0,1] and no user pointer, filter or
 *  point query callbacks. A reference on the device is taken; it is
 *  released in the destructor. */
Geometry::Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps)
  : device(device),
    userPtr(nullptr),
    numPrimitives(numPrimitives),
    numTimeSteps(unsigned(numTimeSteps)),
    fnumTimeSegments(float(numTimeSteps-1)),
    time_range(0.0f,1.0f),
    mask(1),
    gtype(gtype),
    gsubtype(GTY_SUBTYPE_DEFAULT),
    quality(RTC_BUILD_QUALITY_MEDIUM),
    state((unsigned)State::MODIFIED),
    enabled(true),
    argumentFilterEnabled(false),
    intersectionFilterN(nullptr),
    occlusionFilterN(nullptr),
    pointQueryFunc(nullptr)
{
  device->refInc();
}
/*! Releases the device reference taken by the constructor. */
Geometry::~Geometry()
{
  device->refDec();
}
/*! Changes the primitive count. The geometry is only marked as modified
 *  when the count actually changes. */
void Geometry::setNumPrimitives(unsigned int numPrimitives_in)
{
  if (numPrimitives_in != numPrimitives)
  {
    numPrimitives = numPrimitives_in;
    Geometry::update();
  }
}
/*! Changes the number of time steps and the derived number of time
 *  segments (time steps minus one, stored as float). The geometry is only
 *  marked as modified when the count actually changes. */
void Geometry::setNumTimeSteps (unsigned int numTimeSteps_in)
{
  if (numTimeSteps_in != numTimeSteps)
  {
    numTimeSteps = numTimeSteps_in;
    fnumTimeSegments = float(numTimeSteps_in-1);
    Geometry::update();
  }
}
/*! Stores a new time range and marks the geometry as modified. */
void Geometry::setTimeRange (const BBox1f range)
{
  time_range = range;
  Geometry::update();
}
/*! Returns the stored time range. */
BBox1f Geometry::getTimeRange () const
{
  return time_range;
}
/*! Bumps the modification counter and puts the geometry into the
 *  MODIFIED state. */
void Geometry::update()
{
  ++modCounter_; // FIXME: required?
  state = (unsigned)State::MODIFIED;
}
/*! Bumps the modification counter and puts the geometry into the
 *  COMMITTED state. */
void Geometry::commit()
{
  ++modCounter_;
  state = (unsigned)State::COMMITTED;
}
/*! Raises RTC_ERROR_INVALID_OPERATION when the geometry is still in the
 *  MODIFIED state, i.e. it was changed without being committed. */
void Geometry::preCommit()
{
  const bool uncommitted = ((State)state == State::MODIFIED);
  if (uncommitted)
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"geometry not committed");
}
/*! Intentionally empty in this base class. */
void Geometry::postCommit()
{
}
/*! Enables the geometry. The modification counter is only bumped when the
 *  geometry was not enabled before. */
void Geometry::enable ()
{
  if (!isEnabled())
  {
    enabled = true;
    ++modCounter_;
  }
}
/*! Disables the geometry. The modification counter is only bumped when
 *  the geometry was not disabled before. */
void Geometry::disable ()
{
  if (!isDisabled())
  {
    enabled = false;
    ++modCounter_;
  }
}
/*! Stores the user data pointer of this geometry. */
void Geometry::setUserData (void* ptr)
{
  userPtr = ptr;
}
/*! Installs the intersection filter callback. Raises
 *  RTC_ERROR_INVALID_OPERATION when the type mask of this geometry is not
 *  one of triangle, quad, curve/point, subdivision, user or grid. */
void Geometry::setIntersectionFilterFunctionN (RTCFilterFunctionN filter)
{
  const auto filterableTypes = MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH;

  if (!(getTypeMask() & filterableTypes)) {
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
  }

  intersectionFilterN = filter;
}
/*! Installs the occlusion filter callback. Raises
 *  RTC_ERROR_INVALID_OPERATION when the type mask of this geometry is not
 *  one of triangle, quad, curve/point, subdivision, user or grid. */
void Geometry::setOcclusionFilterFunctionN (RTCFilterFunctionN filter)
{
  const auto filterableTypes = MTY_TRIANGLE_MESH | MTY_QUAD_MESH | MTY_CURVES | MTY_SUBDIV_MESH | MTY_USER_GEOMETRY | MTY_GRID_MESH;

  if (!(getTypeMask() & filterableTypes)) {
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"filter functions not supported for this geometry");
  }

  occlusionFilterN = filter;
}
/*! Stores the per geometry point query callback. */
void Geometry::setPointQueryFunction (RTCPointQueryFunction func)
{
  pointQueryFunc = func;
}
void Geometry::interpolateN(const RTCInterpolateNArguments* const args)
{
const void* valid_i = args->valid;
const unsigned* primIDs = args->primIDs;
const float* u = args->u;
const float* v = args->v;
unsigned int N = args->N;
RTCBufferType bufferType = args->bufferType;
unsigned int bufferSlot = args->bufferSlot;
float* P = args->P;
float* dPdu = args->dPdu;
float* dPdv = args->dPdv;
float* ddPdudu = args->ddPdudu;
float* ddPdvdv = args->ddPdvdv;
float* ddPdudv = args->ddPdudv;
unsigned int valueCount = args->valueCount;
if (valueCount > 256) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"maximally 256 floating point values can be interpolated per vertex");
const int* valid = (const int*) valid_i;
__aligned(64) float P_tmp[256];
__aligned(64) float dPdu_tmp[256];
__aligned(64) float dPdv_tmp[256];
__aligned(64) float ddPdudu_tmp[256];
__aligned(64) float ddPdvdv_tmp[256];
__aligned(64) float ddPdudv_tmp[256];
float* Pt = P ? P_tmp : nullptr;
float* dPdut = nullptr, *dPdvt = nullptr;
if (dPdu) { dPdut = dPdu_tmp; dPdvt = dPdv_tmp; }
float* ddPdudut = nullptr, *ddPdvdvt = nullptr, *ddPdudvt = nullptr;
if (ddPdudu) { ddPdudut = ddPdudu_tmp; ddPdvdvt = ddPdvdv_tmp; ddPdudvt = ddPdudv_tmp; }
for (unsigned int i=0; i<N; i++)
{
if (valid && !valid[i]) continue;
RTCInterpolateArguments iargs;
iargs.primID = primIDs[i];
iargs.u = u[i];
iargs.v = v[i];
iargs.bufferType = bufferType;
iargs.bufferSlot = bufferSlot;
iargs.P = Pt;
iargs.dPdu = dPdut;
iargs.dPdv = dPdvt;
iargs.ddPdudu = ddPdudut;
iargs.ddPdvdv = ddPdvdvt;
iargs.ddPdudv = ddPdudvt;
iargs.valueCount = valueCount;
interpolate(&iargs);
if (likely(P)) {
for (unsigned int j=0; j<valueCount; j++)
P[j*N+i] = Pt[j];
}
if (likely(dPdu))
{
for (unsigned int j=0; j<valueCount; j++) {
dPdu[j*N+i] = dPdut[j];
dPdv[j*N+i] = dPdvt[j];
}
}
if (likely(ddPdudu))
{
for (unsigned int j=0; j<valueCount; j++) {
ddPdudu[j*N+i] = ddPdudut[j];
ddPdvdv[j*N+i] = ddPdvdvt[j];
ddPdudv[j*N+i] = ddPdudvt[j];
}
}
}
}
/*! Runs the point query callbacks for the primitive described by context.
 *
 *  Both the callback stored in the context and the callback stored in
 *  this geometry are invoked when set. The result is true if at least one
 *  of them returned true. In that case, and if the instance stack of the
 *  user context is not empty, the query is refreshed: either through
 *  context->updateAABB() or by rescaling query->radius with the
 *  similarity scale. */
bool Geometry::pointQuery(PointQuery* query, PointQueryContext* context)
{
  assert(context->primID < size());

  RTCPointQueryFunctionArguments callbackArgs;
  callbackArgs.query = (RTCPointQuery*)context->query_ws;
  callbackArgs.userPtr = context->userPtr;
  callbackArgs.primID = context->primID;
  callbackArgs.geomID = context->geomID;
  callbackArgs.context = context->userContext;
  callbackArgs.similarityScale = context->similarityScale;

  /* both callbacks are always called, even if the first one already reported a change */
  bool changed = false;
  if (context->func && context->func(&callbackArgs)) changed = true;
  if (pointQueryFunc && pointQueryFunc(&callbackArgs)) changed = true;

  if (!changed || !(context->userContext->instStackSize > 0))
    return changed;

  // update point query
  if (context->query_type == POINT_QUERY_TYPE_AABB) {
    context->updateAABB();
  }
  else {
    assert(context->similarityScale > 0.f);
    query->radius = context->query_ws->radius * context->similarityScale;
  }
  return changed;
}
}

View file

@ -0,0 +1,663 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "device.h"
#include "buffer.h"
#include "../common/point_query.h"
#include "../builders/priminfo.h"
#include "../builders/priminfo_mb.h"
namespace embree
{
class Scene;
class Geometry;
/*! Set of per type primitive counters. For every geometry type there is a
 *  counter for static and one for motion blurred (MB) primitives. */
struct GeometryCounts
{
  /*! creates a set of counters that are all zero */
  __forceinline GeometryCounts()
    : numFilterFunctions(0),
      numTriangles(0), numMBTriangles(0),
      numQuads(0), numMBQuads(0),
      numBezierCurves(0), numMBBezierCurves(0),
      numLineSegments(0), numMBLineSegments(0),
      numSubdivPatches(0), numMBSubdivPatches(0),
      numUserGeometries(0), numMBUserGeometries(0),
      numInstancesCheap(0), numMBInstancesCheap(0),
      numInstancesExpensive(0), numMBInstancesExpensive(0),
      numInstanceArrays(0), numMBInstanceArrays(0),
      numGrids(0), numMBGrids(0),
      numSubGrids(0), numMBSubGrids(0),
      numPoints(0), numMBPoints(0) {}

  /*! returns the sum of all primitive counters; numFilterFunctions and the sub-grid counters are not included */
  __forceinline size_t size() const {
    return numTriangles + numQuads + numBezierCurves + numLineSegments + numSubdivPatches + numUserGeometries + numInstancesCheap + numInstancesExpensive + numInstanceArrays + numGrids + numPoints
      + numMBTriangles + numMBQuads + numMBBezierCurves + numMBLineSegments + numMBSubdivPatches + numMBUserGeometries + numMBInstancesCheap + numMBInstancesExpensive + numMBInstanceArrays + numMBGrids + numMBPoints;
  }

  /*! Returns a bit mask with one bit per geometry type that has a non-zero
   *  counter. Motion blurred types occupy the low 16 bits, static types
   *  the high 16 bits, using the same bit order in both halves:
   *  0 triangles, 1 quads, 2 curves and line segments, 3 subdivision
   *  patches, 4 user geometries, 5 cheap instances, 6 expensive instances,
   *  7 instance arrays, 8 grids, 9 points.
   *
   *  Each half needs 10 bits, so the halves must be at least 10 bits
   *  apart. Otherwise the grid and point bits of the motion blur half
   *  collide with the triangle and quad bits of the static half and
   *  different sets of enabled types yield the same mask. */
  __forceinline unsigned int enabledGeometryTypesMask() const
  {
    unsigned int mask = 0;
    if (numTriangles) mask |= 1 << 0;
    if (numQuads) mask |= 1 << 1;
    if (numBezierCurves+numLineSegments) mask |= 1 << 2;
    if (numSubdivPatches) mask |= 1 << 3;
    if (numUserGeometries) mask |= 1 << 4;
    if (numInstancesCheap) mask |= 1 << 5;
    if (numInstancesExpensive) mask |= 1 << 6;
    if (numInstanceArrays) mask |= 1 << 7;
    if (numGrids) mask |= 1 << 8;
    if (numPoints) mask |= 1 << 9;

    unsigned int maskMB = 0;
    if (numMBTriangles) maskMB |= 1 << 0;
    if (numMBQuads) maskMB |= 1 << 1;
    if (numMBBezierCurves+numMBLineSegments) maskMB |= 1 << 2;
    if (numMBSubdivPatches) maskMB |= 1 << 3;
    if (numMBUserGeometries) maskMB |= 1 << 4;
    if (numMBInstancesCheap) maskMB |= 1 << 5;
    if (numMBInstancesExpensive) maskMB |= 1 << 6;
    if (numMBInstanceArrays) maskMB |= 1 << 7;
    if (numMBGrids) maskMB |= 1 << 8;
    if (numMBPoints) maskMB |= 1 << 9;

    /* width reserved for the motion blur half, has to cover all type bits used above */
    const unsigned int halfBits = 16;
    return (mask << halfBits) | maskMB;
  }

  /*! returns the element wise sum of two sets of counters */
  __forceinline GeometryCounts operator+ (GeometryCounts const & rhs) const
  {
    GeometryCounts ret;
    ret.numFilterFunctions = numFilterFunctions + rhs.numFilterFunctions;
    ret.numTriangles = numTriangles + rhs.numTriangles;
    ret.numMBTriangles = numMBTriangles + rhs.numMBTriangles;
    ret.numQuads = numQuads + rhs.numQuads;
    ret.numMBQuads = numMBQuads + rhs.numMBQuads;
    ret.numBezierCurves = numBezierCurves + rhs.numBezierCurves;
    ret.numMBBezierCurves = numMBBezierCurves + rhs.numMBBezierCurves;
    ret.numLineSegments = numLineSegments + rhs.numLineSegments;
    ret.numMBLineSegments = numMBLineSegments + rhs.numMBLineSegments;
    ret.numSubdivPatches = numSubdivPatches + rhs.numSubdivPatches;
    ret.numMBSubdivPatches = numMBSubdivPatches + rhs.numMBSubdivPatches;
    ret.numUserGeometries = numUserGeometries + rhs.numUserGeometries;
    ret.numMBUserGeometries = numMBUserGeometries + rhs.numMBUserGeometries;
    ret.numInstancesCheap = numInstancesCheap + rhs.numInstancesCheap;
    ret.numMBInstancesCheap = numMBInstancesCheap + rhs.numMBInstancesCheap;
    ret.numInstancesExpensive = numInstancesExpensive + rhs.numInstancesExpensive;
    ret.numMBInstancesExpensive = numMBInstancesExpensive + rhs.numMBInstancesExpensive;
    ret.numInstanceArrays = numInstanceArrays + rhs.numInstanceArrays;
    ret.numMBInstanceArrays = numMBInstanceArrays + rhs.numMBInstanceArrays;
    ret.numGrids = numGrids + rhs.numGrids;
    ret.numMBGrids = numMBGrids + rhs.numMBGrids;
    ret.numSubGrids = numSubGrids + rhs.numSubGrids;
    ret.numMBSubGrids = numMBSubGrids + rhs.numMBSubGrids;
    ret.numPoints = numPoints + rhs.numPoints;
    ret.numMBPoints = numMBPoints + rhs.numMBPoints;
    return ret;
  }

  size_t numFilterFunctions; //!< number of geometries with filter functions enabled
  size_t numTriangles; //!< number of enabled triangles
  size_t numMBTriangles; //!< number of enabled motion blurred triangles
  size_t numQuads; //!< number of enabled quads
  size_t numMBQuads; //!< number of enabled motion blurred quads
  size_t numBezierCurves; //!< number of enabled curves
  size_t numMBBezierCurves; //!< number of enabled motion blurred curves
  size_t numLineSegments; //!< number of enabled line segments
  size_t numMBLineSegments; //!< number of enabled motion blurred line segments
  size_t numSubdivPatches; //!< number of enabled subdivision patches
  size_t numMBSubdivPatches; //!< number of enabled motion blurred subdivision patches
  size_t numUserGeometries; //!< number of enabled user geometries
  size_t numMBUserGeometries; //!< number of enabled motion blurred user geometries
  size_t numInstancesCheap; //!< number of enabled cheap instances
  size_t numMBInstancesCheap; //!< number of enabled motion blurred cheap instances
  size_t numInstancesExpensive; //!< number of enabled expensive instances
  size_t numMBInstancesExpensive; //!< number of enabled motion blurred expensive instances
  size_t numInstanceArrays; //!< number of enabled instance arrays
  size_t numMBInstanceArrays; //!< number of enabled motion blurred instance arrays
  size_t numGrids; //!< number of enabled grid geometries
  size_t numMBGrids; //!< number of enabled motion blurred grid geometries
  size_t numSubGrids; //!< sub-grid counter (not part of size() or the type mask)
  size_t numMBSubGrids; //!< motion blurred sub-grid counter (not part of size() or the type mask)
  size_t numPoints; //!< number of enabled points
  size_t numMBPoints; //!< number of enabled motion blurred points
};
/*! Base class all geometries are derived from */
class Geometry : public RefCount
{
ALIGNED_CLASS_USM_(16);
friend class Scene;
public:
/*! type of geometry */
enum GType
{
GTY_FLAT_LINEAR_CURVE = 0,
GTY_ROUND_LINEAR_CURVE = 1,
GTY_ORIENTED_LINEAR_CURVE = 2,
GTY_CONE_LINEAR_CURVE = 3,
GTY_FLAT_BEZIER_CURVE = 4,
GTY_ROUND_BEZIER_CURVE = 5,
GTY_ORIENTED_BEZIER_CURVE = 6,
GTY_FLAT_BSPLINE_CURVE = 8,
GTY_ROUND_BSPLINE_CURVE = 9,
GTY_ORIENTED_BSPLINE_CURVE = 10,
GTY_FLAT_HERMITE_CURVE = 12,
GTY_ROUND_HERMITE_CURVE = 13,
GTY_ORIENTED_HERMITE_CURVE = 14,
GTY_FLAT_CATMULL_ROM_CURVE = 16,
GTY_ROUND_CATMULL_ROM_CURVE = 17,
GTY_ORIENTED_CATMULL_ROM_CURVE = 18,
GTY_TRIANGLE_MESH = 20,
GTY_QUAD_MESH = 21,
GTY_GRID_MESH = 22,
GTY_SUBDIV_MESH = 23,
GTY_SPHERE_POINT = 25,
GTY_DISC_POINT = 26,
GTY_ORIENTED_DISC_POINT = 27,
GTY_USER_GEOMETRY = 29,
GTY_INSTANCE_CHEAP = 30,
GTY_INSTANCE_EXPENSIVE = 31,
GTY_INSTANCE_ARRAY = 24,
GTY_END = 32,
GTY_BASIS_LINEAR = 0,
GTY_BASIS_BEZIER = 4,
GTY_BASIS_BSPLINE = 8,
GTY_BASIS_HERMITE = 12,
GTY_BASIS_CATMULL_ROM = 16,
GTY_BASIS_MASK = 28,
GTY_SUBTYPE_FLAT_CURVE = 0,
GTY_SUBTYPE_ROUND_CURVE = 1,
GTY_SUBTYPE_ORIENTED_CURVE = 2,
GTY_SUBTYPE_MASK = 3,
};
enum GSubType
{
GTY_SUBTYPE_DEFAULT= 0,
GTY_SUBTYPE_INSTANCE_LINEAR = 0,
GTY_SUBTYPE_INSTANCE_QUATERNION = 1
};
enum GTypeMask
{
MTY_FLAT_LINEAR_CURVE = 1ul << GTY_FLAT_LINEAR_CURVE,
MTY_ROUND_LINEAR_CURVE = 1ul << GTY_ROUND_LINEAR_CURVE,
MTY_CONE_LINEAR_CURVE = 1ul << GTY_CONE_LINEAR_CURVE,
MTY_ORIENTED_LINEAR_CURVE = 1ul << GTY_ORIENTED_LINEAR_CURVE,
MTY_FLAT_BEZIER_CURVE = 1ul << GTY_FLAT_BEZIER_CURVE,
MTY_ROUND_BEZIER_CURVE = 1ul << GTY_ROUND_BEZIER_CURVE,
MTY_ORIENTED_BEZIER_CURVE = 1ul << GTY_ORIENTED_BEZIER_CURVE,
MTY_FLAT_BSPLINE_CURVE = 1ul << GTY_FLAT_BSPLINE_CURVE,
MTY_ROUND_BSPLINE_CURVE = 1ul << GTY_ROUND_BSPLINE_CURVE,
MTY_ORIENTED_BSPLINE_CURVE = 1ul << GTY_ORIENTED_BSPLINE_CURVE,
MTY_FLAT_HERMITE_CURVE = 1ul << GTY_FLAT_HERMITE_CURVE,
MTY_ROUND_HERMITE_CURVE = 1ul << GTY_ROUND_HERMITE_CURVE,
MTY_ORIENTED_HERMITE_CURVE = 1ul << GTY_ORIENTED_HERMITE_CURVE,
MTY_FLAT_CATMULL_ROM_CURVE = 1ul << GTY_FLAT_CATMULL_ROM_CURVE,
MTY_ROUND_CATMULL_ROM_CURVE = 1ul << GTY_ROUND_CATMULL_ROM_CURVE,
MTY_ORIENTED_CATMULL_ROM_CURVE = 1ul << GTY_ORIENTED_CATMULL_ROM_CURVE,
MTY_CURVE2 = MTY_FLAT_LINEAR_CURVE | MTY_ROUND_LINEAR_CURVE | MTY_CONE_LINEAR_CURVE | MTY_ORIENTED_LINEAR_CURVE,
MTY_CURVE4 = MTY_FLAT_BEZIER_CURVE | MTY_ROUND_BEZIER_CURVE | MTY_ORIENTED_BEZIER_CURVE |
MTY_FLAT_BSPLINE_CURVE | MTY_ROUND_BSPLINE_CURVE | MTY_ORIENTED_BSPLINE_CURVE |
MTY_FLAT_HERMITE_CURVE | MTY_ROUND_HERMITE_CURVE | MTY_ORIENTED_HERMITE_CURVE |
MTY_FLAT_CATMULL_ROM_CURVE | MTY_ROUND_CATMULL_ROM_CURVE | MTY_ORIENTED_CATMULL_ROM_CURVE,
MTY_SPHERE_POINT = 1ul << GTY_SPHERE_POINT,
MTY_DISC_POINT = 1ul << GTY_DISC_POINT,
MTY_ORIENTED_DISC_POINT = 1ul << GTY_ORIENTED_DISC_POINT,
MTY_POINTS = MTY_SPHERE_POINT | MTY_DISC_POINT | MTY_ORIENTED_DISC_POINT,
MTY_CURVES = MTY_CURVE2 | MTY_CURVE4 | MTY_POINTS,
MTY_TRIANGLE_MESH = 1ul << GTY_TRIANGLE_MESH,
MTY_QUAD_MESH = 1ul << GTY_QUAD_MESH,
MTY_GRID_MESH = 1ul << GTY_GRID_MESH,
MTY_SUBDIV_MESH = 1ul << GTY_SUBDIV_MESH,
MTY_USER_GEOMETRY = 1ul << GTY_USER_GEOMETRY,
MTY_INSTANCE_CHEAP = 1ul << GTY_INSTANCE_CHEAP,
MTY_INSTANCE_EXPENSIVE = 1ul << GTY_INSTANCE_EXPENSIVE,
MTY_INSTANCE = MTY_INSTANCE_CHEAP | MTY_INSTANCE_EXPENSIVE,
MTY_INSTANCE_ARRAY = 1ul << GTY_INSTANCE_ARRAY,
MTY_ALL = -1
};
static const char* gtype_names[GTY_END];
enum class State : unsigned {
MODIFIED = 0,
COMMITTED = 1,
};
public:
/*! Geometry constructor */
Geometry (Device* device, GType gtype, unsigned int numPrimitives, unsigned int numTimeSteps);
/*! Geometry destructor */
virtual ~Geometry();
public:
/*! tests if geometry is enabled */
__forceinline bool isEnabled() const { return enabled; }
/*! tests if geometry is disabled */
__forceinline bool isDisabled() const { return !isEnabled(); }
/* checks if argument version of filter functions are enabled */
__forceinline bool hasArgumentFilterFunctions() const {
return argumentFilterEnabled;
}
/*! tests if that geometry has some filter function set */
__forceinline bool hasGeometryFilterFunctions () const {
return (intersectionFilterN != nullptr) || (occlusionFilterN != nullptr);
}
/*! returns geometry type */
__forceinline GType getType() const { return gtype; }
/*! returns curve type */
__forceinline GType getCurveType() const { return (GType)(gtype & GTY_SUBTYPE_MASK); }
/*! returns curve basis */
__forceinline GType getCurveBasis() const { return (GType)(gtype & GTY_BASIS_MASK); }
/*! returns geometry type mask */
__forceinline GTypeMask getTypeMask() const { return (GTypeMask)(1 << gtype); }
/*! returns true of geometry contains motion blur */
__forceinline bool hasMotionBlur () const {
return numTimeSteps > 1;
}
/*! returns number of primitives */
__forceinline size_t size() const { return numPrimitives; }
/*! sets the number of primitives */
virtual void setNumPrimitives(unsigned int numPrimitives_in);
/*! sets number of time steps */
virtual void setNumTimeSteps (unsigned int numTimeSteps_in);
/*! sets motion blur time range */
void setTimeRange (const BBox1f range);
/*! gets motion blur time range */
BBox1f getTimeRange () const;
/*! sets number of vertex attributes */
virtual void setVertexAttributeCount (unsigned int N) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! sets number of topologies */
virtual void setTopologyCount (unsigned int N) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! sets the build quality */
void setBuildQuality(RTCBuildQuality quality_in)
{
this->quality = quality_in;
Geometry::update();
}
/* calculate time segment itime and fractional time ftime */
__forceinline int timeSegment(float time, float& ftime) const {
return getTimeSegment(time,time_range.lower,time_range.upper,fnumTimeSegments,ftime);
}
template<int N>
__forceinline vint<N> timeSegment(const vfloat<N>& time, vfloat<N>& ftime) const {
return getTimeSegment<N>(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime);
}
/* calculate overlapping time segment range */
__forceinline range<int> timeSegmentRange(const BBox1f& range) const {
return getTimeSegmentRange(range,time_range,fnumTimeSegments);
}
/* returns time that corresponds to time step */
__forceinline float timeStep(const int i) const {
assert(i>=0 && i<(int)numTimeSteps);
return time_range.lower + time_range.size()*float(i)/fnumTimeSegments;
}
/*! for all geometries */
public:
/*! Enable geometry. */
virtual void enable();
/*! Update geometry. */
void update();
/*! commit of geometry */
virtual void commit();
/*! Update geometry buffer. */
virtual void updateBuffer(RTCBufferType type, unsigned int slot) {
update(); // update everything for geometries not supporting this call
}
/*! Disable geometry. */
virtual void disable();
/*! Verify the geometry */
virtual bool verify() { return true; }
/*! called before every build */
virtual void preCommit();
/*! called after every build */
virtual void postCommit();
virtual void addElementsToCount (GeometryCounts & counts) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
};
/*! sets constant tessellation rate for the geometry */
virtual void setTessellationRate(float N) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets the maximal curve radius scale allowed by min-width feature. */
virtual void setMaxRadiusScale(float s) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set user data pointer. */
virtual void setUserData(void* ptr);
/*! Get user data pointer. */
__forceinline void* getUserData() const {
return userPtr;
}
/*! interpolates user data to the specified u/v location */
virtual void interpolate(const RTCInterpolateArguments* const args) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! interpolates user data to the specified u/v locations */
virtual void interpolateN(const RTCInterpolateNArguments* const args);
/* point query api */
bool pointQuery(PointQuery* query, PointQueryContext* context);
/*! for subdivision surfaces only */
public:
virtual void setSubdivisionMode (unsigned topologyID, RTCSubdivisionMode mode) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual void setVertexAttributeTopology(unsigned int vertexBufferSlot, unsigned int indexBufferSlot) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set displacement function. */
virtual void setDisplacementFunction (RTCDisplacementFunctionN filter) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getFirstHalfEdge(unsigned int faceID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getFace(unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getNextHalfEdge(unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getPreviousHalfEdge(unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
virtual unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! get fast access to first vertex buffer if applicable */
virtual float * getCompactVertexArray () const {
return nullptr;
}
/*! Returns the modified counter - how many times the geo has been modified */
__forceinline unsigned int getModCounter () const {
return modCounter_;
}
/*! for triangle meshes and bezier curves only */
public:
/*! Sets ray mask. */
virtual void setMask(unsigned mask) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets specified buffer. */
virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Gets specified buffer. */
virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set intersection filter function for ray packets of size N. */
virtual void setIntersectionFilterFunctionN (RTCFilterFunctionN filterN);
/*! Set occlusion filter function for ray packets of size N. */
virtual void setOcclusionFilterFunctionN (RTCFilterFunctionN filterN);
/* Enables argument version of intersection or occlusion filter function. */
virtual void enableFilterFunctionFromArguments (bool enable) {
argumentFilterEnabled = enable;
}
/*! for instances only */
public:
/*! Sets the instanced scene */
virtual void setInstancedScene(const Ref<Scene>& scene) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets the instanced scenes */
virtual void setInstancedScenes(const RTCScene* scenes, size_t numScenes) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets transformation of the instance */
virtual void setTransform(const AffineSpace3fa& transform, unsigned int timeStep) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Sets transformation of the instance */
virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Returns the transformation of the instance */
virtual AffineSpace3fa getTransform(float time) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Returns the transformation of the instance */
virtual AffineSpace3fa getTransform(size_t instance, float time) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! for user geometries only */
public:
/*! Set bounds function. */
virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set intersect function for ray packets of size N. */
virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set occlusion function for ray packets of size N. */
virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
/*! Set point query function. */
void setPointQueryFunction(RTCPointQueryFunction func);
/*! returns number of time segments */
__forceinline unsigned numTimeSegments () const {
return numTimeSteps-1;
}
public:
virtual PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefArray not implemented for this geometry");
}
PrimInfo createPrimRefArray(mvector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArray(prims.data(),r,k,geomID);
}
PrimInfo createPrimRefArray(avector<PrimRef>& prims, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArray(prims.data(),r,k,geomID);
}
virtual PrimInfo createPrimRefArray(mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArray(prims,r,k,geomID);
}
virtual PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
}
/*! Calculates the PrimRef over the complete time interval */
virtual PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
}
PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArrayMB(prims.data(),t0t1,r,k,geomID);
}
PrimInfo createPrimRefArrayMB(avector<PrimRef>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefArrayMB(prims.data(),t0t1,r,k,geomID);
}
virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"createPrimRefMBArray not implemented for this geometry");
}
virtual PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const {
return createPrimRefMBArray(prims,t0t1,r,k,geomID);
}
virtual LinearSpace3fa computeAlignedSpace(const size_t primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
}
virtual LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeAlignedSpace not implemented for this geometry");
}
virtual Vec3fa computeDirection(unsigned int primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
}
virtual Vec3fa computeDirection(unsigned int primID, size_t time) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"computeDirection not implemented for this geometry");
}
virtual BBox3fa vbounds(size_t primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
}
virtual BBox3fa vbounds(const LinearSpace3fa& space, size_t primID) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
}
virtual BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vbounds not implemented for this geometry");
}
virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
}
virtual LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range, const SubGridBuildData * const sgrids) const {
return vlinearBounds(primID,time_range);
}
virtual LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
}
virtual LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"vlinearBounds not implemented for this geometry");
}
public:
__forceinline bool hasIntersectionFilter() const { return intersectionFilterN != nullptr; }
__forceinline bool hasOcclusionFilter() const { return occlusionFilterN != nullptr; }
public:
Device* device; //!< device this geometry belongs to
void* userPtr; //!< user pointer
unsigned int numPrimitives; //!< number of primitives of this geometry
unsigned int numTimeSteps; //!< number of time steps
float fnumTimeSegments; //!< number of time segments (precalculation)
BBox1f time_range; //!< motion blur time range
unsigned int mask; //!< for masking out geometry
unsigned int modCounter_ = 1; //!< counter for every modification - used to rebuild scenes when geo is modified
struct {
GType gtype : 8; //!< geometry type
GSubType gsubtype : 8; //!< geometry subtype
RTCBuildQuality quality : 3; //!< build quality for geometry
unsigned state : 2;
bool enabled : 1; //!< true if geometry is enabled
bool argumentFilterEnabled : 1; //!< true if argument filter functions are enabled for this geometry
};
RTCFilterFunctionN intersectionFilterN;
RTCFilterFunctionN occlusionFilterN;
RTCPointQueryFunction pointQueryFunc;
};
}

View file

@ -0,0 +1,153 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "ray.h"
#include "instance_stack.h"
namespace embree
{
/* Hit record for a packet of K rays (one SIMD lane per ray). */
template<int K>
struct HitK
{
  /* Intentionally leaves all members uninitialized. */
  __forceinline HitK() {}

  /* Builds a hit from per-lane IDs, u/v coordinates and geometry normal; the
   * instance ID stack is taken from the ray query context. */
  __forceinline HitK(const RTCRayQueryContext* context, const vuint<K>& geomID, const vuint<K>& primID, const vfloat<K>& u, const vfloat<K>& v, const Vec3vf<K>& Ng)
    : Ng(Ng), u(u), v(v), primID(primID), geomID(geomID)
  {
    /* Pre-fill every level with the invalid ID before copying the context
     * stack over it. */
    for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
      instID[level] = RTC_INVALID_GEOMETRY_ID;
    instance_id_stack::copy_UV<K>(context->instID, instID);

#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
    for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
      instPrimID[level] = RTC_INVALID_GEOMETRY_ID;
    instance_id_stack::copy_UV<K>(context->instPrimID, instPrimID);
#endif
  }

  /* Same as above, with u and v passed as one 2D vector. */
  __forceinline HitK(const RTCRayQueryContext* context, const vuint<K>& geomID, const vuint<K>& primID, const Vec2vf<K>& uv, const Vec3vf<K>& Ng)
    : HitK(context,geomID,primID,uv.x,uv.y,Ng) {}

  /* Number of hits stored in this structure. */
  static __forceinline size_t size() { return K; }

public:
  Vec3vf<K> Ng;  // geometry normal
  vfloat<K> u;   // barycentric u coordinate of hit
  vfloat<K> v;   // barycentric v coordinate of hit
  vuint<K> primID; // primitive ID
  vuint<K> geomID; // geometry ID
  vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  vuint<K> instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
#endif
};
/* Scalar specialization: a single hit stored in plain floats and integers. */
template<>
struct __aligned(16) HitK<1>
{
  /* Intentionally leaves all members uninitialized. */
  __forceinline HitK() {}

  /* Builds a hit from IDs, u/v coordinates and geometry normal; the instance
   * ID stack is copied from the ray query context. */
  __forceinline HitK(const RTCRayQueryContext* context, unsigned int geomID, unsigned int primID, float u, float v, const Vec3fa& Ng)
    : Ng(Ng.x,Ng.y,Ng.z), u(u), v(v), primID(primID), geomID(geomID)
  {
    instance_id_stack::copy_UU(context, context->instID, instID);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
    instance_id_stack::copy_UU(context, context->instPrimID, instPrimID);
#endif
  }

  /* Same as above, with u and v passed as one 2D vector. */
  __forceinline HitK(const RTCRayQueryContext* context, unsigned int geomID, unsigned int primID, const Vec2f& uv, const Vec3fa& Ng)
    : HitK<1>(context,geomID,primID,uv.x,uv.y,Ng) {}

  /* Number of hits stored in this structure. */
  static __forceinline size_t size() { return 1; }

public:
  Vec3<float> Ng;  // geometry normal
  float u;         // barycentric u coordinate of hit
  float v;         // barycentric v coordinate of hit
  unsigned int primID; // primitive ID
  unsigned int geomID; // geometry ID
  unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
#endif
};
/* Convenience aliases for the common packet widths */
using Hit   = HitK<1>;
using Hit4  = HitK<4>;
using Hit8  = HitK<8>;
using Hit16 = HitK<16>;
using Hitx  = HitK<VSIZEX>;
/* Prints all fields of a hit to the stream, one field per line, followed by
 * the instance ID stack(s) on a single line. Returns the stream. */
template<int K>
__forceinline embree_ostream operator<<(embree_ostream cout, const HitK<K>& ray)
{
  cout << "{ " << embree_endl;
  cout << " Ng = " << ray.Ng << embree_endl;
  cout << " u = " << ray.u << embree_endl;
  cout << " v = " << ray.v << embree_endl;
  cout << " primID = " << ray.primID << embree_endl;
  cout << " geomID = " << ray.geomID << embree_endl;

  cout << " instID =";
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
    cout << " " << ray.instID[level];

#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  cout << " instPrimID =";
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
    cout << " " << ray.instPrimID[level];
#endif

  cout << embree_endl;
  return cout << "}";
}
/* Copies every hit field, including the instance ID stack(s), from a single
 * hit into the hit part of a single ray. */
template<typename HitType>
__forceinline void copyHitToRay(RayHit& ray, const HitType& hit)
{
  /* geometric hit data */
  ray.Ng = hit.Ng;
  ray.u = hit.u;
  ray.v = hit.v;

  /* identifiers */
  ray.primID = hit.primID;
  ray.geomID = hit.geomID;

  /* instance ID stack(s) */
  instance_id_stack::copy_UU(hit.instID, ray.instID);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  instance_id_stack::copy_UU(hit.instPrimID, ray.instPrimID);
#endif
}
/* Packet version: copies the hit fields into the ray packet using masked
 * stores, so only lanes selected by `mask` are written. */
template<int K>
__forceinline void copyHitToRay(const vbool<K>& mask, RayHitK<K>& ray, const HitK<K>& hit)
{
  /* geometry normal, one component at a time */
  vfloat<K>::storeu(mask, &ray.Ng.x, hit.Ng.x);
  vfloat<K>::storeu(mask, &ray.Ng.y, hit.Ng.y);
  vfloat<K>::storeu(mask, &ray.Ng.z, hit.Ng.z);

  /* barycentric coordinates */
  vfloat<K>::storeu(mask, &ray.u, hit.u);
  vfloat<K>::storeu(mask, &ray.v, hit.v);

  /* identifiers */
  vuint<K>::storeu(mask, &ray.primID, hit.primID);
  vuint<K>::storeu(mask, &ray.geomID, hit.geomID);

  /* instance ID stack(s) */
  instance_id_stack::copy_VV<K>(hit.instID, ray.instID, mask);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  instance_id_stack::copy_VV<K>(hit.instPrimID, ray.instPrimID, mask);
#endif
}
}

View file

@ -0,0 +1,265 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "rtcore.h"
namespace embree {
namespace instance_id_stack {
static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0,
"RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0.");
/*******************************************************************************
* Instance ID stack manipulation.
* This is used from the instance intersector.
******************************************************************************/
/*
 * Push an instance to the stack.
 *
 * Returns true if the instance was pushed and false if the stack was already
 * full; in the latter case the context is left untouched. With a single
 * instance level the one slot counts as free while it holds
 * RTC_INVALID_GEOMETRY_ID.
 */
template<typename Context>
RTC_FORCEINLINE bool push(Context context,
                          unsigned instanceId,
                          unsigned instancePrimId)
{
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  const bool hasRoom = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT;
#else
  const bool hasRoom = (context->instID[0] == RTC_INVALID_GEOMETRY_ID);
#endif

  /* We assert here because instances are silently dropped when the stack is full.
     This might be quite hard to find in production. */
  assert(hasRoom);
  if (unlikely(!hasRoom))
    return false;

#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  context->instID[context->instStackSize] = instanceId;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[context->instStackSize] = instancePrimId;
#endif
  context->instStackSize++;
#else
  context->instID[0] = instanceId;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[0] = instancePrimId;
#endif
#endif
  return true;
}
/*
 * Pop the last instance pushed to the stack and reset its slot(s) to
 * RTC_INVALID_GEOMETRY_ID.
 * Do not call on an empty stack.
 */
template<typename Context>
RTC_FORCEINLINE void pop(Context context)
{
  assert(context);
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  assert(context->instStackSize > 0);
  /* shrink the stack first; the new size is the index of the removed entry */
  const unsigned top = --context->instStackSize;
#else
  assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
  const unsigned top = 0;
#endif
  context->instID[top] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[top] = RTC_INVALID_GEOMETRY_ID;
#endif
}
/*
 * Push an instance to the stack. Used for point queries.
 *
 * Stores the instance ID (and instance primitive ID, if enabled) together
 * with the world-to-instance and instance-to-world transformations. For
 * nested instances the stored transformations are concatenated with those
 * of the previous stack level, so each level holds the accumulated
 * transformation.
 *
 * Returns true if the instance was pushed and false if the stack is already
 * full. In the latter case the context is left untouched, like in the ray
 * query push() above, instead of writing past the end of the stack arrays
 * when assertions are compiled out.
 */
RTC_FORCEINLINE bool push(RTCPointQueryContext* context,
                          unsigned int instanceId,
                          unsigned int instancePrimId,
                          AffineSpace3fa const& w2i,
                          AffineSpace3fa const& i2w)
{
  assert(context);
  const size_t stackSize = context->instStackSize;

  /* We assert here because instances are silently dropped when the stack is full.
     This might be quite hard to find in production. */
  const bool spaceAvailable = stackSize < RTC_MAX_INSTANCE_LEVEL_COUNT;
  assert(spaceAvailable);
  if (unlikely(!spaceAvailable))
    return false;

  context->instID[stackSize] = instanceId;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[stackSize] = instancePrimId;
#endif

  AffineSpace3fa_store_unaligned(w2i,(AffineSpace3fa*)context->world2inst[stackSize]);
  AffineSpace3fa_store_unaligned(i2w,(AffineSpace3fa*)context->inst2world[stackSize]);

#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  if (unlikely(stackSize > 0))
  {
    /* concatenate with the transformations of the enclosing instance level */
    const AffineSpace3fa world2inst = AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->world2inst[stackSize  ])
                                    * AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->world2inst[stackSize-1]);
    const AffineSpace3fa inst2world = AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->inst2world[stackSize-1])
                                    * AffineSpace3fa_load_unaligned((AffineSpace3fa*)context->inst2world[stackSize  ]);
    AffineSpace3fa_store_unaligned(world2inst,(AffineSpace3fa*)context->world2inst[stackSize]);
    AffineSpace3fa_store_unaligned(inst2world,(AffineSpace3fa*)context->inst2world[stackSize]);
  }
#endif

  context->instStackSize++;
  return true;
}
/*
 * Pop the last instance pushed to a point query context and reset its ID
 * slot(s) to RTC_INVALID_GEOMETRY_ID. The stored transformations are left
 * as they are. Do not call on an empty stack.
 */
template<>
RTC_FORCEINLINE void pop(RTCPointQueryContext* context)
{
  assert(context);
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
  assert(context->instStackSize > 0);
#else
  assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID);
#endif
  /* shrink the stack first; the new size is the index of the removed entry */
  const unsigned top = --context->instStackSize;
  context->instID[top] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  context->instPrimID[top] = RTC_INVALID_GEOMETRY_ID;
#endif
}
/*
 * Optimized instance id stack copy.
 * The copy() functions will either copy full
 * stacks or copy only until the last valid element has been copied, depending
 * on RTC_MAX_INSTANCE_LEVEL_COUNT.
 *
 * Naming: U stands for a scalar (unsigned) stack, V for a stack of SIMD
 * vectors; the first letter describes the source, the second the target.
 */

/* scalar stack -> scalar stack */
RTC_FORCEINLINE void copy_UU(const unsigned* src, unsigned* tgt)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    tgt[level] = src[level];
    /* deep stacks only: stop once the first invalid ID has been copied */
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/* scalar stack -> scalar stack, using the stack size stored in the context:
 * the first instStackSize entries are copied, all remaining target entries
 * are set to RTC_INVALID_GEOMETRY_ID. */
RTC_FORCEINLINE void copy_UU(const RTCRayQueryContext* context, const unsigned* src, unsigned* tgt)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0];
#else
  const unsigned int depth = context->instStackSize;
  unsigned level = 0;
  for (; level < depth; ++level)
    tgt[level] = src[level];
  for (; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level)
    tgt[level] = RTC_INVALID_GEOMETRY_ID;
#endif
}
/* scalar stack -> vector stack: each scalar ID is assigned to the whole
 * vector of the corresponding level */
template <int K>
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    tgt[level] = src[level];
    /* deep stacks only: stop once the first invalid ID has been copied */
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/* scalar stack -> lane j of a vector stack */
template <int K>
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, size_t j)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0][j] = src[0];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    tgt[level][j] = src[level];
    /* deep stacks only: stop once the first invalid ID has been copied */
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/* scalar stack -> vector stack, written with a masked store so only the
 * lanes selected by `mask` are modified */
template <int K>
RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, const vbool<K>& mask)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  vuint<K>::store(mask, tgt, src[0]);
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    vuint<K>::store(mask, &tgt[level], src[level]);
    /* deep stacks only: stop once the first invalid ID has been copied */
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/* lane i of a vector stack -> scalar stack */
template <int K>
RTC_FORCEINLINE void copy_VU(const vuint<K>* src, unsigned* tgt, size_t i)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0] = src[0][i];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    tgt[level] = src[level][i];
    /* deep stacks only: stop once the first invalid ID has been copied */
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level][i] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/* lane i of a vector stack -> lane j of another vector stack */
template <int K>
RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, size_t i, size_t j)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  tgt[0][j] = src[0][i];
#else
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    tgt[level][j] = src[level][i];
    /* deep stacks only: stop once the first invalid ID has been copied */
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4 && src[level][i] == RTC_INVALID_GEOMETRY_ID)
      break;
  }
#endif
}
/* vector stack -> vector stack, written with masked stores so only the lanes
 * selected by `mask` are modified */
template <int K>
RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask)
{
#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1)
  vuint<K>::store(mask, tgt, src[0]);
#else
  /* lanes that need no further copying: inactive lanes from the start, active
   * lanes once their invalid ID has been copied */
  vbool<K> finished = !mask;
  for (unsigned level = 0; level < RTC_MAX_INSTANCE_LEVEL_COUNT; ++level) {
    vuint<K>::store(mask, &tgt[level], src[level]);
    if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) {
      finished |= src[level] == RTC_INVALID_GEOMETRY_ID;
      if (all(finished))
        break;
    }
  }
#endif
}
} // namespace instance_id_stack
} // namespace embree

View file

@ -0,0 +1,246 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/sys/platform.h"
#include "../../common/sys/sysinfo.h"
namespace embree
{
/* Defines the function pointer type `name##Func` (no arguments, returning
 * `type`) and a variable `name` of that type. */
#define DEFINE_SYMBOL2(type,name) \
typedef type (*name##Func)(); \
name##Func name;
/* Declares `type name()` in each of the ISA namespaces sse2, sse42, avx, avx2
 * and avx512, and defines three helpers in the current scope:
 *   name##_error2() - throws RTC_ERROR_UNKNOWN ("internal error in ISA selection")
 *   name##_error()  - returns a `type` constructed from name##_error2
 *   name##_zero()   - returns a `type` constructed from nullptr
 * The helpers are regular (non-inline) function definitions. */
#define DECLARE_SYMBOL2(type,name) \
namespace sse2 { extern type name(); } \
namespace sse42 { extern type name(); } \
namespace avx { extern type name(); } \
namespace avx2 { extern type name(); } \
namespace avx512 { extern type name(); } \
void name##_error2() { throw_RTCError(RTC_ERROR_UNKNOWN,"internal error in ISA selection for " TOSTRING(name)); } \
type name##_error() { return type(name##_error2); } \
type name##_zero() { return type(nullptr); }
/* Declares the function `type symbol(args)` in each of the ISA namespaces
 * sse2, sse42, avx, avx2 and avx512. Additionally defines
 *   symbol##_error(args) - inline fallback that throws RTC_ERROR_UNSUPPORTED_CPU
 *   symbol##Ty           - function pointer type matching the signature
 *
 * The last line of the macro must not end with a line continuation:
 * otherwise the directive that follows this definition is spliced into the
 * replacement list instead of being processed on its own. */
#define DECLARE_ISA_FUNCTION(type,symbol,args)                            \
  namespace sse2   { extern type symbol(args); }                          \
  namespace sse42  { extern type symbol(args); }                          \
  namespace avx    { extern type symbol(args); }                          \
  namespace avx2   { extern type symbol(args); }                          \
  namespace avx512 { extern type symbol(args); }                          \
  inline type symbol##_error(args) { throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"function " TOSTRING(symbol) " not supported by your CPU"); } \
  typedef type (*symbol##Ty)(args);
/* Defines the function pointer type `symbol##Func` (taking `args`, returning
 * `type`) and a variable `symbol` of that type. */
#define DEFINE_ISA_FUNCTION(type,symbol,args) \
typedef type (*symbol##Func)(args); \
symbol##Func symbol;
/* Assigns the intersector##_zero helper to the symbol. The `features`
 * argument is unused. */
#define ZERO_SYMBOL(features,intersector) \
intersector = intersector##_zero;
/* Assigns the intersector##_error helper, cast to the type of the symbol.
 * The `features` argument is unused. */
#define INIT_SYMBOL(features,intersector) \
intersector = decltype(intersector)(intersector##_error);
/* Unconditionally selects the implementation from the `isa` namespace. The
 * `features` argument is unused. */
#define SELECT_SYMBOL_DEFAULT(features,intersector) \
intersector = isa::intersector;
/* Defines EMBREE_TARGET_SIMD4 (unless already defined) when compiling with
 * SSE or ARM NEON enabled. */
#if defined(__SSE__) || defined(__ARM_NEON)
#if !defined(EMBREE_TARGET_SIMD4)
#define EMBREE_TARGET_SIMD4
#endif
#endif
/* Selects sse42::intersector if all SSE42 feature bits are set in `features`.
 * Expands to nothing when EMBREE_TARGET_SSE42 is not defined. */
#if defined(EMBREE_TARGET_SSE42)
#define SELECT_SYMBOL_SSE42(features,intersector) \
if ((features & SSE42) == SSE42) intersector = sse42::intersector;
#else
#define SELECT_SYMBOL_SSE42(features,intersector)
#endif
/* AVX selection. When EMBREE_TARGET_AVX or __AVX__ is defined, this also
 * defines EMBREE_TARGET_SIMD8 (unless already defined).
 *  - __AVX__ defined: selects isa::intersector if all ISA feature bits are
 *    set in `features`.
 *  - otherwise: selects avx::intersector if all AVX feature bits are set.
 * Expands to nothing when neither EMBREE_TARGET_AVX nor __AVX__ is defined. */
#if defined(EMBREE_TARGET_AVX) || defined(__AVX__)
#if !defined(EMBREE_TARGET_SIMD8)
#define EMBREE_TARGET_SIMD8
#endif
#if defined(__AVX__) // if default ISA is >= AVX we treat AVX target as default target
#define SELECT_SYMBOL_AVX(features,intersector) \
if ((features & ISA) == ISA) intersector = isa::intersector;
#else
#define SELECT_SYMBOL_AVX(features,intersector) \
if ((features & AVX) == AVX) intersector = avx::intersector;
#endif
#else
#define SELECT_SYMBOL_AVX(features,intersector)
#endif
/* Selects avx2::intersector if all AVX2 feature bits are set in `features`,
 * and defines EMBREE_TARGET_SIMD8 (unless already defined). Expands to
 * nothing when EMBREE_TARGET_AVX2 is not defined. */
#if defined(EMBREE_TARGET_AVX2)
#if !defined(EMBREE_TARGET_SIMD8)
#define EMBREE_TARGET_SIMD8
#endif
#define SELECT_SYMBOL_AVX2(features,intersector) \
if ((features & AVX2) == AVX2) intersector = avx2::intersector;
#else
#define SELECT_SYMBOL_AVX2(features,intersector)
#endif
/* Selects avx512::intersector if all AVX512 feature bits are set in
 * `features`, and defines EMBREE_TARGET_SIMD16 (unless already defined).
 * Expands to nothing when EMBREE_TARGET_AVX512 is not defined. */
#if defined(EMBREE_TARGET_AVX512)
#if !defined(EMBREE_TARGET_SIMD16)
#define EMBREE_TARGET_SIMD16
#endif
#define SELECT_SYMBOL_AVX512(features,intersector) \
if ((features & AVX512) == AVX512) intersector = avx512::intersector;
#else
#define SELECT_SYMBOL_AVX512(features,intersector)
#endif
// Compound symbol selectors.
//
// Naming: SELECT_SYMBOL_<FALLBACK>_<ISA...>(features,intersector)
//   <FALLBACK> is the first step applied:
//     DEFAULT -> SELECT_SYMBOL_DEFAULT, INIT -> INIT_SYMBOL, ZERO -> ZERO_SYMBOL,
//     (none)  -> `intersector` is left untouched unless an ISA step assigns it.
//   <ISA...> lists the per-ISA selectors applied afterwards, in the order given.
//   Each step is a separate statement, so a later step that matches `features`
//   overwrites the assignment made by an earlier one.
//
// Every macro expands to SEVERAL statements. Do not use one as the unbraced
// body of an if/else/for/while; wrap the invocation in braces instead.
//
// Each macro is defined exactly once here; the identical repeated definitions
// that used to follow have been removed.

// --- fallback: default ISA implementation ------------------------------------
#define SELECT_SYMBOL_DEFAULT_SSE42(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX_AVX2(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_DEFAULT_AVX_AVX512(features,intersector) \
  SELECT_SYMBOL_DEFAULT(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

// --- fallback: `<symbol>_error` stub (INIT_SYMBOL) ---------------------------
#define SELECT_SYMBOL_INIT_AVX(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector);
#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);
#define SELECT_SYMBOL_INIT_AVX_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);
#define SELECT_SYMBOL_INIT_AVX512(features,intersector) \
  INIT_SYMBOL(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

// --- fallback: `<symbol>_zero` (ZERO_SYMBOL) ---------------------------------
#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512(features,intersector) \
  ZERO_SYMBOL(features,intersector); \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector); \
  SELECT_SYMBOL_AVX512(features,intersector);

// --- no fallback -------------------------------------------------------------
#define SELECT_SYMBOL_SSE42_AVX_AVX2(features,intersector) \
  SELECT_SYMBOL_SSE42(features,intersector); \
  SELECT_SYMBOL_AVX(features,intersector); \
  SELECT_SYMBOL_AVX2(features,intersector);
/*! Exposes the ISA constant visible at the point this header is compiled.
 *  getISA() recurses `depth` times before returning ISA and is marked
 *  __noinline. NOTE(review): presumably the result is compared against the
 *  per-ISA getISA() functions to detect mis-linked targets — confirm. */
struct VerifyMultiTargetLinking {
  static __noinline int getISA(int depth = 5) {
    if (depth != 0)
      return getISA(depth-1);
    return ISA;
  }
};
/* One getISA() forward declaration per ISA namespace; only the declarations
   appear here, the definitions are provided outside this section. */
namespace sse2   { int getISA(); }
namespace sse42  { int getISA(); }
namespace avx    { int getISA(); }
namespace avx2   { int getISA(); }
namespace avx512 { int getISA(); }
}

View file

@ -0,0 +1,325 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../common/math/affinespace.h"
#include "../../common/math/interval.h"
#include <functional>
namespace embree {
// Distance below which a candidate root is treated as a duplicate of a root
// already stored (see the duplicate check in MotionDerivative::findRoots).
#define MOTION_DERIVATIVE_ROOT_EPSILON 1e-4f
// Forward declaration of the generated coefficient routine defined further
// down in this file: reads the parameter array `p` and writes the resulting
// coefficients to `coeff`. The caller in this file passes a 33-element `p`
// and a 3*8*7-element `coeff`.
static void motion_derivative_coefficients(const float *p, float *coeff);
/*! Coefficient table for the motion derivative between two transformations
 *  given in quaternion-decomposed form. The table is filled by
 *  motion_derivative_coefficients(). */
struct MotionDerivativeCoefficients
{
  float theta;          //!< acos of the clamped dot product of the two quaternions
  float coeffs[3*8*7];  //!< coefficients written by motion_derivative_coefficients()

  /*! Leaves theta and coeffs uninitialized. */
  MotionDerivativeCoefficients() {}

  /*! xfm0 and xfm1 are interpreted as quaternion decompositions. */
  MotionDerivativeCoefficients(AffineSpace3ff const& xfm0, AffineSpace3ff const& xfm1)
  {
    // quaternion components, stored in the order (p.w, vx.w, vy.w, vz.w)
    const float q0[4] = { xfm0.p.w, xfm0.l.vx.w, xfm0.l.vy.w, xfm0.l.vz.w };
    const float q1[4] = { xfm1.p.w, xfm1.l.vx.w, xfm1.l.vy.w, xfm1.l.vz.w };

    // cosine of the angle between both quaternions, clamped to [-1,1]
    const float cosTheta = min(1.f, max(-1.f,
        q0[1] * q1[1]
      + q0[2] * q1[2]
      + q0[3] * q1[3]
      + q0[0] * q1[0]));
    theta = std::acos(cosTheta);

    // starts as the xfm1 quaternion; below the 0.995 threshold it is replaced
    // by the normalized perpendicular quaternion
    Vec4f qperp(q1[0], q1[1], q1[2], q1[3]);
    if (cosTheta < 0.995f)
    {
      qperp.x = q1[0] - cosTheta * q0[0];
      qperp.y = q1[1] - cosTheta * q0[1];
      qperp.z = q1[2] - cosTheta * q0[2];
      qperp.w = q1[3] - cosTheta * q0[3];
      qperp = normalize(qperp);
    }

    // parameter vector consumed by the generated code; the slot order is fixed
    const float p[33] = {
      /*  0     */ theta,
      /*  1.. 3 */ xfm0.l.vx.y, xfm0.l.vx.z, xfm0.l.vy.z,            // translation component of xfm0
      /*  4.. 6 */ xfm1.l.vx.y, xfm1.l.vx.z, xfm1.l.vy.z,            // translation component of xfm1
      /*  7..10 */ q0[0], q0[1], q0[2], q0[3],                       // quaternion of xfm0
      /* 11..14 */ qperp.x, qperp.y, qperp.z, qperp.w,
      /* 15..18 */ xfm0.l.vx.x, xfm0.l.vy.x, xfm0.l.vz.x, xfm0.p.x,  // scale/skew component of xfm0
      /* 19..21 */ xfm0.l.vy.y, xfm0.l.vz.y, xfm0.p.y,
      /* 22..23 */ xfm0.l.vz.z, xfm0.p.z,
      /* 24..27 */ xfm1.l.vx.x, xfm1.l.vy.x, xfm1.l.vz.x, xfm1.p.x,  // scale/skew component of xfm1
      /* 28..30 */ xfm1.l.vy.y, xfm1.l.vz.y, xfm1.p.y,
      /* 31..32 */ xfm1.l.vz.z, xfm1.p.z
    };
    motion_derivative_coefficients(p, coeffs);
  }
};
struct MotionDerivative
{
float twoTheta;
float c[8];
MotionDerivative(MotionDerivativeCoefficients const& mdc,
int dim, Vec3fa const& p0, Vec3fa const& p1)
: twoTheta(2.f*mdc.theta)
{
const float p[7] = { 1, p0.x, p0.y, p0.z, p1.x, p1.y, p1.z };
for (int i = 0; i < 8; ++i) {
c[i] = 0;
for (int j = 0; j < 7; ++j) {
c[i] += mdc.coeffs[8*7*dim + i*7 + j] * p[j];
}
}
}
template<typename T>
struct EvalMotionDerivative
{
MotionDerivative const& md;
float offset;
EvalMotionDerivative(MotionDerivative const& md, float offset) : md(md), offset(offset) {}
T operator()(T const& time) const {
return md.c[0] + md.c[1] * time
+ (md.c[2] + md.c[3] * time + md.c[4] * time * time) * cos(md.twoTheta * time)
+ (md.c[5] + md.c[6] * time + md.c[7] * time * time) * sin(md.twoTheta * time)
+ offset;
}
};
unsigned int findRoots(
Interval1f const& interval,
float offset,
float* roots,
unsigned int maxNumRoots)
{
unsigned int numRoots = 0;
EvalMotionDerivative<Interval1f> eval(*this, offset);
findRoots(eval, interval, numRoots, roots, maxNumRoots);
return numRoots;
}
template<typename Eval>
static void findRoots(
Eval const& eval,
Interval1f const& interval,
unsigned int& numRoots,
float* roots,
unsigned int maxNumRoots)
{
Interval1f range = eval(interval);
if (range.lower > 0 || range.upper < 0 || range.lower >= range.upper) return;
const float split = 0.5f * (interval.upper + interval.lower);
if (interval.upper-interval.lower < 1e-7f || abs(split-interval.lower) < 1e-7f || abs(split-interval.upper) < 1e-7f)
{
// check if the root already exists
for (unsigned int k = 0; k < numRoots && k < maxNumRoots; ++k) {
if (abs(roots[k]-split) < MOTION_DERIVATIVE_ROOT_EPSILON)
return;
}
if (numRoots < maxNumRoots) {
roots[numRoots++] = split;
}
if (numRoots > maxNumRoots) {
printf("error: more roots than expected\n"); // FIXME: workaround for ICC2019.4 compiler bug under macOS
return;
}
return;
}
findRoots(eval, Interval1f(interval.lower, split), numRoots, roots, maxNumRoots);
findRoots(eval, Interval1f(split, interval.upper), numRoots, roots, maxNumRoots);
}
};
/******************************************************************************
* Code generated with sympy 1.4 *
* See http://www.sympy.org/ for more information. *
* *
* see *
* *
* scripts/generate_motion_derivative_coefficients.py *
* *
* for how this code is generated *
* *
******************************************************************************/
static void motion_derivative_coefficients(const float *p, float *coeff)
{
coeff[0] = -p[1] + p[4] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27] - p[18] + p[27];
coeff[1] = 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - p[14]*p[14]*p[24] - 2*p[15] + p[24];
coeff[2] = 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - p[14]*p[14]*p[25] - 2*p[16] + p[25];
coeff[3] = -2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - p[14]*p[14]*p[26] - 2*p[17] + p[26];
coeff[4] = (-p[9]*p[9] - p[10]*p[10] - p[13]*p[13] - p[14]*p[14] + 1)*p[15];
coeff[5] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] - p[11]*p[14]*p[19] + p[12]*p[13]*p[19] - p[13]*p[13]*p[16] - p[14]*p[14]*p[16] + p[16];
coeff[6] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] + p[11]*p[13]*p[22] - p[11]*p[14]*p[20] + p[12]*p[13]*p[20] + p[12]*p[14]*p[22] - p[13]*p[13]*p[17] - p[14]*p[14]*p[17] + p[17];
coeff[7] = 0;
coeff[8] = -2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24] + 2*p[15] - 2*p[24];
coeff[9] = -2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25] + 2*p[16] - 2*p[25];
coeff[10] = 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26] + 2*p[17] - 2*p[26];
coeff[11] = 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24] - 2*p[15] + 2*p[24];
coeff[12] = 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25] - 2*p[16] + 2*p[25];
coeff[13] = -2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26] - 2*p[17] + 2*p[26];
coeff[14] = 2*p[0]*p[7]*p[11]*p[18] + 2*p[0]*p[7]*p[13]*p[23] - 2*p[0]*p[7]*p[14]*p[21] + 2*p[0]*p[8]*p[12]*p[18] + 2*p[0]*p[8]*p[13]*p[21] + 2*p[0]*p[8]*p[14]*p[23] + 2*p[0]*p[9]*p[11]*p[23] + 2*p[0]*p[9]*p[12]*p[21] - 2*p[0]*p[9]*p[13]*p[18] - 2*p[0]*p[10]*p[11]*p[21] + 2*p[0]*p[10]*p[12]*p[23] - 2*p[0]*p[10]*p[14]*p[18] - p[7]*p[9]*p[23] + p[7]*p[9]*p[32] + p[7]*p[10]*p[21] - p[7]*p[10]*p[30] - p[8]*p[9]*p[21] + p[8]*p[9]*p[30] - p[8]*p[10]*p[23] + p[8]*p[10]*p[32] + p[9]*p[9]*p[18] - p[9]*p[9]*p[27] + p[10]*p[10]*p[18] - p[10]*p[10]*p[27] + p[11]*p[13]*p[23] - p[11]*p[13]*p[32] - p[11]*p[14]*p[21] + p[11]*p[14]*p[30] + p[12]*p[13]*p[21] - p[12]*p[13]*p[30] + p[12]*p[14]*p[23] - p[12]*p[14]*p[32] - p[13]*p[13]*p[18] + p[13]*p[13]*p[27] - p[14]*p[14]*p[18] + p[14]*p[14]*p[27];
coeff[15] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + p[14]*p[14]*p[24];
coeff[16] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + p[14]*p[14]*p[25];
coeff[17] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + p[14]*p[14]*p[26];
coeff[18] = (-p[9]*p[9] - p[10]*p[10] + p[13]*p[13] + p[14]*p[14])*p[15];
coeff[19] = -p[7]*p[10]*p[19] + p[8]*p[9]*p[19] - p[9]*p[9]*p[16] - p[10]*p[10]*p[16] + p[11]*p[14]*p[19] - p[12]*p[13]*p[19] + p[13]*p[13]*p[16] + p[14]*p[14]*p[16];
coeff[20] = p[7]*p[9]*p[22] - p[7]*p[10]*p[20] + p[8]*p[9]*p[20] + p[8]*p[10]*p[22] - p[9]*p[9]*p[17] - p[10]*p[10]*p[17] - p[11]*p[13]*p[22] + p[11]*p[14]*p[20] - p[12]*p[13]*p[20] - p[12]*p[14]*p[22] + p[13]*p[13]*p[17] + p[14]*p[14]*p[17];
coeff[21] = 2*(-p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27])*p[0];
coeff[22] = -4*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[7]*p[11]*p[24] - 4*p[0]*p[8]*p[12]*p[15] + 2*p[0]*p[8]*p[12]*p[24] + 4*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[9]*p[13]*p[24] + 4*p[0]*p[10]*p[14]*p[15] - 2*p[0]*p[10]*p[14]*p[24] - 2*p[9]*p[9]*p[15] + 2*p[9]*p[9]*p[24] - 2*p[10]*p[10]*p[15] + 2*p[10]*p[10]*p[24] + 2*p[13]*p[13]*p[15] - 2*p[13]*p[13]*p[24] + 2*p[14]*p[14]*p[15] - 2*p[14]*p[14]*p[24];
coeff[23] = -4*p[0]*p[7]*p[11]*p[16] + 2*p[0]*p[7]*p[11]*p[25] + 4*p[0]*p[7]*p[14]*p[19] - 2*p[0]*p[7]*p[14]*p[28] - 4*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[12]*p[25] - 4*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[8]*p[13]*p[28] - 4*p[0]*p[9]*p[12]*p[19] + 2*p[0]*p[9]*p[12]*p[28] + 4*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[9]*p[13]*p[25] + 4*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[11]*p[28] + 4*p[0]*p[10]*p[14]*p[16] - 2*p[0]*p[10]*p[14]*p[25] - 2*p[7]*p[10]*p[19] + 2*p[7]*p[10]*p[28] + 2*p[8]*p[9]*p[19] - 2*p[8]*p[9]*p[28] - 2*p[9]*p[9]*p[16] + 2*p[9]*p[9]*p[25] - 2*p[10]*p[10]*p[16] + 2*p[10]*p[10]*p[25] + 2*p[11]*p[14]*p[19] - 2*p[11]*p[14]*p[28] - 2*p[12]*p[13]*p[19] + 2*p[12]*p[13]*p[28] + 2*p[13]*p[13]*p[16] - 2*p[13]*p[13]*p[25] + 2*p[14]*p[14]*p[16] - 2*p[14]*p[14]*p[25];
coeff[24] = -4*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[11]*p[26] - 4*p[0]*p[7]*p[13]*p[22] + 2*p[0]*p[7]*p[13]*p[31] + 4*p[0]*p[7]*p[14]*p[20] - 2*p[0]*p[7]*p[14]*p[29] - 4*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[12]*p[26] - 4*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[13]*p[29] - 4*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[8]*p[14]*p[31] - 4*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[11]*p[31] - 4*p[0]*p[9]*p[12]*p[20] + 2*p[0]*p[9]*p[12]*p[29] + 4*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[9]*p[13]*p[26] + 4*p[0]*p[10]*p[11]*p[20] - 2*p[0]*p[10]*p[11]*p[29] - 4*p[0]*p[10]*p[12]*p[22] + 2*p[0]*p[10]*p[12]*p[31] + 4*p[0]*p[10]*p[14]*p[17] - 2*p[0]*p[10]*p[14]*p[26] + 2*p[7]*p[9]*p[22] - 2*p[7]*p[9]*p[31] - 2*p[7]*p[10]*p[20] + 2*p[7]*p[10]*p[29] + 2*p[8]*p[9]*p[20] - 2*p[8]*p[9]*p[29] + 2*p[8]*p[10]*p[22] - 2*p[8]*p[10]*p[31] - 2*p[9]*p[9]*p[17] + 2*p[9]*p[9]*p[26] - 2*p[10]*p[10]*p[17] + 2*p[10]*p[10]*p[26] - 2*p[11]*p[13]*p[22] + 2*p[11]*p[13]*p[31] + 2*p[11]*p[14]*p[20] - 2*p[11]*p[14]*p[29] - 2*p[12]*p[13]*p[20] + 2*p[12]*p[13]*p[29] - 2*p[12]*p[14]*p[22] + 2*p[12]*p[14]*p[31] + 2*p[13]*p[13]*p[17] - 2*p[13]*p[13]*p[26] + 2*p[14]*p[14]*p[17] - 2*p[14]*p[14]*p[26];
coeff[25] = 2*p[0]*p[7]*p[11]*p[15] + 2*p[0]*p[8]*p[12]*p[15] - 2*p[0]*p[9]*p[13]*p[15] - 2*p[0]*p[10]*p[14]*p[15] + 2*p[9]*p[9]*p[15] - 2*p[9]*p[9]*p[24] + 2*p[10]*p[10]*p[15] - 2*p[10]*p[10]*p[24] - 2*p[13]*p[13]*p[15] + 2*p[13]*p[13]*p[24] - 2*p[14]*p[14]*p[15] + 2*p[14]*p[14]*p[24];
coeff[26] = 2*p[0]*p[7]*p[11]*p[16] - 2*p[0]*p[7]*p[14]*p[19] + 2*p[0]*p[8]*p[12]*p[16] + 2*p[0]*p[8]*p[13]*p[19] + 2*p[0]*p[9]*p[12]*p[19] - 2*p[0]*p[9]*p[13]*p[16] - 2*p[0]*p[10]*p[11]*p[19] - 2*p[0]*p[10]*p[14]*p[16] + 2*p[7]*p[10]*p[19] - 2*p[7]*p[10]*p[28] - 2*p[8]*p[9]*p[19] + 2*p[8]*p[9]*p[28] + 2*p[9]*p[9]*p[16] - 2*p[9]*p[9]*p[25] + 2*p[10]*p[10]*p[16] - 2*p[10]*p[10]*p[25] - 2*p[11]*p[14]*p[19] + 2*p[11]*p[14]*p[28] + 2*p[12]*p[13]*p[19] - 2*p[12]*p[13]*p[28] - 2*p[13]*p[13]*p[16] + 2*p[13]*p[13]*p[25] - 2*p[14]*p[14]*p[16] + 2*p[14]*p[14]*p[25];
coeff[27] = 2*p[0]*p[7]*p[11]*p[17] + 2*p[0]*p[7]*p[13]*p[22] - 2*p[0]*p[7]*p[14]*p[20] + 2*p[0]*p[8]*p[12]*p[17] + 2*p[0]*p[8]*p[13]*p[20] + 2*p[0]*p[8]*p[14]*p[22] + 2*p[0]*p[9]*p[11]*p[22] + 2*p[0]*p[9]*p[12]*p[20] - 2*p[0]*p[9]*p[13]*p[17] - 2*p[0]*p[10]*p[11]*p[20] + 2*p[0]*p[10]*p[12]*p[22] - 2*p[0]*p[10]*p[14]*p[17] - 2*p[7]*p[9]*p[22] + 2*p[7]*p[9]*p[31] + 2*p[7]*p[10]*p[20] - 2*p[7]*p[10]*p[29] - 2*p[8]*p[9]*p[20] + 2*p[8]*p[9]*p[29] - 2*p[8]*p[10]*p[22] + 2*p[8]*p[10]*p[31] + 2*p[9]*p[9]*p[17] - 2*p[9]*p[9]*p[26] + 2*p[10]*p[10]*p[17] - 2*p[10]*p[10]*p[26] + 2*p[11]*p[13]*p[22] - 2*p[11]*p[13]*p[31] - 2*p[11]*p[14]*p[20] + 2*p[11]*p[14]*p[29] + 2*p[12]*p[13]*p[20] - 2*p[12]*p[13]*p[29] + 2*p[12]*p[14]*p[22] - 2*p[12]*p[14]*p[31] - 2*p[13]*p[13]*p[17] + 2*p[13]*p[13]*p[26] - 2*p[14]*p[14]*p[17] + 2*p[14]*p[14]*p[26];
coeff[28] = 0;
coeff[29] = 2*(p[7]*p[11]*p[15] - p[7]*p[11]*p[24] + p[8]*p[12]*p[15] - p[8]*p[12]*p[24] - p[9]*p[13]*p[15] + p[9]*p[13]*p[24] - p[10]*p[14]*p[15] + p[10]*p[14]*p[24])*p[0];
coeff[30] = 2*(p[7]*p[11]*p[16] - p[7]*p[11]*p[25] - p[7]*p[14]*p[19] + p[7]*p[14]*p[28] + p[8]*p[12]*p[16] - p[8]*p[12]*p[25] + p[8]*p[13]*p[19] - p[8]*p[13]*p[28] + p[9]*p[12]*p[19] - p[9]*p[12]*p[28] - p[9]*p[13]*p[16] + p[9]*p[13]*p[25] - p[10]*p[11]*p[19] + p[10]*p[11]*p[28] - p[10]*p[14]*p[16] + p[10]*p[14]*p[25])*p[0];
coeff[31] = 2*(p[7]*p[11]*p[17] - p[7]*p[11]*p[26] + p[7]*p[13]*p[22] - p[7]*p[13]*p[31] - p[7]*p[14]*p[20] + p[7]*p[14]*p[29] + p[8]*p[12]*p[17] - p[8]*p[12]*p[26] + p[8]*p[13]*p[20] - p[8]*p[13]*p[29] + p[8]*p[14]*p[22] - p[8]*p[14]*p[31] + p[9]*p[11]*p[22] - p[9]*p[11]*p[31] + p[9]*p[12]*p[20] - p[9]*p[12]*p[29] - p[9]*p[13]*p[17] + p[9]*p[13]*p[26] - p[10]*p[11]*p[20] + p[10]*p[11]*p[29] + p[10]*p[12]*p[22] - p[10]*p[12]*p[31] - p[10]*p[14]*p[17] + p[10]*p[14]*p[26])*p[0];
coeff[32] = 2*(-p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + p[10]*p[14]*p[15] - p[10]*p[14]*p[24])*p[0];
coeff[33] = 2*(-p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + p[10]*p[14]*p[16] - p[10]*p[14]*p[25])*p[0];
coeff[34] = 2*(-p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + p[10]*p[14]*p[17] - p[10]*p[14]*p[26])*p[0];
coeff[35] = -2*p[0]*p[7]*p[9]*p[23] + 2*p[0]*p[7]*p[10]*p[21] - 2*p[0]*p[8]*p[9]*p[21] - 2*p[0]*p[8]*p[10]*p[23] + 2*p[0]*p[9]*p[9]*p[18] + 2*p[0]*p[10]*p[10]*p[18] + 2*p[0]*p[11]*p[13]*p[23] - 2*p[0]*p[11]*p[14]*p[21] + 2*p[0]*p[12]*p[13]*p[21] + 2*p[0]*p[12]*p[14]*p[23] - 2*p[0]*p[13]*p[13]*p[18] - 2*p[0]*p[14]*p[14]*p[18] - p[7]*p[11]*p[18] + p[7]*p[11]*p[27] - p[7]*p[13]*p[23] + p[7]*p[13]*p[32] + p[7]*p[14]*p[21] - p[7]*p[14]*p[30] - p[8]*p[12]*p[18] + p[8]*p[12]*p[27] - p[8]*p[13]*p[21] + p[8]*p[13]*p[30] - p[8]*p[14]*p[23] + p[8]*p[14]*p[32] - p[9]*p[11]*p[23] + p[9]*p[11]*p[32] - p[9]*p[12]*p[21] + p[9]*p[12]*p[30] + p[9]*p[13]*p[18] - p[9]*p[13]*p[27] + p[10]*p[11]*p[21] - p[10]*p[11]*p[30] - p[10]*p[12]*p[23] + p[10]*p[12]*p[32] + p[10]*p[14]*p[18] - p[10]*p[14]*p[27];
coeff[36] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - p[10]*p[14]*p[24];
coeff[37] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - p[10]*p[14]*p[25];
coeff[38] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - p[10]*p[14]*p[26];
coeff[39] = (p[7]*p[11] + p[8]*p[12] - p[9]*p[13] - p[10]*p[14])*p[15];
coeff[40] = p[7]*p[11]*p[16] - p[7]*p[14]*p[19] + p[8]*p[12]*p[16] + p[8]*p[13]*p[19] + p[9]*p[12]*p[19] - p[9]*p[13]*p[16] - p[10]*p[11]*p[19] - p[10]*p[14]*p[16];
coeff[41] = p[7]*p[11]*p[17] + p[7]*p[13]*p[22] - p[7]*p[14]*p[20] + p[8]*p[12]*p[17] + p[8]*p[13]*p[20] + p[8]*p[14]*p[22] + p[9]*p[11]*p[22] + p[9]*p[12]*p[20] - p[9]*p[13]*p[17] - p[10]*p[11]*p[20] + p[10]*p[12]*p[22] - p[10]*p[14]*p[17];
coeff[42] = 2*(p[7]*p[9]*p[23] - p[7]*p[9]*p[32] - p[7]*p[10]*p[21] + p[7]*p[10]*p[30] + p[8]*p[9]*p[21] - p[8]*p[9]*p[30] + p[8]*p[10]*p[23] - p[8]*p[10]*p[32] - p[9]*p[9]*p[18] + p[9]*p[9]*p[27] - p[10]*p[10]*p[18] + p[10]*p[10]*p[27] - p[11]*p[13]*p[23] + p[11]*p[13]*p[32] + p[11]*p[14]*p[21] - p[11]*p[14]*p[30] - p[12]*p[13]*p[21] + p[12]*p[13]*p[30] - p[12]*p[14]*p[23] + p[12]*p[14]*p[32] + p[13]*p[13]*p[18] - p[13]*p[13]*p[27] + p[14]*p[14]*p[18] - p[14]*p[14]*p[27])*p[0];
coeff[43] = -4*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[9]*p[9]*p[24] - 4*p[0]*p[10]*p[10]*p[15] + 2*p[0]*p[10]*p[10]*p[24] + 4*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[13]*p[13]*p[24] + 4*p[0]*p[14]*p[14]*p[15] - 2*p[0]*p[14]*p[14]*p[24] + 2*p[7]*p[11]*p[15] - 2*p[7]*p[11]*p[24] + 2*p[8]*p[12]*p[15] - 2*p[8]*p[12]*p[24] - 2*p[9]*p[13]*p[15] + 2*p[9]*p[13]*p[24] - 2*p[10]*p[14]*p[15] + 2*p[10]*p[14]*p[24];
coeff[44] = -4*p[0]*p[7]*p[10]*p[19] + 2*p[0]*p[7]*p[10]*p[28] + 4*p[0]*p[8]*p[9]*p[19] - 2*p[0]*p[8]*p[9]*p[28] - 4*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[9]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[16] + 2*p[0]*p[10]*p[10]*p[25] + 4*p[0]*p[11]*p[14]*p[19] - 2*p[0]*p[11]*p[14]*p[28] - 4*p[0]*p[12]*p[13]*p[19] + 2*p[0]*p[12]*p[13]*p[28] + 4*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[13]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[16] - 2*p[0]*p[14]*p[14]*p[25] + 2*p[7]*p[11]*p[16] - 2*p[7]*p[11]*p[25] - 2*p[7]*p[14]*p[19] + 2*p[7]*p[14]*p[28] + 2*p[8]*p[12]*p[16] - 2*p[8]*p[12]*p[25] + 2*p[8]*p[13]*p[19] - 2*p[8]*p[13]*p[28] + 2*p[9]*p[12]*p[19] - 2*p[9]*p[12]*p[28] - 2*p[9]*p[13]*p[16] + 2*p[9]*p[13]*p[25] - 2*p[10]*p[11]*p[19] + 2*p[10]*p[11]*p[28] - 2*p[10]*p[14]*p[16] + 2*p[10]*p[14]*p[25];
coeff[45] = 4*p[0]*p[7]*p[9]*p[22] - 2*p[0]*p[7]*p[9]*p[31] - 4*p[0]*p[7]*p[10]*p[20] + 2*p[0]*p[7]*p[10]*p[29] + 4*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[9]*p[29] + 4*p[0]*p[8]*p[10]*p[22] - 2*p[0]*p[8]*p[10]*p[31] - 4*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[9]*p[9]*p[26] - 4*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[10]*p[10]*p[26] - 4*p[0]*p[11]*p[13]*p[22] + 2*p[0]*p[11]*p[13]*p[31] + 4*p[0]*p[11]*p[14]*p[20] - 2*p[0]*p[11]*p[14]*p[29] - 4*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[13]*p[29] - 4*p[0]*p[12]*p[14]*p[22] + 2*p[0]*p[12]*p[14]*p[31] + 4*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[13]*p[13]*p[26] + 4*p[0]*p[14]*p[14]*p[17] - 2*p[0]*p[14]*p[14]*p[26] + 2*p[7]*p[11]*p[17] - 2*p[7]*p[11]*p[26] + 2*p[7]*p[13]*p[22] - 2*p[7]*p[13]*p[31] - 2*p[7]*p[14]*p[20] + 2*p[7]*p[14]*p[29] + 2*p[8]*p[12]*p[17] - 2*p[8]*p[12]*p[26] + 2*p[8]*p[13]*p[20] - 2*p[8]*p[13]*p[29] + 2*p[8]*p[14]*p[22] - 2*p[8]*p[14]*p[31] + 2*p[9]*p[11]*p[22] - 2*p[9]*p[11]*p[31] + 2*p[9]*p[12]*p[20] - 2*p[9]*p[12]*p[29] - 2*p[9]*p[13]*p[17] + 2*p[9]*p[13]*p[26] - 2*p[10]*p[11]*p[20] + 2*p[10]*p[11]*p[29] + 2*p[10]*p[12]*p[22] - 2*p[10]*p[12]*p[31] - 2*p[10]*p[14]*p[17] + 2*p[10]*p[14]*p[26];
coeff[46] = 2*p[0]*p[9]*p[9]*p[15] + 2*p[0]*p[10]*p[10]*p[15] - 2*p[0]*p[13]*p[13]*p[15] - 2*p[0]*p[14]*p[14]*p[15] - 2*p[7]*p[11]*p[15] + 2*p[7]*p[11]*p[24] - 2*p[8]*p[12]*p[15] + 2*p[8]*p[12]*p[24] + 2*p[9]*p[13]*p[15] - 2*p[9]*p[13]*p[24] + 2*p[10]*p[14]*p[15] - 2*p[10]*p[14]*p[24];
coeff[47] = 2*p[0]*p[7]*p[10]*p[19] - 2*p[0]*p[8]*p[9]*p[19] + 2*p[0]*p[9]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[16] - 2*p[0]*p[11]*p[14]*p[19] + 2*p[0]*p[12]*p[13]*p[19] - 2*p[0]*p[13]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[16] - 2*p[7]*p[11]*p[16] + 2*p[7]*p[11]*p[25] + 2*p[7]*p[14]*p[19] - 2*p[7]*p[14]*p[28] - 2*p[8]*p[12]*p[16] + 2*p[8]*p[12]*p[25] - 2*p[8]*p[13]*p[19] + 2*p[8]*p[13]*p[28] - 2*p[9]*p[12]*p[19] + 2*p[9]*p[12]*p[28] + 2*p[9]*p[13]*p[16] - 2*p[9]*p[13]*p[25] + 2*p[10]*p[11]*p[19] - 2*p[10]*p[11]*p[28] + 2*p[10]*p[14]*p[16] - 2*p[10]*p[14]*p[25];
coeff[48] = -2*p[0]*p[7]*p[9]*p[22] + 2*p[0]*p[7]*p[10]*p[20] - 2*p[0]*p[8]*p[9]*p[20] - 2*p[0]*p[8]*p[10]*p[22] + 2*p[0]*p[9]*p[9]*p[17] + 2*p[0]*p[10]*p[10]*p[17] + 2*p[0]*p[11]*p[13]*p[22] - 2*p[0]*p[11]*p[14]*p[20] + 2*p[0]*p[12]*p[13]*p[20] + 2*p[0]*p[12]*p[14]*p[22] - 2*p[0]*p[13]*p[13]*p[17] - 2*p[0]*p[14]*p[14]*p[17] - 2*p[7]*p[11]*p[17] + 2*p[7]*p[11]*p[26] - 2*p[7]*p[13]*p[22] + 2*p[7]*p[13]*p[31] + 2*p[7]*p[14]*p[20] - 2*p[7]*p[14]*p[29] - 2*p[8]*p[12]*p[17] + 2*p[8]*p[12]*p[26] - 2*p[8]*p[13]*p[20] + 2*p[8]*p[13]*p[29] - 2*p[8]*p[14]*p[22] + 2*p[8]*p[14]*p[31] - 2*p[9]*p[11]*p[22] + 2*p[9]*p[11]*p[31] - 2*p[9]*p[12]*p[20] + 2*p[9]*p[12]*p[29] + 2*p[9]*p[13]*p[17] - 2*p[9]*p[13]*p[26] + 2*p[10]*p[11]*p[20] - 2*p[10]*p[11]*p[29] - 2*p[10]*p[12]*p[22] + 2*p[10]*p[12]*p[31] + 2*p[10]*p[14]*p[17] - 2*p[10]*p[14]*p[26];
coeff[49] = 0;
coeff[50] = 2*(p[9]*p[9]*p[15] - p[9]*p[9]*p[24] + p[10]*p[10]*p[15] - p[10]*p[10]*p[24] - p[13]*p[13]*p[15] + p[13]*p[13]*p[24] - p[14]*p[14]*p[15] + p[14]*p[14]*p[24])*p[0];
coeff[51] = 2*(p[7]*p[10]*p[19] - p[7]*p[10]*p[28] - p[8]*p[9]*p[19] + p[8]*p[9]*p[28] + p[9]*p[9]*p[16] - p[9]*p[9]*p[25] + p[10]*p[10]*p[16] - p[10]*p[10]*p[25] - p[11]*p[14]*p[19] + p[11]*p[14]*p[28] + p[12]*p[13]*p[19] - p[12]*p[13]*p[28] - p[13]*p[13]*p[16] + p[13]*p[13]*p[25] - p[14]*p[14]*p[16] + p[14]*p[14]*p[25])*p[0];
coeff[52] = 2*(-p[7]*p[9]*p[22] + p[7]*p[9]*p[31] + p[7]*p[10]*p[20] - p[7]*p[10]*p[29] - p[8]*p[9]*p[20] + p[8]*p[9]*p[29] - p[8]*p[10]*p[22] + p[8]*p[10]*p[31] + p[9]*p[9]*p[17] - p[9]*p[9]*p[26] + p[10]*p[10]*p[17] - p[10]*p[10]*p[26] + p[11]*p[13]*p[22] - p[11]*p[13]*p[31] - p[11]*p[14]*p[20] + p[11]*p[14]*p[29] + p[12]*p[13]*p[20] - p[12]*p[13]*p[29] + p[12]*p[14]*p[22] - p[12]*p[14]*p[31] - p[13]*p[13]*p[17] + p[13]*p[13]*p[26] - p[14]*p[14]*p[17] + p[14]*p[14]*p[26])*p[0];
coeff[53] = 2*(-p[9]*p[9]*p[15] + p[9]*p[9]*p[24] - p[10]*p[10]*p[15] + p[10]*p[10]*p[24] + p[13]*p[13]*p[15] - p[13]*p[13]*p[24] + p[14]*p[14]*p[15] - p[14]*p[14]*p[24])*p[0];
coeff[54] = 2*(-p[7]*p[10]*p[19] + p[7]*p[10]*p[28] + p[8]*p[9]*p[19] - p[8]*p[9]*p[28] - p[9]*p[9]*p[16] + p[9]*p[9]*p[25] - p[10]*p[10]*p[16] + p[10]*p[10]*p[25] + p[11]*p[14]*p[19] - p[11]*p[14]*p[28] - p[12]*p[13]*p[19] + p[12]*p[13]*p[28] + p[13]*p[13]*p[16] - p[13]*p[13]*p[25] + p[14]*p[14]*p[16] - p[14]*p[14]*p[25])*p[0];
coeff[55] = 2*(p[7]*p[9]*p[22] - p[7]*p[9]*p[31] - p[7]*p[10]*p[20] + p[7]*p[10]*p[29] + p[8]*p[9]*p[20] - p[8]*p[9]*p[29] + p[8]*p[10]*p[22] - p[8]*p[10]*p[31] - p[9]*p[9]*p[17] + p[9]*p[9]*p[26] - p[10]*p[10]*p[17] + p[10]*p[10]*p[26] - p[11]*p[13]*p[22] + p[11]*p[13]*p[31] + p[11]*p[14]*p[20] - p[11]*p[14]*p[29] - p[12]*p[13]*p[20] + p[12]*p[13]*p[29] - p[12]*p[14]*p[22] + p[12]*p[14]*p[31] + p[13]*p[13]*p[17] - p[13]*p[13]*p[26] + p[14]*p[14]*p[17] - p[14]*p[14]*p[26])*p[0];
coeff[56] = -p[2] + p[5] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30] - p[21] + p[30];
coeff[57] = -2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + p[12]*p[13]*p[24];
coeff[58] = -2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - p[14]*p[14]*p[28] - 2*p[19] + p[28];
coeff[59] = 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - p[14]*p[14]*p[29] - 2*p[20] + p[29];
coeff[60] = (p[7]*p[10] + p[8]*p[9] + p[11]*p[14] + p[12]*p[13])*p[15];
coeff[61] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] + p[11]*p[14]*p[16] - p[12]*p[12]*p[19] + p[12]*p[13]*p[16] - p[14]*p[14]*p[19] + p[19];
coeff[62] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] - p[11]*p[12]*p[22] + p[11]*p[14]*p[17] - p[12]*p[12]*p[20] + p[12]*p[13]*p[17] + p[13]*p[14]*p[22] - p[14]*p[14]*p[20] + p[20];
coeff[63] = 0;
coeff[64] = 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
coeff[65] = 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28] + 2*p[19] - 2*p[28];
coeff[66] = -2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29] + 2*p[20] - 2*p[29];
coeff[67] = -2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
coeff[68] = -2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28] - 2*p[19] + 2*p[28];
coeff[69] = 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29] - 2*p[20] + 2*p[29];
coeff[70] = 2*p[0]*p[7]*p[11]*p[21] - 2*p[0]*p[7]*p[12]*p[23] + 2*p[0]*p[7]*p[14]*p[18] - 2*p[0]*p[8]*p[11]*p[23] - 2*p[0]*p[8]*p[12]*p[21] + 2*p[0]*p[8]*p[13]*p[18] + 2*p[0]*p[9]*p[12]*p[18] + 2*p[0]*p[9]*p[13]*p[21] + 2*p[0]*p[9]*p[14]*p[23] + 2*p[0]*p[10]*p[11]*p[18] + 2*p[0]*p[10]*p[13]*p[23] - 2*p[0]*p[10]*p[14]*p[21] + p[7]*p[8]*p[23] - p[7]*p[8]*p[32] - p[7]*p[10]*p[18] + p[7]*p[10]*p[27] + p[8]*p[8]*p[21] - p[8]*p[8]*p[30] - p[8]*p[9]*p[18] + p[8]*p[9]*p[27] - p[9]*p[10]*p[23] + p[9]*p[10]*p[32] + p[10]*p[10]*p[21] - p[10]*p[10]*p[30] - p[11]*p[12]*p[23] + p[11]*p[12]*p[32] + p[11]*p[14]*p[18] - p[11]*p[14]*p[27] - p[12]*p[12]*p[21] + p[12]*p[12]*p[30] + p[12]*p[13]*p[18] - p[12]*p[13]*p[27] + p[13]*p[14]*p[23] - p[13]*p[14]*p[32] - p[14]*p[14]*p[21] + p[14]*p[14]*p[30];
coeff[71] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - p[12]*p[13]*p[24];
coeff[72] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + p[14]*p[14]*p[28];
coeff[73] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + p[14]*p[14]*p[29];
coeff[74] = (p[7]*p[10] + p[8]*p[9] - p[11]*p[14] - p[12]*p[13])*p[15];
coeff[75] = p[7]*p[10]*p[16] - p[8]*p[8]*p[19] + p[8]*p[9]*p[16] - p[10]*p[10]*p[19] - p[11]*p[14]*p[16] + p[12]*p[12]*p[19] - p[12]*p[13]*p[16] + p[14]*p[14]*p[19];
coeff[76] = -p[7]*p[8]*p[22] + p[7]*p[10]*p[17] - p[8]*p[8]*p[20] + p[8]*p[9]*p[17] + p[9]*p[10]*p[22] - p[10]*p[10]*p[20] + p[11]*p[12]*p[22] - p[11]*p[14]*p[17] + p[12]*p[12]*p[20] - p[12]*p[13]*p[17] - p[13]*p[14]*p[22] + p[14]*p[14]*p[20];
coeff[77] = 2*(-p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30])*p[0];
coeff[78] = -4*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[7]*p[14]*p[24] - 4*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[8]*p[13]*p[24] - 4*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[9]*p[12]*p[24] - 4*p[0]*p[10]*p[11]*p[15] + 2*p[0]*p[10]*p[11]*p[24] + 2*p[7]*p[10]*p[15] - 2*p[7]*p[10]*p[24] + 2*p[8]*p[9]*p[15] - 2*p[8]*p[9]*p[24] - 2*p[11]*p[14]*p[15] + 2*p[11]*p[14]*p[24] - 2*p[12]*p[13]*p[15] + 2*p[12]*p[13]*p[24];
coeff[79] = -4*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[11]*p[28] - 4*p[0]*p[7]*p[14]*p[16] + 2*p[0]*p[7]*p[14]*p[25] + 4*p[0]*p[8]*p[12]*p[19] - 2*p[0]*p[8]*p[12]*p[28] - 4*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[8]*p[13]*p[25] - 4*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[12]*p[25] - 4*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[9]*p[13]*p[28] - 4*p[0]*p[10]*p[11]*p[16] + 2*p[0]*p[10]*p[11]*p[25] + 4*p[0]*p[10]*p[14]*p[19] - 2*p[0]*p[10]*p[14]*p[28] + 2*p[7]*p[10]*p[16] - 2*p[7]*p[10]*p[25] - 2*p[8]*p[8]*p[19] + 2*p[8]*p[8]*p[28] + 2*p[8]*p[9]*p[16] - 2*p[8]*p[9]*p[25] - 2*p[10]*p[10]*p[19] + 2*p[10]*p[10]*p[28] - 2*p[11]*p[14]*p[16] + 2*p[11]*p[14]*p[25] + 2*p[12]*p[12]*p[19] - 2*p[12]*p[12]*p[28] - 2*p[12]*p[13]*p[16] + 2*p[12]*p[13]*p[25] + 2*p[14]*p[14]*p[19] - 2*p[14]*p[14]*p[28];
coeff[80] = -4*p[0]*p[7]*p[11]*p[20] + 2*p[0]*p[7]*p[11]*p[29] + 4*p[0]*p[7]*p[12]*p[22] - 2*p[0]*p[7]*p[12]*p[31] - 4*p[0]*p[7]*p[14]*p[17] + 2*p[0]*p[7]*p[14]*p[26] + 4*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[11]*p[31] + 4*p[0]*p[8]*p[12]*p[20] - 2*p[0]*p[8]*p[12]*p[29] - 4*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[8]*p[13]*p[26] - 4*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[12]*p[26] - 4*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[13]*p[29] - 4*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[9]*p[14]*p[31] - 4*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[11]*p[26] - 4*p[0]*p[10]*p[13]*p[22] + 2*p[0]*p[10]*p[13]*p[31] + 4*p[0]*p[10]*p[14]*p[20] - 2*p[0]*p[10]*p[14]*p[29] - 2*p[7]*p[8]*p[22] + 2*p[7]*p[8]*p[31] + 2*p[7]*p[10]*p[17] - 2*p[7]*p[10]*p[26] - 2*p[8]*p[8]*p[20] + 2*p[8]*p[8]*p[29] + 2*p[8]*p[9]*p[17] - 2*p[8]*p[9]*p[26] + 2*p[9]*p[10]*p[22] - 2*p[9]*p[10]*p[31] - 2*p[10]*p[10]*p[20] + 2*p[10]*p[10]*p[29] + 2*p[11]*p[12]*p[22] - 2*p[11]*p[12]*p[31] - 2*p[11]*p[14]*p[17] + 2*p[11]*p[14]*p[26] + 2*p[12]*p[12]*p[20] - 2*p[12]*p[12]*p[29] - 2*p[12]*p[13]*p[17] + 2*p[12]*p[13]*p[26] - 2*p[13]*p[14]*p[22] + 2*p[13]*p[14]*p[31] + 2*p[14]*p[14]*p[20] - 2*p[14]*p[14]*p[29];
coeff[81] = 2*p[0]*p[7]*p[14]*p[15] + 2*p[0]*p[8]*p[13]*p[15] + 2*p[0]*p[9]*p[12]*p[15] + 2*p[0]*p[10]*p[11]*p[15] - 2*p[7]*p[10]*p[15] + 2*p[7]*p[10]*p[24] - 2*p[8]*p[9]*p[15] + 2*p[8]*p[9]*p[24] + 2*p[11]*p[14]*p[15] - 2*p[11]*p[14]*p[24] + 2*p[12]*p[13]*p[15] - 2*p[12]*p[13]*p[24];
coeff[82] = 2*p[0]*p[7]*p[11]*p[19] + 2*p[0]*p[7]*p[14]*p[16] - 2*p[0]*p[8]*p[12]*p[19] + 2*p[0]*p[8]*p[13]*p[16] + 2*p[0]*p[9]*p[12]*p[16] + 2*p[0]*p[9]*p[13]*p[19] + 2*p[0]*p[10]*p[11]*p[16] - 2*p[0]*p[10]*p[14]*p[19] - 2*p[7]*p[10]*p[16] + 2*p[7]*p[10]*p[25] + 2*p[8]*p[8]*p[19] - 2*p[8]*p[8]*p[28] - 2*p[8]*p[9]*p[16] + 2*p[8]*p[9]*p[25] + 2*p[10]*p[10]*p[19] - 2*p[10]*p[10]*p[28] + 2*p[11]*p[14]*p[16] - 2*p[11]*p[14]*p[25] - 2*p[12]*p[12]*p[19] + 2*p[12]*p[12]*p[28] + 2*p[12]*p[13]*p[16] - 2*p[12]*p[13]*p[25] - 2*p[14]*p[14]*p[19] + 2*p[14]*p[14]*p[28];
coeff[83] = 2*p[0]*p[7]*p[11]*p[20] - 2*p[0]*p[7]*p[12]*p[22] + 2*p[0]*p[7]*p[14]*p[17] - 2*p[0]*p[8]*p[11]*p[22] - 2*p[0]*p[8]*p[12]*p[20] + 2*p[0]*p[8]*p[13]*p[17] + 2*p[0]*p[9]*p[12]*p[17] + 2*p[0]*p[9]*p[13]*p[20] + 2*p[0]*p[9]*p[14]*p[22] + 2*p[0]*p[10]*p[11]*p[17] + 2*p[0]*p[10]*p[13]*p[22] - 2*p[0]*p[10]*p[14]*p[20] + 2*p[7]*p[8]*p[22] - 2*p[7]*p[8]*p[31] - 2*p[7]*p[10]*p[17] + 2*p[7]*p[10]*p[26] + 2*p[8]*p[8]*p[20] - 2*p[8]*p[8]*p[29] - 2*p[8]*p[9]*p[17] + 2*p[8]*p[9]*p[26] - 2*p[9]*p[10]*p[22] + 2*p[9]*p[10]*p[31] + 2*p[10]*p[10]*p[20] - 2*p[10]*p[10]*p[29] - 2*p[11]*p[12]*p[22] + 2*p[11]*p[12]*p[31] + 2*p[11]*p[14]*p[17] - 2*p[11]*p[14]*p[26] - 2*p[12]*p[12]*p[20] + 2*p[12]*p[12]*p[29] + 2*p[12]*p[13]*p[17] - 2*p[12]*p[13]*p[26] + 2*p[13]*p[14]*p[22] - 2*p[13]*p[14]*p[31] - 2*p[14]*p[14]*p[20] + 2*p[14]*p[14]*p[29];
coeff[84] = 0;
coeff[85] = 2*(p[7]*p[14]*p[15] - p[7]*p[14]*p[24] + p[8]*p[13]*p[15] - p[8]*p[13]*p[24] + p[9]*p[12]*p[15] - p[9]*p[12]*p[24] + p[10]*p[11]*p[15] - p[10]*p[11]*p[24])*p[0];
coeff[86] = 2*(p[7]*p[11]*p[19] - p[7]*p[11]*p[28] + p[7]*p[14]*p[16] - p[7]*p[14]*p[25] - p[8]*p[12]*p[19] + p[8]*p[12]*p[28] + p[8]*p[13]*p[16] - p[8]*p[13]*p[25] + p[9]*p[12]*p[16] - p[9]*p[12]*p[25] + p[9]*p[13]*p[19] - p[9]*p[13]*p[28] + p[10]*p[11]*p[16] - p[10]*p[11]*p[25] - p[10]*p[14]*p[19] + p[10]*p[14]*p[28])*p[0];
coeff[87] = 2*(p[7]*p[11]*p[20] - p[7]*p[11]*p[29] - p[7]*p[12]*p[22] + p[7]*p[12]*p[31] + p[7]*p[14]*p[17] - p[7]*p[14]*p[26] - p[8]*p[11]*p[22] + p[8]*p[11]*p[31] - p[8]*p[12]*p[20] + p[8]*p[12]*p[29] + p[8]*p[13]*p[17] - p[8]*p[13]*p[26] + p[9]*p[12]*p[17] - p[9]*p[12]*p[26] + p[9]*p[13]*p[20] - p[9]*p[13]*p[29] + p[9]*p[14]*p[22] - p[9]*p[14]*p[31] + p[10]*p[11]*p[17] - p[10]*p[11]*p[26] + p[10]*p[13]*p[22] - p[10]*p[13]*p[31] - p[10]*p[14]*p[20] + p[10]*p[14]*p[29])*p[0];
coeff[88] = 2*(-p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - p[10]*p[11]*p[15] + p[10]*p[11]*p[24])*p[0];
coeff[89] = 2*(-p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + p[10]*p[14]*p[19] - p[10]*p[14]*p[28])*p[0];
coeff[90] = 2*(-p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + p[10]*p[14]*p[20] - p[10]*p[14]*p[29])*p[0];
coeff[91] = 2*p[0]*p[7]*p[8]*p[23] - 2*p[0]*p[7]*p[10]*p[18] + 2*p[0]*p[8]*p[8]*p[21] - 2*p[0]*p[8]*p[9]*p[18] - 2*p[0]*p[9]*p[10]*p[23] + 2*p[0]*p[10]*p[10]*p[21] - 2*p[0]*p[11]*p[12]*p[23] + 2*p[0]*p[11]*p[14]*p[18] - 2*p[0]*p[12]*p[12]*p[21] + 2*p[0]*p[12]*p[13]*p[18] + 2*p[0]*p[13]*p[14]*p[23] - 2*p[0]*p[14]*p[14]*p[21] - p[7]*p[11]*p[21] + p[7]*p[11]*p[30] + p[7]*p[12]*p[23] - p[7]*p[12]*p[32] - p[7]*p[14]*p[18] + p[7]*p[14]*p[27] + p[8]*p[11]*p[23] - p[8]*p[11]*p[32] + p[8]*p[12]*p[21] - p[8]*p[12]*p[30] - p[8]*p[13]*p[18] + p[8]*p[13]*p[27] - p[9]*p[12]*p[18] + p[9]*p[12]*p[27] - p[9]*p[13]*p[21] + p[9]*p[13]*p[30] - p[9]*p[14]*p[23] + p[9]*p[14]*p[32] - p[10]*p[11]*p[18] + p[10]*p[11]*p[27] - p[10]*p[13]*p[23] + p[10]*p[13]*p[32] + p[10]*p[14]*p[21] - p[10]*p[14]*p[30];
coeff[92] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + p[10]*p[11]*p[24];
coeff[93] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - p[10]*p[14]*p[28];
coeff[94] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - p[10]*p[14]*p[29];
coeff[95] = (p[7]*p[14] + p[8]*p[13] + p[9]*p[12] + p[10]*p[11])*p[15];
coeff[96] = p[7]*p[11]*p[19] + p[7]*p[14]*p[16] - p[8]*p[12]*p[19] + p[8]*p[13]*p[16] + p[9]*p[12]*p[16] + p[9]*p[13]*p[19] + p[10]*p[11]*p[16] - p[10]*p[14]*p[19];
coeff[97] = p[7]*p[11]*p[20] - p[7]*p[12]*p[22] + p[7]*p[14]*p[17] - p[8]*p[11]*p[22] - p[8]*p[12]*p[20] + p[8]*p[13]*p[17] + p[9]*p[12]*p[17] + p[9]*p[13]*p[20] + p[9]*p[14]*p[22] + p[10]*p[11]*p[17] + p[10]*p[13]*p[22] - p[10]*p[14]*p[20];
coeff[98] = 2*(-p[7]*p[8]*p[23] + p[7]*p[8]*p[32] + p[7]*p[10]*p[18] - p[7]*p[10]*p[27] - p[8]*p[8]*p[21] + p[8]*p[8]*p[30] + p[8]*p[9]*p[18] - p[8]*p[9]*p[27] + p[9]*p[10]*p[23] - p[9]*p[10]*p[32] - p[10]*p[10]*p[21] + p[10]*p[10]*p[30] + p[11]*p[12]*p[23] - p[11]*p[12]*p[32] - p[11]*p[14]*p[18] + p[11]*p[14]*p[27] + p[12]*p[12]*p[21] - p[12]*p[12]*p[30] - p[12]*p[13]*p[18] + p[12]*p[13]*p[27] - p[13]*p[14]*p[23] + p[13]*p[14]*p[32] + p[14]*p[14]*p[21] - p[14]*p[14]*p[30])*p[0];
coeff[99] = 4*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[7]*p[10]*p[24] + 4*p[0]*p[8]*p[9]*p[15] - 2*p[0]*p[8]*p[9]*p[24] - 4*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[11]*p[14]*p[24] - 4*p[0]*p[12]*p[13]*p[15] + 2*p[0]*p[12]*p[13]*p[24] + 2*p[7]*p[14]*p[15] - 2*p[7]*p[14]*p[24] + 2*p[8]*p[13]*p[15] - 2*p[8]*p[13]*p[24] + 2*p[9]*p[12]*p[15] - 2*p[9]*p[12]*p[24] + 2*p[10]*p[11]*p[15] - 2*p[10]*p[11]*p[24];
coeff[100] = 4*p[0]*p[7]*p[10]*p[16] - 2*p[0]*p[7]*p[10]*p[25] - 4*p[0]*p[8]*p[8]*p[19] + 2*p[0]*p[8]*p[8]*p[28] + 4*p[0]*p[8]*p[9]*p[16] - 2*p[0]*p[8]*p[9]*p[25] - 4*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[10]*p[10]*p[28] - 4*p[0]*p[11]*p[14]*p[16] + 2*p[0]*p[11]*p[14]*p[25] + 4*p[0]*p[12]*p[12]*p[19] - 2*p[0]*p[12]*p[12]*p[28] - 4*p[0]*p[12]*p[13]*p[16] + 2*p[0]*p[12]*p[13]*p[25] + 4*p[0]*p[14]*p[14]*p[19] - 2*p[0]*p[14]*p[14]*p[28] + 2*p[7]*p[11]*p[19] - 2*p[7]*p[11]*p[28] + 2*p[7]*p[14]*p[16] - 2*p[7]*p[14]*p[25] - 2*p[8]*p[12]*p[19] + 2*p[8]*p[12]*p[28] + 2*p[8]*p[13]*p[16] - 2*p[8]*p[13]*p[25] + 2*p[9]*p[12]*p[16] - 2*p[9]*p[12]*p[25] + 2*p[9]*p[13]*p[19] - 2*p[9]*p[13]*p[28] + 2*p[10]*p[11]*p[16] - 2*p[10]*p[11]*p[25] - 2*p[10]*p[14]*p[19] + 2*p[10]*p[14]*p[28];
coeff[101] = -4*p[0]*p[7]*p[8]*p[22] + 2*p[0]*p[7]*p[8]*p[31] + 4*p[0]*p[7]*p[10]*p[17] - 2*p[0]*p[7]*p[10]*p[26] - 4*p[0]*p[8]*p[8]*p[20] + 2*p[0]*p[8]*p[8]*p[29] + 4*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[8]*p[9]*p[26] + 4*p[0]*p[9]*p[10]*p[22] - 2*p[0]*p[9]*p[10]*p[31] - 4*p[0]*p[10]*p[10]*p[20] + 2*p[0]*p[10]*p[10]*p[29] + 4*p[0]*p[11]*p[12]*p[22] - 2*p[0]*p[11]*p[12]*p[31] - 4*p[0]*p[11]*p[14]*p[17] + 2*p[0]*p[11]*p[14]*p[26] + 4*p[0]*p[12]*p[12]*p[20] - 2*p[0]*p[12]*p[12]*p[29] - 4*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[12]*p[13]*p[26] - 4*p[0]*p[13]*p[14]*p[22] + 2*p[0]*p[13]*p[14]*p[31] + 4*p[0]*p[14]*p[14]*p[20] - 2*p[0]*p[14]*p[14]*p[29] + 2*p[7]*p[11]*p[20] - 2*p[7]*p[11]*p[29] - 2*p[7]*p[12]*p[22] + 2*p[7]*p[12]*p[31] + 2*p[7]*p[14]*p[17] - 2*p[7]*p[14]*p[26] - 2*p[8]*p[11]*p[22] + 2*p[8]*p[11]*p[31] - 2*p[8]*p[12]*p[20] + 2*p[8]*p[12]*p[29] + 2*p[8]*p[13]*p[17] - 2*p[8]*p[13]*p[26] + 2*p[9]*p[12]*p[17] - 2*p[9]*p[12]*p[26] + 2*p[9]*p[13]*p[20] - 2*p[9]*p[13]*p[29] + 2*p[9]*p[14]*p[22] - 2*p[9]*p[14]*p[31] + 2*p[10]*p[11]*p[17] - 2*p[10]*p[11]*p[26] + 2*p[10]*p[13]*p[22] - 2*p[10]*p[13]*p[31] - 2*p[10]*p[14]*p[20] + 2*p[10]*p[14]*p[29];
coeff[102] = -2*p[0]*p[7]*p[10]*p[15] - 2*p[0]*p[8]*p[9]*p[15] + 2*p[0]*p[11]*p[14]*p[15] + 2*p[0]*p[12]*p[13]*p[15] - 2*p[7]*p[14]*p[15] + 2*p[7]*p[14]*p[24] - 2*p[8]*p[13]*p[15] + 2*p[8]*p[13]*p[24] - 2*p[9]*p[12]*p[15] + 2*p[9]*p[12]*p[24] - 2*p[10]*p[11]*p[15] + 2*p[10]*p[11]*p[24];
coeff[103] = -2*p[0]*p[7]*p[10]*p[16] + 2*p[0]*p[8]*p[8]*p[19] - 2*p[0]*p[8]*p[9]*p[16] + 2*p[0]*p[10]*p[10]*p[19] + 2*p[0]*p[11]*p[14]*p[16] - 2*p[0]*p[12]*p[12]*p[19] + 2*p[0]*p[12]*p[13]*p[16] - 2*p[0]*p[14]*p[14]*p[19] - 2*p[7]*p[11]*p[19] + 2*p[7]*p[11]*p[28] - 2*p[7]*p[14]*p[16] + 2*p[7]*p[14]*p[25] + 2*p[8]*p[12]*p[19] - 2*p[8]*p[12]*p[28] - 2*p[8]*p[13]*p[16] + 2*p[8]*p[13]*p[25] - 2*p[9]*p[12]*p[16] + 2*p[9]*p[12]*p[25] - 2*p[9]*p[13]*p[19] + 2*p[9]*p[13]*p[28] - 2*p[10]*p[11]*p[16] + 2*p[10]*p[11]*p[25] + 2*p[10]*p[14]*p[19] - 2*p[10]*p[14]*p[28];
coeff[104] = 2*p[0]*p[7]*p[8]*p[22] - 2*p[0]*p[7]*p[10]*p[17] + 2*p[0]*p[8]*p[8]*p[20] - 2*p[0]*p[8]*p[9]*p[17] - 2*p[0]*p[9]*p[10]*p[22] + 2*p[0]*p[10]*p[10]*p[20] - 2*p[0]*p[11]*p[12]*p[22] + 2*p[0]*p[11]*p[14]*p[17] - 2*p[0]*p[12]*p[12]*p[20] + 2*p[0]*p[12]*p[13]*p[17] + 2*p[0]*p[13]*p[14]*p[22] - 2*p[0]*p[14]*p[14]*p[20] - 2*p[7]*p[11]*p[20] + 2*p[7]*p[11]*p[29] + 2*p[7]*p[12]*p[22] - 2*p[7]*p[12]*p[31] - 2*p[7]*p[14]*p[17] + 2*p[7]*p[14]*p[26] + 2*p[8]*p[11]*p[22] - 2*p[8]*p[11]*p[31] + 2*p[8]*p[12]*p[20] - 2*p[8]*p[12]*p[29] - 2*p[8]*p[13]*p[17] + 2*p[8]*p[13]*p[26] - 2*p[9]*p[12]*p[17] + 2*p[9]*p[12]*p[26] - 2*p[9]*p[13]*p[20] + 2*p[9]*p[13]*p[29] - 2*p[9]*p[14]*p[22] + 2*p[9]*p[14]*p[31] - 2*p[10]*p[11]*p[17] + 2*p[10]*p[11]*p[26] - 2*p[10]*p[13]*p[22] + 2*p[10]*p[13]*p[31] + 2*p[10]*p[14]*p[20] - 2*p[10]*p[14]*p[29];
coeff[105] = 0;
coeff[106] = 2*(-p[7]*p[10]*p[15] + p[7]*p[10]*p[24] - p[8]*p[9]*p[15] + p[8]*p[9]*p[24] + p[11]*p[14]*p[15] - p[11]*p[14]*p[24] + p[12]*p[13]*p[15] - p[12]*p[13]*p[24])*p[0];
coeff[107] = 2*(-p[7]*p[10]*p[16] + p[7]*p[10]*p[25] + p[8]*p[8]*p[19] - p[8]*p[8]*p[28] - p[8]*p[9]*p[16] + p[8]*p[9]*p[25] + p[10]*p[10]*p[19] - p[10]*p[10]*p[28] + p[11]*p[14]*p[16] - p[11]*p[14]*p[25] - p[12]*p[12]*p[19] + p[12]*p[12]*p[28] + p[12]*p[13]*p[16] - p[12]*p[13]*p[25] - p[14]*p[14]*p[19] + p[14]*p[14]*p[28])*p[0];
coeff[108] = 2*(p[7]*p[8]*p[22] - p[7]*p[8]*p[31] - p[7]*p[10]*p[17] + p[7]*p[10]*p[26] + p[8]*p[8]*p[20] - p[8]*p[8]*p[29] - p[8]*p[9]*p[17] + p[8]*p[9]*p[26] - p[9]*p[10]*p[22] + p[9]*p[10]*p[31] + p[10]*p[10]*p[20] - p[10]*p[10]*p[29] - p[11]*p[12]*p[22] + p[11]*p[12]*p[31] + p[11]*p[14]*p[17] - p[11]*p[14]*p[26] - p[12]*p[12]*p[20] + p[12]*p[12]*p[29] + p[12]*p[13]*p[17] - p[12]*p[13]*p[26] + p[13]*p[14]*p[22] - p[13]*p[14]*p[31] - p[14]*p[14]*p[20] + p[14]*p[14]*p[29])*p[0];
coeff[109] = 2*(p[7]*p[10]*p[15] - p[7]*p[10]*p[24] + p[8]*p[9]*p[15] - p[8]*p[9]*p[24] - p[11]*p[14]*p[15] + p[11]*p[14]*p[24] - p[12]*p[13]*p[15] + p[12]*p[13]*p[24])*p[0];
coeff[110] = 2*(p[7]*p[10]*p[16] - p[7]*p[10]*p[25] - p[8]*p[8]*p[19] + p[8]*p[8]*p[28] + p[8]*p[9]*p[16] - p[8]*p[9]*p[25] - p[10]*p[10]*p[19] + p[10]*p[10]*p[28] - p[11]*p[14]*p[16] + p[11]*p[14]*p[25] + p[12]*p[12]*p[19] - p[12]*p[12]*p[28] - p[12]*p[13]*p[16] + p[12]*p[13]*p[25] + p[14]*p[14]*p[19] - p[14]*p[14]*p[28])*p[0];
coeff[111] = 2*(-p[7]*p[8]*p[22] + p[7]*p[8]*p[31] + p[7]*p[10]*p[17] - p[7]*p[10]*p[26] - p[8]*p[8]*p[20] + p[8]*p[8]*p[29] + p[8]*p[9]*p[17] - p[8]*p[9]*p[26] + p[9]*p[10]*p[22] - p[9]*p[10]*p[31] - p[10]*p[10]*p[20] + p[10]*p[10]*p[29] + p[11]*p[12]*p[22] - p[11]*p[12]*p[31] - p[11]*p[14]*p[17] + p[11]*p[14]*p[26] + p[12]*p[12]*p[20] - p[12]*p[12]*p[29] - p[12]*p[13]*p[17] + p[12]*p[13]*p[26] - p[13]*p[14]*p[22] + p[13]*p[14]*p[31] + p[14]*p[14]*p[20] - p[14]*p[14]*p[29])*p[0];
coeff[112] = -p[3] + p[6] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30] - p[23] + p[32];
coeff[113] = 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + p[12]*p[14]*p[24];
coeff[114] = -2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + p[13]*p[14]*p[28];
coeff[115] = -2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + p[13]*p[14]*p[29] - 2*p[22] + p[31];
coeff[116] = (-p[7]*p[9] + p[8]*p[10] - p[11]*p[13] + p[12]*p[14])*p[15];
coeff[117] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] + p[11]*p[12]*p[19] - p[11]*p[13]*p[16] + p[12]*p[14]*p[16] + p[13]*p[14]*p[19];
coeff[118] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] + p[11]*p[12]*p[20] - p[11]*p[13]*p[17] - p[12]*p[12]*p[22] + p[12]*p[14]*p[17] - p[13]*p[13]*p[22] + p[13]*p[14]*p[20] + p[22];
coeff[119] = 0;
coeff[120] = -2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
coeff[121] = 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
coeff[122] = 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29] + 2*p[22] - 2*p[31];
coeff[123] = 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
coeff[124] = -2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
coeff[125] = -2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29] - 2*p[22] + 2*p[31];
coeff[126] = 2*p[0]*p[7]*p[11]*p[23] + 2*p[0]*p[7]*p[12]*p[21] - 2*p[0]*p[7]*p[13]*p[18] + 2*p[0]*p[8]*p[11]*p[21] - 2*p[0]*p[8]*p[12]*p[23] + 2*p[0]*p[8]*p[14]*p[18] - 2*p[0]*p[9]*p[11]*p[18] - 2*p[0]*p[9]*p[13]*p[23] + 2*p[0]*p[9]*p[14]*p[21] + 2*p[0]*p[10]*p[12]*p[18] + 2*p[0]*p[10]*p[13]*p[21] + 2*p[0]*p[10]*p[14]*p[23] - p[7]*p[8]*p[21] + p[7]*p[8]*p[30] + p[7]*p[9]*p[18] - p[7]*p[9]*p[27] + p[8]*p[8]*p[23] - p[8]*p[8]*p[32] - p[8]*p[10]*p[18] + p[8]*p[10]*p[27] + p[9]*p[9]*p[23] - p[9]*p[9]*p[32] - p[9]*p[10]*p[21] + p[9]*p[10]*p[30] + p[11]*p[12]*p[21] - p[11]*p[12]*p[30] - p[11]*p[13]*p[18] + p[11]*p[13]*p[27] - p[12]*p[12]*p[23] + p[12]*p[12]*p[32] + p[12]*p[14]*p[18] - p[12]*p[14]*p[27] - p[13]*p[13]*p[23] + p[13]*p[13]*p[32] + p[13]*p[14]*p[21] - p[13]*p[14]*p[30];
coeff[127] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - p[12]*p[14]*p[24];
coeff[128] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - p[13]*p[14]*p[28];
coeff[129] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - p[13]*p[14]*p[29];
coeff[130] = (-p[7]*p[9] + p[8]*p[10] + p[11]*p[13] - p[12]*p[14])*p[15];
coeff[131] = p[7]*p[8]*p[19] - p[7]*p[9]*p[16] + p[8]*p[10]*p[16] + p[9]*p[10]*p[19] - p[11]*p[12]*p[19] + p[11]*p[13]*p[16] - p[12]*p[14]*p[16] - p[13]*p[14]*p[19];
coeff[132] = p[7]*p[8]*p[20] - p[7]*p[9]*p[17] - p[8]*p[8]*p[22] + p[8]*p[10]*p[17] - p[9]*p[9]*p[22] + p[9]*p[10]*p[20] - p[11]*p[12]*p[20] + p[11]*p[13]*p[17] + p[12]*p[12]*p[22] - p[12]*p[14]*p[17] + p[13]*p[13]*p[22] - p[13]*p[14]*p[20];
coeff[133] = 2*(-p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32])*p[0];
coeff[134] = 4*p[0]*p[7]*p[13]*p[15] - 2*p[0]*p[7]*p[13]*p[24] - 4*p[0]*p[8]*p[14]*p[15] + 2*p[0]*p[8]*p[14]*p[24] + 4*p[0]*p[9]*p[11]*p[15] - 2*p[0]*p[9]*p[11]*p[24] - 4*p[0]*p[10]*p[12]*p[15] + 2*p[0]*p[10]*p[12]*p[24] - 2*p[7]*p[9]*p[15] + 2*p[7]*p[9]*p[24] + 2*p[8]*p[10]*p[15] - 2*p[8]*p[10]*p[24] + 2*p[11]*p[13]*p[15] - 2*p[11]*p[13]*p[24] - 2*p[12]*p[14]*p[15] + 2*p[12]*p[14]*p[24];
coeff[135] = -4*p[0]*p[7]*p[12]*p[19] + 2*p[0]*p[7]*p[12]*p[28] + 4*p[0]*p[7]*p[13]*p[16] - 2*p[0]*p[7]*p[13]*p[25] - 4*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[11]*p[28] - 4*p[0]*p[8]*p[14]*p[16] + 2*p[0]*p[8]*p[14]*p[25] + 4*p[0]*p[9]*p[11]*p[16] - 2*p[0]*p[9]*p[11]*p[25] - 4*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[9]*p[14]*p[28] - 4*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[12]*p[25] - 4*p[0]*p[10]*p[13]*p[19] + 2*p[0]*p[10]*p[13]*p[28] + 2*p[7]*p[8]*p[19] - 2*p[7]*p[8]*p[28] - 2*p[7]*p[9]*p[16] + 2*p[7]*p[9]*p[25] + 2*p[8]*p[10]*p[16] - 2*p[8]*p[10]*p[25] + 2*p[9]*p[10]*p[19] - 2*p[9]*p[10]*p[28] - 2*p[11]*p[12]*p[19] + 2*p[11]*p[12]*p[28] + 2*p[11]*p[13]*p[16] - 2*p[11]*p[13]*p[25] - 2*p[12]*p[14]*p[16] + 2*p[12]*p[14]*p[25] - 2*p[13]*p[14]*p[19] + 2*p[13]*p[14]*p[28];
coeff[136] = -4*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[11]*p[31] - 4*p[0]*p[7]*p[12]*p[20] + 2*p[0]*p[7]*p[12]*p[29] + 4*p[0]*p[7]*p[13]*p[17] - 2*p[0]*p[7]*p[13]*p[26] - 4*p[0]*p[8]*p[11]*p[20] + 2*p[0]*p[8]*p[11]*p[29] + 4*p[0]*p[8]*p[12]*p[22] - 2*p[0]*p[8]*p[12]*p[31] - 4*p[0]*p[8]*p[14]*p[17] + 2*p[0]*p[8]*p[14]*p[26] + 4*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[11]*p[26] + 4*p[0]*p[9]*p[13]*p[22] - 2*p[0]*p[9]*p[13]*p[31] - 4*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[9]*p[14]*p[29] - 4*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[12]*p[26] - 4*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[13]*p[29] - 4*p[0]*p[10]*p[14]*p[22] + 2*p[0]*p[10]*p[14]*p[31] + 2*p[7]*p[8]*p[20] - 2*p[7]*p[8]*p[29] - 2*p[7]*p[9]*p[17] + 2*p[7]*p[9]*p[26] - 2*p[8]*p[8]*p[22] + 2*p[8]*p[8]*p[31] + 2*p[8]*p[10]*p[17] - 2*p[8]*p[10]*p[26] - 2*p[9]*p[9]*p[22] + 2*p[9]*p[9]*p[31] + 2*p[9]*p[10]*p[20] - 2*p[9]*p[10]*p[29] - 2*p[11]*p[12]*p[20] + 2*p[11]*p[12]*p[29] + 2*p[11]*p[13]*p[17] - 2*p[11]*p[13]*p[26] + 2*p[12]*p[12]*p[22] - 2*p[12]*p[12]*p[31] - 2*p[12]*p[14]*p[17] + 2*p[12]*p[14]*p[26] + 2*p[13]*p[13]*p[22] - 2*p[13]*p[13]*p[31] - 2*p[13]*p[14]*p[20] + 2*p[13]*p[14]*p[29];
coeff[137] = -2*p[0]*p[7]*p[13]*p[15] + 2*p[0]*p[8]*p[14]*p[15] - 2*p[0]*p[9]*p[11]*p[15] + 2*p[0]*p[10]*p[12]*p[15] + 2*p[7]*p[9]*p[15] - 2*p[7]*p[9]*p[24] - 2*p[8]*p[10]*p[15] + 2*p[8]*p[10]*p[24] - 2*p[11]*p[13]*p[15] + 2*p[11]*p[13]*p[24] + 2*p[12]*p[14]*p[15] - 2*p[12]*p[14]*p[24];
coeff[138] = 2*p[0]*p[7]*p[12]*p[19] - 2*p[0]*p[7]*p[13]*p[16] + 2*p[0]*p[8]*p[11]*p[19] + 2*p[0]*p[8]*p[14]*p[16] - 2*p[0]*p[9]*p[11]*p[16] + 2*p[0]*p[9]*p[14]*p[19] + 2*p[0]*p[10]*p[12]*p[16] + 2*p[0]*p[10]*p[13]*p[19] - 2*p[7]*p[8]*p[19] + 2*p[7]*p[8]*p[28] + 2*p[7]*p[9]*p[16] - 2*p[7]*p[9]*p[25] - 2*p[8]*p[10]*p[16] + 2*p[8]*p[10]*p[25] - 2*p[9]*p[10]*p[19] + 2*p[9]*p[10]*p[28] + 2*p[11]*p[12]*p[19] - 2*p[11]*p[12]*p[28] - 2*p[11]*p[13]*p[16] + 2*p[11]*p[13]*p[25] + 2*p[12]*p[14]*p[16] - 2*p[12]*p[14]*p[25] + 2*p[13]*p[14]*p[19] - 2*p[13]*p[14]*p[28];
coeff[139] = 2*p[0]*p[7]*p[11]*p[22] + 2*p[0]*p[7]*p[12]*p[20] - 2*p[0]*p[7]*p[13]*p[17] + 2*p[0]*p[8]*p[11]*p[20] - 2*p[0]*p[8]*p[12]*p[22] + 2*p[0]*p[8]*p[14]*p[17] - 2*p[0]*p[9]*p[11]*p[17] - 2*p[0]*p[9]*p[13]*p[22] + 2*p[0]*p[9]*p[14]*p[20] + 2*p[0]*p[10]*p[12]*p[17] + 2*p[0]*p[10]*p[13]*p[20] + 2*p[0]*p[10]*p[14]*p[22] - 2*p[7]*p[8]*p[20] + 2*p[7]*p[8]*p[29] + 2*p[7]*p[9]*p[17] - 2*p[7]*p[9]*p[26] + 2*p[8]*p[8]*p[22] - 2*p[8]*p[8]*p[31] - 2*p[8]*p[10]*p[17] + 2*p[8]*p[10]*p[26] + 2*p[9]*p[9]*p[22] - 2*p[9]*p[9]*p[31] - 2*p[9]*p[10]*p[20] + 2*p[9]*p[10]*p[29] + 2*p[11]*p[12]*p[20] - 2*p[11]*p[12]*p[29] - 2*p[11]*p[13]*p[17] + 2*p[11]*p[13]*p[26] - 2*p[12]*p[12]*p[22] + 2*p[12]*p[12]*p[31] + 2*p[12]*p[14]*p[17] - 2*p[12]*p[14]*p[26] - 2*p[13]*p[13]*p[22] + 2*p[13]*p[13]*p[31] + 2*p[13]*p[14]*p[20] - 2*p[13]*p[14]*p[29];
coeff[140] = 0;
coeff[141] = 2*(-p[7]*p[13]*p[15] + p[7]*p[13]*p[24] + p[8]*p[14]*p[15] - p[8]*p[14]*p[24] - p[9]*p[11]*p[15] + p[9]*p[11]*p[24] + p[10]*p[12]*p[15] - p[10]*p[12]*p[24])*p[0];
coeff[142] = 2*(p[7]*p[12]*p[19] - p[7]*p[12]*p[28] - p[7]*p[13]*p[16] + p[7]*p[13]*p[25] + p[8]*p[11]*p[19] - p[8]*p[11]*p[28] + p[8]*p[14]*p[16] - p[8]*p[14]*p[25] - p[9]*p[11]*p[16] + p[9]*p[11]*p[25] + p[9]*p[14]*p[19] - p[9]*p[14]*p[28] + p[10]*p[12]*p[16] - p[10]*p[12]*p[25] + p[10]*p[13]*p[19] - p[10]*p[13]*p[28])*p[0];
coeff[143] = 2*(p[7]*p[11]*p[22] - p[7]*p[11]*p[31] + p[7]*p[12]*p[20] - p[7]*p[12]*p[29] - p[7]*p[13]*p[17] + p[7]*p[13]*p[26] + p[8]*p[11]*p[20] - p[8]*p[11]*p[29] - p[8]*p[12]*p[22] + p[8]*p[12]*p[31] + p[8]*p[14]*p[17] - p[8]*p[14]*p[26] - p[9]*p[11]*p[17] + p[9]*p[11]*p[26] - p[9]*p[13]*p[22] + p[9]*p[13]*p[31] + p[9]*p[14]*p[20] - p[9]*p[14]*p[29] + p[10]*p[12]*p[17] - p[10]*p[12]*p[26] + p[10]*p[13]*p[20] - p[10]*p[13]*p[29] + p[10]*p[14]*p[22] - p[10]*p[14]*p[31])*p[0];
coeff[144] = 2*(p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - p[10]*p[12]*p[15] + p[10]*p[12]*p[24])*p[0];
coeff[145] = 2*(-p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - p[10]*p[13]*p[19] + p[10]*p[13]*p[28])*p[0];
coeff[146] = 2*(-p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - p[10]*p[14]*p[22] + p[10]*p[14]*p[31])*p[0];
coeff[147] = -2*p[0]*p[7]*p[8]*p[21] + 2*p[0]*p[7]*p[9]*p[18] + 2*p[0]*p[8]*p[8]*p[23] - 2*p[0]*p[8]*p[10]*p[18] + 2*p[0]*p[9]*p[9]*p[23] - 2*p[0]*p[9]*p[10]*p[21] + 2*p[0]*p[11]*p[12]*p[21] - 2*p[0]*p[11]*p[13]*p[18] - 2*p[0]*p[12]*p[12]*p[23] + 2*p[0]*p[12]*p[14]*p[18] - 2*p[0]*p[13]*p[13]*p[23] + 2*p[0]*p[13]*p[14]*p[21] - p[7]*p[11]*p[23] + p[7]*p[11]*p[32] - p[7]*p[12]*p[21] + p[7]*p[12]*p[30] + p[7]*p[13]*p[18] - p[7]*p[13]*p[27] - p[8]*p[11]*p[21] + p[8]*p[11]*p[30] + p[8]*p[12]*p[23] - p[8]*p[12]*p[32] - p[8]*p[14]*p[18] + p[8]*p[14]*p[27] + p[9]*p[11]*p[18] - p[9]*p[11]*p[27] + p[9]*p[13]*p[23] - p[9]*p[13]*p[32] - p[9]*p[14]*p[21] + p[9]*p[14]*p[30] - p[10]*p[12]*p[18] + p[10]*p[12]*p[27] - p[10]*p[13]*p[21] + p[10]*p[13]*p[30] - p[10]*p[14]*p[23] + p[10]*p[14]*p[32];
coeff[148] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + p[10]*p[12]*p[24];
coeff[149] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + p[10]*p[13]*p[28];
coeff[150] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + p[10]*p[14]*p[31];
coeff[151] = (-p[7]*p[13] + p[8]*p[14] - p[9]*p[11] + p[10]*p[12])*p[15];
coeff[152] = p[7]*p[12]*p[19] - p[7]*p[13]*p[16] + p[8]*p[11]*p[19] + p[8]*p[14]*p[16] - p[9]*p[11]*p[16] + p[9]*p[14]*p[19] + p[10]*p[12]*p[16] + p[10]*p[13]*p[19];
coeff[153] = p[7]*p[11]*p[22] + p[7]*p[12]*p[20] - p[7]*p[13]*p[17] + p[8]*p[11]*p[20] - p[8]*p[12]*p[22] + p[8]*p[14]*p[17] - p[9]*p[11]*p[17] - p[9]*p[13]*p[22] + p[9]*p[14]*p[20] + p[10]*p[12]*p[17] + p[10]*p[13]*p[20] + p[10]*p[14]*p[22];
coeff[154] = 2*(p[7]*p[8]*p[21] - p[7]*p[8]*p[30] - p[7]*p[9]*p[18] + p[7]*p[9]*p[27] - p[8]*p[8]*p[23] + p[8]*p[8]*p[32] + p[8]*p[10]*p[18] - p[8]*p[10]*p[27] - p[9]*p[9]*p[23] + p[9]*p[9]*p[32] + p[9]*p[10]*p[21] - p[9]*p[10]*p[30] - p[11]*p[12]*p[21] + p[11]*p[12]*p[30] + p[11]*p[13]*p[18] - p[11]*p[13]*p[27] + p[12]*p[12]*p[23] - p[12]*p[12]*p[32] - p[12]*p[14]*p[18] + p[12]*p[14]*p[27] + p[13]*p[13]*p[23] - p[13]*p[13]*p[32] - p[13]*p[14]*p[21] + p[13]*p[14]*p[30])*p[0];
coeff[155] = -4*p[0]*p[7]*p[9]*p[15] + 2*p[0]*p[7]*p[9]*p[24] + 4*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[8]*p[10]*p[24] + 4*p[0]*p[11]*p[13]*p[15] - 2*p[0]*p[11]*p[13]*p[24] - 4*p[0]*p[12]*p[14]*p[15] + 2*p[0]*p[12]*p[14]*p[24] - 2*p[7]*p[13]*p[15] + 2*p[7]*p[13]*p[24] + 2*p[8]*p[14]*p[15] - 2*p[8]*p[14]*p[24] - 2*p[9]*p[11]*p[15] + 2*p[9]*p[11]*p[24] + 2*p[10]*p[12]*p[15] - 2*p[10]*p[12]*p[24];
coeff[156] = 4*p[0]*p[7]*p[8]*p[19] - 2*p[0]*p[7]*p[8]*p[28] - 4*p[0]*p[7]*p[9]*p[16] + 2*p[0]*p[7]*p[9]*p[25] + 4*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[8]*p[10]*p[25] + 4*p[0]*p[9]*p[10]*p[19] - 2*p[0]*p[9]*p[10]*p[28] - 4*p[0]*p[11]*p[12]*p[19] + 2*p[0]*p[11]*p[12]*p[28] + 4*p[0]*p[11]*p[13]*p[16] - 2*p[0]*p[11]*p[13]*p[25] - 4*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[12]*p[14]*p[25] - 4*p[0]*p[13]*p[14]*p[19] + 2*p[0]*p[13]*p[14]*p[28] + 2*p[7]*p[12]*p[19] - 2*p[7]*p[12]*p[28] - 2*p[7]*p[13]*p[16] + 2*p[7]*p[13]*p[25] + 2*p[8]*p[11]*p[19] - 2*p[8]*p[11]*p[28] + 2*p[8]*p[14]*p[16] - 2*p[8]*p[14]*p[25] - 2*p[9]*p[11]*p[16] + 2*p[9]*p[11]*p[25] + 2*p[9]*p[14]*p[19] - 2*p[9]*p[14]*p[28] + 2*p[10]*p[12]*p[16] - 2*p[10]*p[12]*p[25] + 2*p[10]*p[13]*p[19] - 2*p[10]*p[13]*p[28];
coeff[157] = 4*p[0]*p[7]*p[8]*p[20] - 2*p[0]*p[7]*p[8]*p[29] - 4*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[7]*p[9]*p[26] - 4*p[0]*p[8]*p[8]*p[22] + 2*p[0]*p[8]*p[8]*p[31] + 4*p[0]*p[8]*p[10]*p[17] - 2*p[0]*p[8]*p[10]*p[26] - 4*p[0]*p[9]*p[9]*p[22] + 2*p[0]*p[9]*p[9]*p[31] + 4*p[0]*p[9]*p[10]*p[20] - 2*p[0]*p[9]*p[10]*p[29] - 4*p[0]*p[11]*p[12]*p[20] + 2*p[0]*p[11]*p[12]*p[29] + 4*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[11]*p[13]*p[26] + 4*p[0]*p[12]*p[12]*p[22] - 2*p[0]*p[12]*p[12]*p[31] - 4*p[0]*p[12]*p[14]*p[17] + 2*p[0]*p[12]*p[14]*p[26] + 4*p[0]*p[13]*p[13]*p[22] - 2*p[0]*p[13]*p[13]*p[31] - 4*p[0]*p[13]*p[14]*p[20] + 2*p[0]*p[13]*p[14]*p[29] + 2*p[7]*p[11]*p[22] - 2*p[7]*p[11]*p[31] + 2*p[7]*p[12]*p[20] - 2*p[7]*p[12]*p[29] - 2*p[7]*p[13]*p[17] + 2*p[7]*p[13]*p[26] + 2*p[8]*p[11]*p[20] - 2*p[8]*p[11]*p[29] - 2*p[8]*p[12]*p[22] + 2*p[8]*p[12]*p[31] + 2*p[8]*p[14]*p[17] - 2*p[8]*p[14]*p[26] - 2*p[9]*p[11]*p[17] + 2*p[9]*p[11]*p[26] - 2*p[9]*p[13]*p[22] + 2*p[9]*p[13]*p[31] + 2*p[9]*p[14]*p[20] - 2*p[9]*p[14]*p[29] + 2*p[10]*p[12]*p[17] - 2*p[10]*p[12]*p[26] + 2*p[10]*p[13]*p[20] - 2*p[10]*p[13]*p[29] + 2*p[10]*p[14]*p[22] - 2*p[10]*p[14]*p[31];
coeff[158] = 2*p[0]*p[7]*p[9]*p[15] - 2*p[0]*p[8]*p[10]*p[15] - 2*p[0]*p[11]*p[13]*p[15] + 2*p[0]*p[12]*p[14]*p[15] + 2*p[7]*p[13]*p[15] - 2*p[7]*p[13]*p[24] - 2*p[8]*p[14]*p[15] + 2*p[8]*p[14]*p[24] + 2*p[9]*p[11]*p[15] - 2*p[9]*p[11]*p[24] - 2*p[10]*p[12]*p[15] + 2*p[10]*p[12]*p[24];
coeff[159] = -2*p[0]*p[7]*p[8]*p[19] + 2*p[0]*p[7]*p[9]*p[16] - 2*p[0]*p[8]*p[10]*p[16] - 2*p[0]*p[9]*p[10]*p[19] + 2*p[0]*p[11]*p[12]*p[19] - 2*p[0]*p[11]*p[13]*p[16] + 2*p[0]*p[12]*p[14]*p[16] + 2*p[0]*p[13]*p[14]*p[19] - 2*p[7]*p[12]*p[19] + 2*p[7]*p[12]*p[28] + 2*p[7]*p[13]*p[16] - 2*p[7]*p[13]*p[25] - 2*p[8]*p[11]*p[19] + 2*p[8]*p[11]*p[28] - 2*p[8]*p[14]*p[16] + 2*p[8]*p[14]*p[25] + 2*p[9]*p[11]*p[16] - 2*p[9]*p[11]*p[25] - 2*p[9]*p[14]*p[19] + 2*p[9]*p[14]*p[28] - 2*p[10]*p[12]*p[16] + 2*p[10]*p[12]*p[25] - 2*p[10]*p[13]*p[19] + 2*p[10]*p[13]*p[28];
coeff[160] = -2*p[0]*p[7]*p[8]*p[20] + 2*p[0]*p[7]*p[9]*p[17] + 2*p[0]*p[8]*p[8]*p[22] - 2*p[0]*p[8]*p[10]*p[17] + 2*p[0]*p[9]*p[9]*p[22] - 2*p[0]*p[9]*p[10]*p[20] + 2*p[0]*p[11]*p[12]*p[20] - 2*p[0]*p[11]*p[13]*p[17] - 2*p[0]*p[12]*p[12]*p[22] + 2*p[0]*p[12]*p[14]*p[17] - 2*p[0]*p[13]*p[13]*p[22] + 2*p[0]*p[13]*p[14]*p[20] - 2*p[7]*p[11]*p[22] + 2*p[7]*p[11]*p[31] - 2*p[7]*p[12]*p[20] + 2*p[7]*p[12]*p[29] + 2*p[7]*p[13]*p[17] - 2*p[7]*p[13]*p[26] - 2*p[8]*p[11]*p[20] + 2*p[8]*p[11]*p[29] + 2*p[8]*p[12]*p[22] - 2*p[8]*p[12]*p[31] - 2*p[8]*p[14]*p[17] + 2*p[8]*p[14]*p[26] + 2*p[9]*p[11]*p[17] - 2*p[9]*p[11]*p[26] + 2*p[9]*p[13]*p[22] - 2*p[9]*p[13]*p[31] - 2*p[9]*p[14]*p[20] + 2*p[9]*p[14]*p[29] - 2*p[10]*p[12]*p[17] + 2*p[10]*p[12]*p[26] - 2*p[10]*p[13]*p[20] + 2*p[10]*p[13]*p[29] - 2*p[10]*p[14]*p[22] + 2*p[10]*p[14]*p[31];
coeff[161] = 0;
coeff[162] = 2*(p[7]*p[9]*p[15] - p[7]*p[9]*p[24] - p[8]*p[10]*p[15] + p[8]*p[10]*p[24] - p[11]*p[13]*p[15] + p[11]*p[13]*p[24] + p[12]*p[14]*p[15] - p[12]*p[14]*p[24])*p[0];
coeff[163] = 2*(-p[7]*p[8]*p[19] + p[7]*p[8]*p[28] + p[7]*p[9]*p[16] - p[7]*p[9]*p[25] - p[8]*p[10]*p[16] + p[8]*p[10]*p[25] - p[9]*p[10]*p[19] + p[9]*p[10]*p[28] + p[11]*p[12]*p[19] - p[11]*p[12]*p[28] - p[11]*p[13]*p[16] + p[11]*p[13]*p[25] + p[12]*p[14]*p[16] - p[12]*p[14]*p[25] + p[13]*p[14]*p[19] - p[13]*p[14]*p[28])*p[0];
coeff[164] = 2*(-p[7]*p[8]*p[20] + p[7]*p[8]*p[29] + p[7]*p[9]*p[17] - p[7]*p[9]*p[26] + p[8]*p[8]*p[22] - p[8]*p[8]*p[31] - p[8]*p[10]*p[17] + p[8]*p[10]*p[26] + p[9]*p[9]*p[22] - p[9]*p[9]*p[31] - p[9]*p[10]*p[20] + p[9]*p[10]*p[29] + p[11]*p[12]*p[20] - p[11]*p[12]*p[29] - p[11]*p[13]*p[17] + p[11]*p[13]*p[26] - p[12]*p[12]*p[22] + p[12]*p[12]*p[31] + p[12]*p[14]*p[17] - p[12]*p[14]*p[26] - p[13]*p[13]*p[22] + p[13]*p[13]*p[31] + p[13]*p[14]*p[20] - p[13]*p[14]*p[29])*p[0];
coeff[165] = 2*(-p[7]*p[9]*p[15] + p[7]*p[9]*p[24] + p[8]*p[10]*p[15] - p[8]*p[10]*p[24] + p[11]*p[13]*p[15] - p[11]*p[13]*p[24] - p[12]*p[14]*p[15] + p[12]*p[14]*p[24])*p[0];
coeff[166] = 2*(p[7]*p[8]*p[19] - p[7]*p[8]*p[28] - p[7]*p[9]*p[16] + p[7]*p[9]*p[25] + p[8]*p[10]*p[16] - p[8]*p[10]*p[25] + p[9]*p[10]*p[19] - p[9]*p[10]*p[28] - p[11]*p[12]*p[19] + p[11]*p[12]*p[28] + p[11]*p[13]*p[16] - p[11]*p[13]*p[25] - p[12]*p[14]*p[16] + p[12]*p[14]*p[25] - p[13]*p[14]*p[19] + p[13]*p[14]*p[28])*p[0];
coeff[167] = 2*(p[7]*p[8]*p[20] - p[7]*p[8]*p[29] - p[7]*p[9]*p[17] + p[7]*p[9]*p[26] - p[8]*p[8]*p[22] + p[8]*p[8]*p[31] + p[8]*p[10]*p[17] - p[8]*p[10]*p[26] - p[9]*p[9]*p[22] + p[9]*p[9]*p[31] + p[9]*p[10]*p[20] - p[9]*p[10]*p[29] - p[11]*p[12]*p[20] + p[11]*p[12]*p[29] + p[11]*p[13]*p[17] - p[11]*p[13]*p[26] + p[12]*p[12]*p[22] - p[12]*p[12]*p[31] - p[12]*p[14]*p[17] + p[12]*p[14]*p[26] + p[13]*p[13]*p[22] - p[13]*p[13]*p[31] - p[13]*p[14]*p[20] + p[13]*p[14]*p[29])*p[0];
}
} // namespace embree

View file

@ -0,0 +1,137 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
namespace embree
{
/* Point query structure for closest point query (packet of K point queries) */
template<int K>
struct RTC_ALIGN(16) PointQueryK
{
/* Default construction does nothing */
__forceinline PointQueryK() {}
/* Constructs a point query packet from the query positions, the search
 * radii (default: inf) and the query times (default: zero) */
__forceinline PointQueryK(const Vec3vf<K>& p, const vfloat<K>& radius = inf, const vfloat<K>& time = zero)
: p(p), time(time), radius(radius) {}
/* Returns the number of point queries in the packet */
static __forceinline size_t size() { return K; }
/* Calculates, per packet element, if the point query is valid: every position
 * component is at most FLT_LARGE in magnitude, the radius is non-negative,
 * and the time is finite */
__forceinline vbool<K> valid() const
{
const vbool<K> vx = (abs(p.x) <= vfloat<K>(FLT_LARGE));
const vbool<K> vy = (abs(p.y) <= vfloat<K>(FLT_LARGE));
const vbool<K> vz = (abs(p.z) <= vfloat<K>(FLT_LARGE));
const vbool<K> vn = radius >= vfloat<K>(0);
const vbool<K> vf = abs(time) < vfloat<K>(inf);
return vx & vy & vz & vn & vf;
}
/* Conversions between this packet and single point queries: the pointer
 * overloads process K consecutive queries, the indexed overloads element i */
__forceinline void get(PointQueryK<1>* ray) const;
__forceinline void get(size_t i, PointQueryK<1>& ray) const;
__forceinline void set(const PointQueryK<1>* ray);
__forceinline void set(size_t i, const PointQueryK<1>& ray);
Vec3vf<K> p; // location of the query point
vfloat<K> time; // time for motion blur
vfloat<K> radius; // radius for the point query
};
/* Specialization for a single point query */
template<>
struct RTC_ALIGN(16) PointQueryK<1>
{
/* Default construction does nothing */
__forceinline PointQueryK() {}
/* Constructs a point query from the query position, the search radius
 * (default: inf) and the query time (default: zero) */
__forceinline PointQueryK(const Vec3fa& p, float radius = inf, float time = zero)
: p(p), time(time), radius(radius) {}
/* Calculates if this is a valid point query: every position component is at
 * most FLT_LARGE in magnitude, the radius is non-negative, and the time is finite */
__forceinline bool valid() const {
return all(le_mask(abs(Vec3fa(p)), Vec3fa(FLT_LARGE)) & le_mask(Vec3fa(0.f), Vec3fa(radius))) && abs(time) < float(inf);
}
Vec3f p; // location of the query point
float time; // time for motion blur
float radius; // radius for the point query
};
/* Converts the point query packet into K single point queries; element k of
 * the packet is written to query[k] */
template<int K>
__forceinline void PointQueryK<K>::get(PointQueryK<1>* query) const
{
  // FIXME: use SIMD transpose
  for (size_t lane = 0; lane < size_t(K); ++lane)
    get(lane, query[lane]);
}
/* Copies element i of the point query packet into the single point query `query` */
template<int K>
__forceinline void PointQueryK<K>::get(size_t i, PointQueryK<1>& query) const
{
  /* scalar attributes */
  query.time   = time[i];
  query.radius = radius[i];

  /* query position, one component at a time */
  query.p.x = p.x[i];
  query.p.y = p.y[i];
  query.p.z = p.z[i];
}
/* Fills the point query packet from K single point queries; query[k] is
 * stored into element k of the packet */
template<int K>
__forceinline void PointQueryK<K>::set(const PointQueryK<1>* query)
{
  for (size_t lane = 0; lane < size_t(K); ++lane)
    set(lane, query[lane]);
}
/* Stores the single point query `query` into element i of the point query packet */
template<int K>
__forceinline void PointQueryK<K>::set(size_t i, const PointQueryK<1>& query)
{
  /* scalar attributes */
  time[i]   = query.time;
  radius[i] = query.radius;

  /* query position, one component at a time */
  p.x[i] = query.p.x;
  p.y[i] = query.p.y;
  p.z[i] = query.p.z;
}
/* Shortcuts */
typedef PointQueryK<1> PointQuery;
typedef PointQueryK<4> PointQuery4;
typedef PointQueryK<8> PointQuery8;
typedef PointQueryK<16> PointQuery16;
typedef PointQueryK<VSIZEX> PointQueryx;
struct PointQueryN;
/* Writes the position, radius and time of a point query to the stream, one
 * attribute per line, and returns the stream */
template<int K>
__forceinline embree_ostream operator <<(embree_ostream cout, const PointQueryK<K>& query)
{
  cout << "{ " << embree_endl;
  cout << " p = " << query.p << embree_endl;
  cout << " r = " << query.radius << embree_endl;
  cout << " time = " << query.time << embree_endl;
  cout << "}";
  return cout;
}
}

View file

@ -0,0 +1,159 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
namespace embree
{
/*! helper structure for the implementation of the profile functions below */
struct ProfileTimer
{
static const size_t N = 20;
ProfileTimer () {}
ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0)
{
for (size_t i=0; i<N; i++) names[i] = nullptr;
for (size_t i=0; i<N; i++) dt_fst[i] = 0.0;
for (size_t i=0; i<N; i++) dt_min[i] = pos_inf;
for (size_t i=0; i<N; i++) dt_avg[i] = 0.0;
for (size_t i=0; i<N; i++) dt_max[i] = neg_inf;
}
__forceinline void begin()
{
j=0;
t0 = tj = getSeconds();
}
__forceinline void end() {
absolute("total");
i++;
}
__forceinline void operator() (const char* name) {
relative(name);
}
__forceinline void absolute (const char* name)
{
const double t1 = getSeconds();
const double dt = t1-t0;
assert(names[j] == nullptr || names[j] == name);
names[j] = name;
if (i == 0) dt_fst[j] = dt;
if (i>=numSkip) {
dt_min[j] = min(dt_min[j],dt);
dt_avg[j] = dt_avg[j] + dt;
dt_max[j] = max(dt_max[j],dt);
}
j++;
maxJ = max(maxJ,j);
}
__forceinline void relative (const char* name)
{
const double t1 = getSeconds();
const double dt = t1-tj;
tj = t1;
assert(names[j] == nullptr || names[j] == name);
names[j] = name;
if (i == 0) dt_fst[j] = dt;
if (i>=numSkip) {
dt_min[j] = min(dt_min[j],dt);
dt_avg[j] = dt_avg[j] + dt;
dt_max[j] = max(dt_max[j],dt);
}
j++;
maxJ = max(maxJ,j);
}
void print(size_t numElements)
{
for (size_t k=0; k<N; k++)
dt_avg[k] /= double(i-numSkip);
printf(" profile [M/s]:\n");
for (size_t j=0; j<maxJ; j++)
printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6);
printf(" profile [ms]:\n");
for (size_t j=0; j<maxJ; j++)
printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
}
void print()
{
printf(" profile:\n");
for (size_t k=0; k<N; k++)
dt_avg[k] /= double(i-numSkip);
for (size_t j=0; j<maxJ; j++) {
printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
}
}
double avg() {
return dt_avg[maxJ-1]/double(i-numSkip);
}
private:
size_t i;
size_t j;
size_t maxJ;
size_t numSkip;
double t0;
double tj;
const char* names[N];
double dt_fst[N];
double dt_min[N];
double dt_avg[N];
double dt_max[N];
};
/*! Executes a code block numSkip+numIter times, measures the sections marked
    inside of it, and prints the statistics (throughput is reported relative
    to numElements). The first numSkip runs are excluded from min/avg/max.
    Use the following way:
    profile(1,10,1000,[&](ProfileTimer& timer) {
    // code
    timer("A");
    // code
    timer("B");
    });
*/
template<typename Closure>
void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
{
  ProfileTimer timer(numSkip);

  const size_t numRuns = numSkip+numIter;
  for (size_t run=0; run<numRuns; ++run)
  {
    timer.begin();
    closure(timer);
    timer.end();
  }

  timer.print(numElements);
}
/*! Same as the function above, but the timer object is provided by the
    caller. The passed timer is reset (replaced by a fresh
    ProfileTimer(numSkip)) before the first run. */
template<typename Closure>
void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
{
  timer = ProfileTimer(numSkip);

  const size_t numRuns = numSkip+numIter;
  for (size_t run=0; run<numRuns; ++run)
  {
    timer.begin();
    closure(timer);
    timer.end();
  }

  timer.print(numElements);
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,162 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../../include/embree4/rtcore.h"
RTC_NAMESPACE_USE
namespace embree
{
/*! decoding of intersection flags: both predicates look only at the bits
 *  selected by RTC_RAY_QUERY_FLAG_COHERENT */
__forceinline bool isCoherent (RTCRayQueryFlags flags)
{
  const auto coherenceBits = flags & RTC_RAY_QUERY_FLAG_COHERENT;
  return coherenceBits == RTC_RAY_QUERY_FLAG_COHERENT;
}
__forceinline bool isIncoherent(RTCRayQueryFlags flags)
{
  const auto coherenceBits = flags & RTC_RAY_QUERY_FLAG_COHERENT;
  return coherenceBits == RTC_RAY_QUERY_FLAG_INCOHERENT;
}
/*! Macros used in the rtcore API implementation */
// -- GODOT start --
// Godot variant: RTC_CATCH_BEGIN, RTC_CATCH_END and RTC_CATCH_END2 expand to
// nothing and RTC_CATCH_END2_FALSE expands to a plain `return false;`, so API
// bodies wrapped in these macros run without any try/catch. The original
// exception based definitions are kept, disabled, in the `#if 0` region below.
#define RTC_CATCH_BEGIN
#define RTC_CATCH_END(device)
#define RTC_CATCH_END2(scene)
#define RTC_CATCH_END2_FALSE(scene) return false;
#if 0
// -- GODOT end --
// Original definitions (disabled): RTC_CATCH_BEGIN opens a try block and the
// RTC_CATCH_END* macros close it, forwarding every caught exception to
// Device::process_error with a matching RTCError code.
#define RTC_CATCH_BEGIN try {
#define RTC_CATCH_END(device) \
} catch (std::bad_alloc&) { \
Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
} catch (rtcore_error& e) { \
Device::process_error(device,e.error,e.what()); \
} catch (std::exception& e) { \
Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
} catch (...) { \
Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
}
// Same as RTC_CATCH_END, but takes a scene and reports to scene->device
// (or to a null device if the scene is null).
#define RTC_CATCH_END2(scene) \
} catch (std::bad_alloc&) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
} catch (rtcore_error& e) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,e.error,e.what()); \
} catch (std::exception& e) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
} catch (...) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
}
// Same as RTC_CATCH_END2, but every handler additionally returns false.
#define RTC_CATCH_END2_FALSE(scene) \
} catch (std::bad_alloc&) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
return false; \
} catch (rtcore_error& e) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,e.error,e.what()); \
return false; \
} catch (std::exception& e) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
return false; \
} catch (...) { \
Device* device = scene ? scene->device : nullptr; \
Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
return false; \
}
#endif
// Argument checks for the API entry points. Each macro raises an error
// through throw_RTCError when its condition is violated.

// fails with RTC_ERROR_INVALID_ARGUMENT if the handle is a null pointer
#define RTC_VERIFY_HANDLE(handle) \
if (handle == nullptr) { \
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
}
// fails with RTC_ERROR_INVALID_ARGUMENT if the ID equals RTC_INVALID_GEOMETRY_ID
#define RTC_VERIFY_GEOMID(id) \
if (id == RTC_INVALID_GEOMETRY_ID) { \
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
}
// fails with RTC_ERROR_INVALID_ARGUMENT if id is larger than upper (upper itself is accepted)
#define RTC_VERIFY_UPPER(id,upper) \
if (id > upper) { \
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"invalid argument"); \
}
// fails with RTC_ERROR_INVALID_OPERATION if id lies outside of the inclusive range [lower,upper]
// NOTE(review): unlike the macros above, the `if` body is not wrapped in
// braces, so this macro must not be followed by an `else` at the call site.
#define RTC_VERIFY_RANGE(id,lower,upper) \
if (id < lower || id > upper) \
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"argument out of bounds");
// RTC_TRACE(x) prints the token x (usually the name of the API function) to
// std::cout when enabled; in the default configuration it expands to nothing.
#if 0 // enable to debug print all API calls
#define RTC_TRACE(x) std::cout << #x << std::endl;
#else
#define RTC_TRACE(x)
#endif
// -- GODOT start --
// Godot variant of the error reporting: the exception type below is disabled
// and throw_RTCError terminates the process via abort() instead of throwing.
#if 0
/*! used to throw embree API errors */
struct rtcore_error : public std::exception
{
__forceinline rtcore_error(RTCError error, const std::string& str)
: error(error), str(str) {}
~rtcore_error() throw() {}
const char* what () const throw () {
return str.c_str();
}
RTCError error;
std::string str;
};
#endif
// Both definitions of throw_RTCError already end with a ';'. Some call sites
// rely on this and invoke the macro without a trailing semicolon.
// In DEBUG builds the file, line and message are printed before aborting; in
// all other builds the error code and the message are dropped.
// NOTE(review): the debug message has no trailing newline and stdout is not
// flushed before abort(), so the text may not show up - confirm.
#if defined(DEBUG) // only report file and line in debug mode
#define throw_RTCError(error,str) \
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
// throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#else
#define throw_RTCError(error,str) \
abort();
// throw rtcore_error(error,str);
#endif
// -- GODOT end --
// Evaluates to true if settings.byteSize is larger than the end offset of
// `member` inside RTCBuildArguments, i.e. if the structure filled in by the
// caller is big enough to contain that member.
// NOTE(review): the comparison is strict ('>'), so a structure that ends
// exactly at the end of `member` is reported as not having it - confirm that
// this is intended.
#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
(settings.byteSize > (offsetof(RTCBuildArguments,member)+sizeof(settings.member)))
inline void storeTransform(const AffineSpace3fa& space, RTCFormat format, float* xfm)
{
switch (format)
{
case RTC_FORMAT_FLOAT3X4_ROW_MAJOR:
xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vy.x; xfm[ 2] = space.l.vz.x; xfm[ 3] = space.p.x;
xfm[ 4] = space.l.vx.y; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vz.y; xfm[ 7] = space.p.y;
xfm[ 8] = space.l.vx.z; xfm[ 9] = space.l.vy.z; xfm[10] = space.l.vz.z; xfm[11] = space.p.z;
break;
case RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR:
xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z;
xfm[ 3] = space.l.vy.x; xfm[ 4] = space.l.vy.y; xfm[ 5] = space.l.vy.z;
xfm[ 6] = space.l.vz.x; xfm[ 7] = space.l.vz.y; xfm[ 8] = space.l.vz.z;
xfm[ 9] = space.p.x; xfm[10] = space.p.y; xfm[11] = space.p.z;
break;
case RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR:
xfm[ 0] = space.l.vx.x; xfm[ 1] = space.l.vx.y; xfm[ 2] = space.l.vx.z; xfm[ 3] = 0.f;
xfm[ 4] = space.l.vy.x; xfm[ 5] = space.l.vy.y; xfm[ 6] = space.l.vy.z; xfm[ 7] = 0.f;
xfm[ 8] = space.l.vz.x; xfm[ 9] = space.l.vz.y; xfm[10] = space.l.vz.z; xfm[11] = 0.f;
xfm[12] = space.p.x; xfm[13] = space.p.y; xfm[14] = space.p.z; xfm[15] = 1.f;
break;
default:
#if !defined(__SYCL_DEVICE_ONLY__)
throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid matrix format");
#endif
break;
}
}
}

View file

@ -0,0 +1,442 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTC_EXPORT_API
#include "default.h"
#include "device.h"
#include "scene.h"
#include "context.h"
#include "alloc.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/bvh_builder_morton.h"
namespace embree
{
namespace isa // FIXME: support more ISAs for builders
{
/*! State behind an RTCBVH handle: the device it belongs to, the allocator
 *  the builders hand out to the user callbacks, and two temporary primitive
 *  arrays used by the morton builder. The object holds one reference to its
 *  device from construction until destruction. */
struct BVH : public RefCount
{
BVH (Device* device)
: device(device), allocator(device,true), morton_src(device,0), morton_tmp(device,0)
{
device->refInc();
}
~BVH() {
device->refDec();
}
public:
Device* device; // device this BVH was created for (reference held, see constructor/destructor)
FastAllocator allocator; // source of the thread local allocators passed to the user callbacks
mvector<BVHBuilderMorton::BuildPrim> morton_src; // temporary array of the morton builder
mvector<BVHBuilderMorton::BuildPrim> morton_tmp; // temporary array of the morton builder
};
/*! Builds a BVH over the user primitives with the morton code builder and
 *  returns the root node created by the user callbacks. Nodes and leaves are
 *  created exclusively through the callbacks stored in `arguments`.
 *  The arguments are not validated here. */
void* rtcBuildBVHMorton(const RTCBuildArguments* arguments)
{
BVH* bvh = (BVH*) arguments->bvh;
RTCBuildPrimitive* prims_i = arguments->primitives;
size_t primitiveCount = arguments->primitiveCount;
RTCCreateNodeFunction createNode = arguments->createNode;
RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
RTCCreateLeafFunction createLeaf = arguments->createLeaf;
RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
void* userPtr = arguments->userPtr;
/* number of primitives reported as processed so far (shared by all threads) */
std::atomic<size_t> progress(0);
/* initialize temporary arrays for morton builder */
/* the user primitives are reinterpreted as PrimRef, no copy is made */
PrimRef* prims = (PrimRef*) prims_i;
mvector<BVHBuilderMorton::BuildPrim>& morton_src = bvh->morton_src;
mvector<BVHBuilderMorton::BuildPrim>& morton_tmp = bvh->morton_tmp;
morton_src.resize(primitiveCount);
morton_tmp.resize(primitiveCount);
/* compute centroid bounds */
const BBox3fa centBounds = parallel_reduce ( size_t(0), primitiveCount, BBox3fa(empty), [&](const range<size_t>& r) -> BBox3fa {
BBox3fa bounds(empty);
for (size_t i=r.begin(); i<r.end(); i++)
bounds.extend(prims[i].bounds().center2());
return bounds;
}, BBox3fa::merge);
/* compute morton codes */
BVHBuilderMorton::MortonCodeMapping mapping(centBounds);
parallel_for ( size_t(0), primitiveCount, [&](const range<size_t>& r) {
BVHBuilderMorton::MortonCodeGenerator generator(mapping,&morton_src[r.begin()]);
for (size_t i=r.begin(); i<r.end(); i++) {
/* the index of the primitive in the user array is stored next to its code */
generator(prims[i].bounds(),(unsigned) i);
}
});
/* start morton build */
std::pair<void*,BBox3fa> root = BVHBuilderMorton::build<std::pair<void*,BBox3fa>>(
/* thread local allocator for fast allocations */
[&] () -> FastAllocator::CachedAllocator {
return bvh->allocator.getCachedAllocator();
},
/* lambda function that allocates BVH nodes */
[&] ( const FastAllocator::CachedAllocator& alloc, size_t N ) -> void* {
return createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
},
/* lambda function that sets bounds */
[&] (void* node, const std::pair<void*,BBox3fa>* children, size_t N) -> std::pair<void*,BBox3fa>
{
/* gather child pointers and child bounds for the user callbacks and
   merge the child bounds into the bounds returned for this node */
BBox3fa bounds = empty;
void* childptrs[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
const RTCBounds* cbounds[BVHBuilderMorton::MAX_BRANCHING_FACTOR];
for (size_t i=0; i<N; i++) {
bounds.extend(children[i].second);
childptrs[i] = children[i].first;
cbounds[i] = (const RTCBounds*)&children[i].second;
}
setNodeBounds(node,cbounds,(unsigned int)N,userPtr);
setNodeChildren(node,childptrs, (unsigned int)N,userPtr);
return std::make_pair(node,bounds);
},
/* lambda function that creates BVH leaves */
[&]( const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc) -> std::pair<void*,BBox3fa>
{
/* the primitives of the leaf are copied into a contiguous local array,
   as the sorted morton array only stores indices into the user array */
/* NOTE(review): assumes current.size() never exceeds
   RTC_BUILD_MAX_PRIMITIVES_PER_LEAF - confirm that the build settings enforce this */
RTCBuildPrimitive localBuildPrims[RTC_BUILD_MAX_PRIMITIVES_PER_LEAF];
BBox3fa bounds = empty;
for (size_t i=0;i<current.size();i++)
{
const size_t id = morton_src[current.begin()+i].index;
bounds.extend(prims[id].bounds());
localBuildPrims[i] = prims_i[id];
}
void* node = createLeaf((RTCThreadLocalAllocator)&alloc,localBuildPrims,current.size(),userPtr);
return std::make_pair(node,bounds);
},
/* lambda that calculates the bounds for some primitive */
[&] (const BVHBuilderMorton::BuildPrim& morton) -> BBox3fa {
return prims[morton.index].bounds();
},
/* progress monitor function */
[&] (size_t dn) {
/* without a user callback the build always continues */
if (!buildProgress) return true;
const size_t n = progress.fetch_add(dn)+dn;
/* fraction of processed primitives, clamped to 1 */
const double f = std::min(1.0,double(n)/double(primitiveCount));
return buildProgress(userPtr,f);
},
morton_src.data(),morton_tmp.data(),primitiveCount,
*arguments);
bvh->allocator.cleanup();
/* only the node pointer is returned, the root bounds are dropped */
return root.first;
}
/*! Builds a BVH over the user primitives with the binned SAH builder and
 *  returns the root node created by the user callbacks. Nodes and leaves are
 *  created exclusively through the callbacks stored in `arguments`.
 *  The arguments are not validated here. */
void* rtcBuildBVHBinnedSAH(const RTCBuildArguments* arguments)
{
BVH* bvh = (BVH*) arguments->bvh;
RTCBuildPrimitive* prims = arguments->primitives;
size_t primitiveCount = arguments->primitiveCount;
RTCCreateNodeFunction createNode = arguments->createNode;
RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
RTCCreateLeafFunction createLeaf = arguments->createLeaf;
RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
void* userPtr = arguments->userPtr;
/* number of primitives reported as processed so far (shared by all threads) */
std::atomic<size_t> progress(0);
/* calculate priminfo */
auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa
{
CentGeomBBox3fa bounds(empty);
for (size_t j=r.begin(); j<r.end(); j++)
/* the user primitive is reinterpreted as a bounding box */
bounds.extend((BBox3fa&)prims[j]);
return bounds;
};
const CentGeomBBox3fa bounds =
parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);
const PrimInfo pinfo(0,primitiveCount,bounds);
/* build BVH */
void* root = BVHBuilderBinnedSAH::build<void*>(
/* thread local allocator for fast allocations */
[&] () -> FastAllocator::CachedAllocator {
return bvh->allocator.getCachedAllocator();
},
/* lambda function that creates BVH nodes */
[&](BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
{
/* the node is created and gets its child bounds here; the child
   pointers are set by the update lambda below */
void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
return node;
},
/* lambda function that updates BVH nodes */
[&](const BVHBuilderBinnedSAH::BuildRecord& precord, const BVHBuilderBinnedSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
setNodeChildren(node,children, (unsigned int)N,userPtr);
return node;
},
/* lambda function that creates BVH leaves */
[&](const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
/* the leaf primitives are passed to the user as a sub-range of the primitive array */
return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
},
/* progress monitor function */
[&] (size_t dn) {
/* without a user callback the build always continues */
if (!buildProgress) return true;
const size_t n = progress.fetch_add(dn)+dn;
/* fraction of processed primitives, clamped to 1 */
const double f = std::min(1.0,double(n)/double(primitiveCount));
return buildProgress(userPtr,f);
},
(PrimRef*)prims,pinfo,*arguments);
bvh->allocator.cleanup();
return root;
}
/*! Reduction operator for (bounds, max geomID) pairs: merges the bounds of
 *  both pairs and keeps the larger of the two geometry IDs. */
static __forceinline const std::pair<CentGeomBBox3fa,unsigned int> mergePair(const std::pair<CentGeomBBox3fa,unsigned int>& a, const std::pair<CentGeomBBox3fa,unsigned int>& b) {
  typedef std::pair<CentGeomBBox3fa,unsigned int> BoundsAndGeomID;
  return BoundsAndGeomID(CentGeomBBox3fa::merge2(a.first,b.first), max(a.second,b.second));
}
/*! Builds a BVH over the user primitives with the SAH builder that supports
 *  spatial splits and returns the root node created by the user callbacks.
 *  Primitives are split through the user provided splitPrimitive callback.
 *  If the largest geomID of the input does not fit into the geomID bits left
 *  over by the spatial split builder, the build is delegated to
 *  rtcBuildBVHBinnedSAH. The arguments are not validated here. */
void* rtcBuildBVHSpatialSAH(const RTCBuildArguments* arguments)
{
BVH* bvh = (BVH*) arguments->bvh;
RTCBuildPrimitive* prims = arguments->primitives;
size_t primitiveCount = arguments->primitiveCount;
RTCCreateNodeFunction createNode = arguments->createNode;
RTCSetNodeChildrenFunction setNodeChildren = arguments->setNodeChildren;
RTCSetNodeBoundsFunction setNodeBounds = arguments->setNodeBounds;
RTCCreateLeafFunction createLeaf = arguments->createLeaf;
RTCSplitPrimitiveFunction splitPrimitive = arguments->splitPrimitive;
RTCProgressMonitorFunction buildProgress = arguments->buildProgress;
void* userPtr = arguments->userPtr;
/* number of primitives reported as processed so far (shared by all threads) */
std::atomic<size_t> progress(0);
/* calculate priminfo */
/* besides the bounds, the largest geomID is determined for the check below */
auto computeBounds = [&](const range<size_t>& r) -> std::pair<CentGeomBBox3fa,unsigned int>
{
CentGeomBBox3fa bounds(empty);
unsigned maxGeomID = 0;
for (size_t j=r.begin(); j<r.end(); j++)
{
bounds.extend((BBox3fa&)prims[j]);
maxGeomID = max(maxGeomID,prims[j].geomID);
}
return std::pair<CentGeomBBox3fa,unsigned int>(bounds,maxGeomID);
};
const std::pair<CentGeomBBox3fa,unsigned int> pair =
parallel_reduce(size_t(0),primitiveCount,size_t(1024),size_t(1024),std::pair<CentGeomBBox3fa,unsigned int>(CentGeomBBox3fa(empty),0), computeBounds, mergePair);
CentGeomBBox3fa bounds = pair.first;
const unsigned int maxGeomID = pair.second;
if (unlikely(maxGeomID >= ((unsigned int)1 << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS))))
{
/* fallback code for max geomID larger than threshold */
return rtcBuildBVHBinnedSAH(arguments);
}
const PrimInfo pinfo(0,primitiveCount,bounds);
/* function that splits a build primitive */
struct Splitter
{
Splitter (RTCSplitPrimitiveFunction splitPrimitive, unsigned geomID, unsigned primID, void* userPtr)
: splitPrimitive(splitPrimitive), geomID(geomID), primID(primID), userPtr(userPtr) {}
/* splits a primitive reference: the geomID handed to the user callback is
   masked with GEOMID_MASK, and both halves get the IDs stored in this splitter */
__forceinline void operator() (PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const
{
prim.geomIDref() &= BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK;
splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
left_o.geomIDref() = geomID; left_o.primIDref() = primID;
right_o.geomIDref() = geomID; right_o.primIDref() = primID;
}
/* splits a plain bounding box: a temporary primitive with the stored
   (masked) geomID and primID is passed to the user callback */
__forceinline void operator() (const BBox3fa& box, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const
{
PrimRef prim(box,geomID & BVHBuilderBinnedFastSpatialSAH::GEOMID_MASK,primID);
splitPrimitive((RTCBuildPrimitive*)&prim,(unsigned)dim,pos,(RTCBounds*)&left_o,(RTCBounds*)&right_o,userPtr);
}
RTCSplitPrimitiveFunction splitPrimitive;
unsigned geomID;
unsigned primID;
void* userPtr;
};
/* build BVH */
void* root = BVHBuilderBinnedFastSpatialSAH::build<void*>(
/* thread local allocator for fast allocations */
[&] () -> FastAllocator::CachedAllocator {
return bvh->allocator.getCachedAllocator();
},
/* lambda function that creates BVH nodes */
[&] (BVHBuilderBinnedFastSpatialSAH::BuildRecord* children, const size_t N, const FastAllocator::CachedAllocator& alloc) -> void*
{
/* the node is created and gets its child bounds here; the child
   pointers are set by the update lambda below */
void* node = createNode((RTCThreadLocalAllocator)&alloc, (unsigned int)N,userPtr);
const RTCBounds* cbounds[GeneralBVHBuilder::MAX_BRANCHING_FACTOR];
for (size_t i=0; i<N; i++) cbounds[i] = (const RTCBounds*) &children[i].prims.geomBounds;
setNodeBounds(node,cbounds, (unsigned int)N,userPtr);
return node;
},
/* lambda function that updates BVH nodes */
[&] (const BVHBuilderBinnedFastSpatialSAH::BuildRecord& precord, const BVHBuilderBinnedFastSpatialSAH::BuildRecord* crecords, void* node, void** children, const size_t N) -> void* {
setNodeChildren(node,children, (unsigned int)N,userPtr);
return node;
},
/* lambda function that creates BVH leaves */
[&] (const PrimRef* prims, const range<size_t>& range, const FastAllocator::CachedAllocator& alloc) -> void* {
/* the leaf primitives are passed to the user as a sub-range of the primitive array */
return createLeaf((RTCThreadLocalAllocator)&alloc,(RTCBuildPrimitive*)(prims+range.begin()),range.size(),userPtr);
},
/* returns the splitter */
[&] ( const PrimRef& prim ) -> Splitter {
return Splitter(splitPrimitive,prim.geomID(),prim.primID(),userPtr);
},
/* progress monitor function */
[&] (size_t dn) {
/* without a user callback the build always continues */
if (!buildProgress) return true;
const size_t n = progress.fetch_add(dn)+dn;
/* fraction of processed primitives, clamped to 1 */
const double f = std::min(1.0,double(n)/double(primitiveCount));
return buildProgress(userPtr,f);
},
(PrimRef*)prims,
/* capacity of the primitive array (the caller only selects this builder
   when it is larger than primitiveCount) */
arguments->primitiveArrayCapacity,
pinfo,*arguments);
bvh->allocator.cleanup();
return root;
}
}
}
using namespace embree;
using namespace embree::isa;
RTC_NAMESPACE_BEGIN
/*! Creates a new BVH object for the given device and returns its handle.
 *  The returned handle carries one reference (see rtcRetainBVH and
 *  rtcReleaseBVH). A null device is reported as an invalid argument. */
RTC_API RTCBVH rtcNewBVH(RTCDevice device)
{
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcNewBVH); // the trace label now names this function (was rtcNewAllocator)
  RTC_VERIFY_HANDLE(device);
  BVH* bvh = new BVH((Device*)device);
  return (RTCBVH) bvh->refInc();
  RTC_CATCH_END((Device*)device);
  return nullptr;
}
/*! Builds a BVH as described by `arguments` and returns its root node, as
 *  created by the user callbacks.
 *
 *  The builder is selected by the build quality: LOW uses the morton
 *  builder, MEDIUM the binned SAH builder, and HIGH the spatial split SAH
 *  builder if a splitPrimitive callback is set and the primitive array has
 *  spare capacity (otherwise the binned SAH builder).
 *
 *  Unless RTC_BUILD_FLAG_DYNAMIC is set, the temporary arrays of the morton
 *  builder are released after the build.
 *
 *  Invalid arguments are reported through throw_RTCError. */
RTC_API void* rtcBuildBVH(const RTCBuildArguments* arguments)
{
  /* 'arguments' is only verified below, thus it must not be dereferenced blindly here */
  BVH* bvh = arguments ? (BVH*) arguments->bvh : nullptr;
  Device* device = bvh ? bvh->device : nullptr;
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcBuildBVH);
  RTC_VERIFY_HANDLE(arguments);
  RTC_VERIFY_HANDLE(bvh);
  RTC_VERIFY_HANDLE(arguments->createNode);
  RTC_VERIFY_HANDLE(arguments->setNodeChildren);
  RTC_VERIFY_HANDLE(arguments->setNodeBounds);
  RTC_VERIFY_HANDLE(arguments->createLeaf);
  if (arguments->primitiveArrayCapacity < arguments->primitiveCount) {
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"primitiveArrayCapacity must be greater or equal to primitiveCount");
  }

  /* initialize the allocator */
  bvh->allocator.init_estimate(arguments->primitiveCount*sizeof(BBox3fa));
  bvh->allocator.reset();

  /* switch between different builders based on quality level; the result is
     kept in a local so that the cleanup below is actually executed */
  void* root = nullptr;
  if (arguments->buildQuality == RTC_BUILD_QUALITY_LOW)
    root = rtcBuildBVHMorton(arguments);
  else if (arguments->buildQuality == RTC_BUILD_QUALITY_MEDIUM)
    root = rtcBuildBVHBinnedSAH(arguments);
  else if (arguments->buildQuality == RTC_BUILD_QUALITY_HIGH) {
    /* spatial splits need the split callback and free slots in the primitive array */
    if (arguments->splitPrimitive == nullptr || arguments->primitiveArrayCapacity <= arguments->primitiveCount)
      root = rtcBuildBVHBinnedSAH(arguments);
    else
      root = rtcBuildBVHSpatialSAH(arguments);
  }
  else {
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid build quality");
  }

  /* if we are in dynamic mode, then do not clear temporary data */
  if (!(arguments->buildFlags & RTC_BUILD_FLAG_DYNAMIC))
  {
    bvh->morton_src.clear();
    bvh->morton_tmp.clear();
  }
  return root;
  RTC_CATCH_END(device);
  return nullptr;
}
/*! Allocates `bytes` bytes with alignment `align` from the thread local
 *  allocator that was handed to a user callback during the BVH build. */
RTC_API void* rtcThreadLocalAlloc(RTCThreadLocalAllocator localAllocator, size_t bytes, size_t align)
{
  /* the handle is the address of the cached allocator of the calling thread */
  FastAllocator::CachedAllocator* const cached =
    reinterpret_cast<FastAllocator::CachedAllocator*>(localAllocator);

  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcThreadLocalAlloc);
  return cached->malloc0(bytes,align);
  RTC_CATCH_END(cached->alloc->getDevice());
  return nullptr;
}
/*! Releases the temporary arrays that the morton builder keeps inside of the
 *  BVH object. A null handle is reported as an invalid argument. */
RTC_API void rtcMakeStaticBVH(RTCBVH hbvh)
{
  BVH* bvh = (BVH*) hbvh;
  /* fetched up front and guarded against a null handle, like in rtcRetainBVH
     and rtcReleaseBVH, so that error reporting never dereferences null */
  Device* device = bvh ? bvh->device : nullptr;
  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcMakeStaticBVH); // the trace label now names this function (was rtcStaticBVH)
  RTC_VERIFY_HANDLE(hbvh);
  bvh->morton_src.clear();
  bvh->morton_tmp.clear();
  RTC_CATCH_END(device);
}
/*! Increments the reference count of the BVH object. A null handle is
 *  reported as an invalid argument. */
RTC_API void rtcRetainBVH(RTCBVH hbvh)
{
  BVH* const bvh = reinterpret_cast<BVH*>(hbvh);

  /* device used for error reporting; stays null for a null handle */
  Device* device = nullptr;
  if (bvh) device = bvh->device;

  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcRetainBVH);
  RTC_VERIFY_HANDLE(hbvh);
  bvh->refInc();
  RTC_CATCH_END(device);
}
/*! Decrements the reference count of the BVH object. A null handle is
 *  reported as an invalid argument. */
RTC_API void rtcReleaseBVH(RTCBVH hbvh)
{
  BVH* const bvh = reinterpret_cast<BVH*>(hbvh);

  /* the device is fetched before the reference is dropped, as the BVH
     object must not be accessed afterwards; stays null for a null handle */
  Device* device = nullptr;
  if (bvh) device = bvh->device;

  RTC_CATCH_BEGIN;
  RTC_TRACE(rtcReleaseBVH);
  RTC_VERIFY_HANDLE(hbvh);
  bvh->refDec();
  RTC_CATCH_END(device);
}
RTC_NAMESPACE_END

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,400 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "device.h"
#include "builder.h"
#include "scene_triangle_mesh.h"
#include "scene_quad_mesh.h"
#include "scene_user_geometry.h"
#include "scene_instance.h"
#include "scene_instance_array.h"
#include "scene_curves.h"
#include "scene_line_segments.h"
#include "scene_subdiv_mesh.h"
#include "scene_grid_mesh.h"
#include "scene_points.h"
#include "../subdiv/tessellation_cache.h"
#include "acceln.h"
#include "geometry.h"
#if defined(EMBREE_SYCL_SUPPORT)
#include "../sycl/rthwif_embree_builder.h"
#endif
namespace embree
{
struct TaskGroup;
/*! Base class all scenes are derived from */
class Scene : public AccelN
{
ALIGNED_CLASS_USM_(std::alignment_of<Scene>::value);
public:
/*! Typed, filtering view over the geometry slots of a scene.
 *
 *  at(i) hands out slot i only if it holds a geometry whose type mask matches
 *  Ty::geom_type, whose motion-blur state (numTimeSteps != 1) equals `mblur`,
 *  and which is enabled (unless the view was created with all == true).
 *  A default-constructed view holds a null scene and must be assigned before use. */
template<typename Ty, bool mblur = false>
class Iterator
{
public:
  Iterator () {}

  /*! \param scene scene whose geometries are visited
   *  \param all   when true, disabled geometries are returned as well */
  Iterator (Scene* scene, bool all = false)
    : scene(scene), all(all) {}

  /*! returns the geometry in slot i cast to Ty, or nullptr if the slot is filtered out */
  __forceinline Ty* at(const size_t i)
  {
    Geometry* geom = scene->geometries[i].ptr;
    if (geom == nullptr) return nullptr;
    if (!all && !geom->isEnabled()) return nullptr;
    const size_t mask = geom->getTypeMask() & Ty::geom_type;
    if (!(mask)) return nullptr;
    if ((geom->numTimeSteps != 1) != mblur) return nullptr;
    return (Ty*) geom;
  }

  /*! same as at(i) */
  __forceinline Ty* operator[] (const size_t i) {
    return at(i);
  }

  /*! number of geometry slots of the scene (including filtered-out ones) */
  __forceinline size_t size() const {
    return scene->size();
  }

  /*! primitive count the scene reports for this geometry type and motion-blur mode */
  __forceinline size_t numPrimitives() const {
    return scene->getNumPrimitives(Ty::geom_type,mblur);
  }

  /*! largest size() over all geometries accepted by at(); 0 if there are none */
  __forceinline size_t maxPrimitivesPerGeometry()
  {
    size_t ret = 0;
    for (size_t i=0; i<scene->size(); i++) {
      Ty* mesh = at(i);
      if (mesh == nullptr) continue;
      ret = max(ret,mesh->size());
    }
    return ret;
  }

  /*! largest slot index accepted by at(); 0 if there are none */
  __forceinline unsigned int maxGeomID()
  {
    unsigned int ret = 0;
    for (size_t i=0; i<scene->size(); i++) {
      Ty* mesh = at(i);
      if (mesh == nullptr) continue;
      ret = max(ret,(unsigned int)i);
    }
    return ret;
  }

  /*! largest numTimeSteps over all geometries accepted by at(); 0 if there are none */
  __forceinline unsigned maxTimeStepsPerGeometry()
  {
    unsigned ret = 0;
    for (size_t i=0; i<scene->size(); i++) {
      Ty* mesh = at(i);
      if (mesh == nullptr) continue;
      ret = max(ret,mesh->numTimeSteps);
    }
    return ret;
  }

private:
  /* in-class initializers keep a default-constructed view in a defined state */
  Scene* scene = nullptr;
  bool all = false;
};
/*! Untyped, filtering view over the geometry slots of a scene.
 *
 *  at(i) hands out slot i only if it holds an enabled geometry whose type mask
 *  intersects `typemask` and whose motion-blur state (numTimeSteps != 1) equals
 *  `mblur`. A default-constructed view holds a null scene and must be assigned
 *  before use. */
class Iterator2
{
public:
  Iterator2 () {}

  /*! \param scene    scene whose geometries are visited
   *  \param typemask geometry types to accept
   *  \param mblur    accept only motion-blurred (true) or only static (false) geometries */
  Iterator2 (Scene* scene, Geometry::GTypeMask typemask, bool mblur)
    : scene(scene), typemask(typemask), mblur(mblur) {}

  /*! returns the geometry in slot i, or nullptr if the slot is filtered out */
  __forceinline Geometry* at(const size_t i)
  {
    Geometry* geom = scene->geometries[i].ptr;
    if (geom == nullptr) return nullptr;
    if (!geom->isEnabled()) return nullptr;
    if (!(geom->getTypeMask() & typemask)) return nullptr;
    if ((geom->numTimeSteps != 1) != mblur) return nullptr;
    return geom;
  }

  /*! same as at(i) */
  __forceinline Geometry* operator[] (const size_t i) {
    return at(i);
  }

  /*! number of geometry slots of the scene (including filtered-out ones) */
  __forceinline size_t size() const {
    return scene->size();
  }

private:
  /* in-class initializers keep a default-constructed view in a defined state */
  Scene* scene = nullptr;
  Geometry::GTypeMask typemask = Geometry::GTypeMask(0);
  bool mblur = false;
};
public:
/*! Scene construction */
Scene (Device* device);
/*! Scene destruction */
~Scene () noexcept;
private:
/*! class is non-copyable */
Scene (const Scene& other) DELETED; // do not implement
Scene& operator= (const Scene& other) DELETED; // do not implement
public:
/* Creation routines for the acceleration structures, one per geometry category.
   Only the declarations are given here; the definitions are not part of this header.
   NOTE(review): the "MB" variants presumably handle motion-blurred geometry -- confirm
   against the implementation. */
void createTriangleAccel();
void createTriangleMBAccel();
void createQuadAccel();
void createQuadMBAccel();
void createHairAccel();
void createHairMBAccel();
void createSubdivAccel();
void createSubdivMBAccel();
void createUserGeometryAccel();
void createUserGeometryMBAccel();
void createInstanceAccel();
void createInstanceMBAccel();
void createInstanceExpensiveAccel();
void createInstanceExpensiveMBAccel();
void createInstanceArrayAccel();
void createInstanceArrayMBAccel();
void createGridAccel();
void createGridMBAccel();
/*! prints statistics about the scene */
void printStatistics();
/*! clears the scene */
void clear();
/*! detaches some geometry */
void detachGeometry(size_t geomID);
/* accessors for the build quality and scene flags (stored in quality_flags / scene_flags below) */
void setBuildQuality(RTCBuildQuality quality_flags);
RTCBuildQuality getBuildQuality() const;
void setSceneFlags(RTCSceneFlags scene_flags);
RTCSceneFlags getSceneFlags() const;
/* build / commit entry points; defined outside this header */
void build_cpu_accels();
void build_gpu_accels();
void commit (bool join);
void commit_task ();
/* intentionally empty: this build() does nothing */
void build () {}
/*! Returns the number of geometry slots, i.e. the size of the geometries array. */
__forceinline size_t size() const
{
  return geometries.size();
}
/* bind geometry to the scene */
unsigned int bind (unsigned geomID, Ref<Geometry> geometry);
/*! Reports the current value of the scene's modified flag. */
__forceinline bool isModified() const
{
  return modified;
}
/*! Overwrites the scene's modified flag.
 *  \param f new flag value; defaults to true (mark as modified) */
__forceinline void setModified(bool f = true)
{
  modified = f;
}
/*! Tells whether the geometry in slot geomID changed since its counter was last
 *  recorded: true if the geometry's modification counter exceeds the value stored
 *  in geometryModCounters_. An empty slot is never modified. */
__forceinline bool isGeometryModified(size_t geomID)
{
  Ref<Geometry>& geometry = geometries[geomID];
  if (!geometry) return false;

  const auto current  = geometry->getModCounter();
  const auto recorded = geometryModCounters_[geomID];
  return current > recorded;
}
protected:
void checkIfModifiedAndSet ();
public:
/*! Returns the raw geometry pointer stored in slot i (asserts that i is in range). */
__forceinline Geometry* get(size_t i)
{
  assert(i < geometries.size());
  return geometries[i].ptr;
}
/*! Const overload: returns the geometry in slot i as a pointer to const. */
__forceinline const Geometry* get(size_t i) const
{
  assert(i < geometries.size());
  return geometries[i].ptr;
}
/*! Returns the geometry in slot i cast to Mesh.
 *  Asserts (debug builds only) that i is in range and that the geometry's type
 *  mask matches Mesh::geom_type; no runtime check is done otherwise. */
template<typename Mesh>
__forceinline Mesh* get(size_t i)
{
  assert(i < geometries.size());
  assert(geometries[i]->getTypeMask() & Mesh::geom_type);
  Geometry* const geom = geometries[i].ptr;
  return (Mesh*)geom;
}
/*! Const overload: returns the geometry in slot i cast to const Mesh.
 *  Asserts (debug builds only) that i is in range and that the geometry's type
 *  mask matches Mesh::geom_type. */
template<typename Mesh>
__forceinline const Mesh* get(size_t i) const
{
  assert(i < geometries.size());
  assert(geometries[i]->getTypeMask() & Mesh::geom_type);
  Geometry* const geom = geometries[i].ptr;
  return (Mesh*)geom;
}
/*! Checked variant of get<Mesh>(): returns the geometry in slot i cast to Mesh,
 *  or nullptr when the slot is empty or its type mask does not match
 *  Mesh::geom_type. The index itself is only asserted. */
template<typename Mesh>
__forceinline Mesh* getSafe(size_t i)
{
  assert(i < geometries.size());

  /* empty slot */
  if (geometries[i] == null) return nullptr;

  /* hand the geometry out only when its type matches */
  if (geometries[i]->getTypeMask() & Mesh::geom_type)
    return (Mesh*) geometries[i].ptr;

  return nullptr;
}
/*! Returns a counted reference to the geometry in slot i. geometriesMutex is
 *  held for the duration of the lookup. */
__forceinline Ref<Geometry> get_locked(size_t i)
{
  Lock<MutexSys> guard(geometriesMutex);
  assert(i < geometries.size());
  return geometries[i];
}
/* flag decoding: each query tests one bit of scene_flags */

/*! true when neither the COMPACT nor the ROBUST flag is set */
__forceinline bool isFastAccel() const
{
  return !(isCompactAccel() || isRobustAccel());
}
/*! true when RTC_SCENE_FLAG_COMPACT is set */
__forceinline bool isCompactAccel() const
{
  return (scene_flags & RTC_SCENE_FLAG_COMPACT) != 0;
}
/*! true when RTC_SCENE_FLAG_ROBUST is set */
__forceinline bool isRobustAccel() const
{
  return (scene_flags & RTC_SCENE_FLAG_ROBUST) != 0;
}
/*! true when RTC_SCENE_FLAG_DYNAMIC is not set */
__forceinline bool isStaticAccel() const
{
  return (scene_flags & RTC_SCENE_FLAG_DYNAMIC) == 0;
}
/*! true when RTC_SCENE_FLAG_DYNAMIC is set */
__forceinline bool isDynamicAccel() const
{
  return (scene_flags & RTC_SCENE_FLAG_DYNAMIC) != 0;
}
/*! true when RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS is set in scene_flags */
__forceinline bool hasArgumentFilterFunction() const
{
  return (scene_flags & RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS) != 0;
}
/*! true when the scene's geometry counts record at least one filter function
 *  (world.numFilterFunctions is non-zero). Const, like the other flag queries,
 *  so it can be used on a const Scene. */
__forceinline bool hasGeometryFilterFunction() const {
  return world.numFilterFunctions != 0;
}
/*! true when a filter function may be present either through the scene flag
 *  (hasArgumentFilterFunction) or through some geometry (hasGeometryFilterFunction). */
__forceinline bool hasFilterFunction() const {
  return hasArgumentFilterFunction() || hasGeometryFilterFunction();
}
void* createQBVH6Accel();
public:
Device* device;
public:
IDPool<unsigned,0xFFFFFFFE> id_pool;
Device::vector<Ref<Geometry>> geometries = device; //!< list of all user geometries
avector<unsigned int> geometryModCounters_;
Device::vector<float*> vertices = device;
public:
/* these are to detect if we need to recreate the acceleration structures */
bool flags_modified;
unsigned int enabled_geometry_types;
RTCSceneFlags scene_flags;
RTCBuildQuality quality_flags;
MutexSys buildMutex;
MutexSys geometriesMutex;
#if defined(EMBREE_SYCL_SUPPORT)
public:
BBox3f hwaccel_bounds = empty;
AccelBuffer hwaccel;
#endif
private:
bool modified; //!< true if scene got modified
public:
std::unique_ptr<TaskGroup> taskGroup;
public:
/*! Adapter that lets a builder report progress through the BuildProgressMonitor
 *  interface; every report is forwarded to Scene::progressMonitor. */
struct BuildProgressMonitorInterface : public BuildProgressMonitor
{
  /*! \param scene scene that receives the progress reports */
  BuildProgressMonitorInterface(Scene* scene)
    : scene(scene) {}

  /*! forwards the reported amount dn, converted to double, to the scene */
  void operator() (size_t dn) const
  {
    const double amount = double(dn);
    scene->progressMonitor(amount);
  }

private:
  Scene* scene;
};
BuildProgressMonitorInterface progressInterface;
RTCProgressMonitorFunction progress_monitor_function;
void* progress_monitor_ptr;
std::atomic<size_t> progress_monitor_counter;
void progressMonitor(double nprims);
void setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr);
private:
GeometryCounts world; //!< counts for geometry
public:
/*! Returns the total reported by the scene's geometry counts (world.size()). */
__forceinline size_t numPrimitives() const
{
  return world.size();
}
/*! Sums the primitive counters of every geometry category selected by `mask`.
 *  \param mask  bit set of Geometry::MTY_* categories to include
 *  \param mblur selects the motion-blur counters (true) or the static ones (false)
 *  \return accumulated primitive count; 0 if no selected bit matches */
__forceinline size_t getNumPrimitives(Geometry::GTypeMask mask, bool mblur) const
{
  size_t total = 0;

  if (mask & Geometry::MTY_TRIANGLE_MESH) {
    if (mblur) total += world.numMBTriangles;
    else       total += world.numTriangles;
  }
  if (mask & Geometry::MTY_QUAD_MESH) {
    if (mblur) total += world.numMBQuads;
    else       total += world.numQuads;
  }
  if (mask & Geometry::MTY_CURVE2) {
    if (mblur) total += world.numMBLineSegments;
    else       total += world.numLineSegments;
  }
  if (mask & Geometry::MTY_CURVE4) {
    if (mblur) total += world.numMBBezierCurves;
    else       total += world.numBezierCurves;
  }
  if (mask & Geometry::MTY_POINTS) {
    if (mblur) total += world.numMBPoints;
    else       total += world.numPoints;
  }
  if (mask & Geometry::MTY_SUBDIV_MESH) {
    if (mblur) total += world.numMBSubdivPatches;
    else       total += world.numSubdivPatches;
  }
  if (mask & Geometry::MTY_USER_GEOMETRY) {
    if (mblur) total += world.numMBUserGeometries;
    else       total += world.numUserGeometries;
  }
  if (mask & Geometry::MTY_INSTANCE_CHEAP) {
    if (mblur) total += world.numMBInstancesCheap;
    else       total += world.numInstancesCheap;
  }
  if (mask & Geometry::MTY_INSTANCE_EXPENSIVE) {
    if (mblur) total += world.numMBInstancesExpensive;
    else       total += world.numInstancesExpensive;
  }
  if (mask & Geometry::MTY_INSTANCE_ARRAY) {
    if (mblur) total += world.numMBInstanceArrays;
    else       total += world.numInstanceArrays;
  }
  if (mask & Geometry::MTY_GRID_MESH) {
    if (mblur) total += world.numMBGrids;
    else       total += world.numGrids;
  }

  return total;
}
/*! Like getNumPrimitives(), except that grid meshes contribute their sub-grid
 *  counters instead of their grid counters.
 *  \param mask  bit set of Geometry::MTY_* categories to include
 *  \param mblur selects the motion-blur counters (true) or the static ones (false) */
__forceinline size_t getNumSubPrimitives(Geometry::GTypeMask mask, bool mblur) const
{
  /* grids are counted by sub-grid ... */
  size_t subGrids = 0;
  if (mask & Geometry::MTY_GRID_MESH) {
    if (mblur) subGrids += world.numMBSubGrids;
    else       subGrids += world.numSubGrids;
  }

  /* ... and every other category by the regular per-primitive counters */
  const Geometry::GTypeMask withoutGrids = (Geometry::GTypeMask)(mask & ~Geometry::MTY_GRID_MESH);
  return subGrids + getNumPrimitives(withoutGrids, mblur);
}
/*! Returns the number of time steps to consider for geometries of type Mesh:
 *  always 1 without motion blur, otherwise the maximum numTimeSteps over the
 *  matching geometries of this scene. */
template<typename Mesh, bool mblur>
__forceinline unsigned getNumTimeSteps()
{
  if (mblur) {
    Scene::Iterator<Mesh,mblur> geometriesOfType(this);
    return geometriesOfType.maxTimeStepsPerGeometry();
  }
  return 1;
}
/*! Returns the largest slot index that holds an enabled geometry of type Mesh
 *  with the requested motion-blur state (0 if there is none). */
template<typename Mesh, bool mblur>
__forceinline unsigned int getMaxGeomID()
{
  return Scene::Iterator<Mesh,mblur>(this).maxGeomID();
}
};
}

View file

@ -0,0 +1,764 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "geometry.h"
#include "buffer.h"
#include "../subdiv/bezier_curve.h"
#include "../subdiv/hermite_curve.h"
#include "../subdiv/bspline_curve.h"
#include "../subdiv/catmullrom_curve.h"
#include "../subdiv/linear_bezier_patch.h"
namespace embree
{
/*! represents an array of bicubic bezier curves */
struct CurveGeometry : public Geometry
{
/*! type of this geometry */
static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4;
public:
/*! bezier curve construction */
CurveGeometry (Device* device, Geometry::GType gtype);
public:
void setMask(unsigned mask);
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
void setTessellationRate(float N);
void setMaxRadiusScale(float s);
void addElementsToCount (GeometryCounts & counts) const;
public:
/*! Returns the vertex count, taken from the vertex buffer of time step 0. */
__forceinline size_t numVertices() const
{
  return vertices[0].size();
}
/*! Returns the i'th entry of the curve index buffer, by reference into the
 *  buffer. Code in this file uses the value as the index of the curve's first vertex. */
__forceinline const unsigned int& curve(size_t i) const
{
  return curves[i];
}
/*! Returns vertex i of the first time step (read through vertices0). */
__forceinline Vec3ff vertex(size_t i) const
{
  return vertices0[i];
}
/*! Returns normal i of the first time step (read through normals0). */
__forceinline Vec3fa normal(size_t i) const
{
  return normals0[i];
}
/*! Returns tangent i of the first time step (read through tangents0). */
__forceinline Vec3ff tangent(size_t i) const
{
  return tangents0[i];
}
/*! Returns normal derivative i of the first time step (read through dnormals0). */
__forceinline Vec3fa dnormal(size_t i) const
{
  return dnormals0[i];
}
/*! Returns the radius of vertex i of the first time step, i.e. the w component
 *  of the vertex. */
__forceinline float radius(size_t i) const
{
  return vertices0[i].w;
}
/*! Returns vertex i of time step itime. */
__forceinline Vec3ff vertex(size_t i, size_t itime) const
{
  return vertices[itime][i];
}
/*! Returns normal i of time step itime. */
__forceinline Vec3fa normal(size_t i, size_t itime) const
{
  return normals[itime][i];
}
/*! Returns tangent i of time step itime. */
__forceinline Vec3ff tangent(size_t i, size_t itime) const
{
  return tangents[itime][i];
}
/*! Returns normal derivative i of time step itime. */
__forceinline Vec3fa dnormal(size_t i, size_t itime) const
{
  return dnormals[itime][i];
}
/*! Returns the radius of vertex i of time step itime, i.e. the w component of
 *  the vertex. */
__forceinline float radius(size_t i, size_t itime) const
{
  return vertices[itime][i].w;
}
/*! Loads the four consecutive control points i..i+3 of the first time step.
 *  \param p0,p1,p2,p3 receive vertices i, i+1, i+2 and i+3 */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const
{
  p0 = vertices0[i+0];
  p1 = vertices0[i+1];
  p2 = vertices0[i+2];
  p3 = vertices0[i+3];
}
/*! Loads the four consecutive control points i..i+3 of time step itime.
 *  \param p0,p1,p2,p3 receive vertices i, i+1, i+2 and i+3 */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const
{
  /* vertex buffer of the requested time step */
  const BufferView<Vec3ff>& stepVertices = vertices[itime];
  p0 = stepVertices[i+0];
  p1 = stepVertices[i+1];
  p2 = stepVertices[i+2];
  p3 = stepVertices[i+3];
}
/*! Loads the four consecutive normals i..i+3 of the first time step.
 *  \param n0,n1,n2,n3 receive normals i, i+1, i+2 and i+3 */
__forceinline void gather_normals(Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
{
  n0 = normals0[i+0];
  n1 = normals0[i+1];
  n2 = normals0[i+2];
  n3 = normals0[i+3];
}
/*! Loads control points and normals i..i+3 of the first time step.
 *  \param p0,p1,p2,p3 receive vertices i..i+3
 *  \param n0,n1,n2,n3 receive normals  i..i+3 */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const
{
  /* positions first, then normals */
  gather(p0,p1,p2,p3,i);
  gather_normals(n0,n1,n2,n3,i);
}
/*! Loads control points and normals i..i+3 of time step itime.
 *  \param p0,p1,p2,p3 receive vertices i..i+3
 *  \param n0,n1,n2,n3 receive normals  i..i+3 */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const
{
  /* positions of the requested time step */
  gather(p0,p1,p2,p3,i,itime);

  /* normals of the requested time step */
  const BufferView<Vec3fa>& stepNormals = normals[itime];
  n0 = stepNormals[i+0];
  n1 = stepNormals[i+1];
  n2 = stepNormals[i+2];
  n3 = stepNormals[i+3];
}
/*! Issues two prefetchL1 requests for the first-time-step vertex data of the
 *  curve starting at vertex i: one at the element's address and one 64 units
 *  further (units of getPtr's pointee type -- presumably bytes, confirm). */
__forceinline void prefetchL1_vertices(size_t i) const
{
  const auto base = vertices0.getPtr(i);
  prefetchL1(base+0);
  prefetchL1(base+64);
}
/*! Issues two prefetchL2 requests for the first-time-step vertex data of the
 *  curve starting at vertex i: one at the element's address and one 64 units
 *  further (units of getPtr's pointee type -- presumably bytes, confirm). */
__forceinline void prefetchL2_vertices(size_t i) const
{
  const auto base = vertices0.getPtr(i);
  prefetchL2(base+0);
  prefetchL2(base+64);
}
/*! Loads control points i..i+3 for a continuous time value by blending the two
 *  time steps that timeSegment() selects for `time`.
 *  \param p0,p1,p2,p3 receive the blended control points */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
{
  /* time step index and blend factor within that step */
  float frac;
  const size_t step = timeSegment(time, frac);
  const float wLo = 1.0f - frac;
  const float wHi = frac;

  /* control points at both ends of the time segment */
  Vec3ff lo0,lo1,lo2,lo3;
  gather(lo0,lo1,lo2,lo3,i,step);
  Vec3ff hi0,hi1,hi2,hi3;
  gather(hi0,hi1,hi2,hi3,i,step+1);

  /* wLo*lo + wHi*hi */
  p0 = madd(Vec3ff(wLo),lo0,wHi*hi0);
  p1 = madd(Vec3ff(wLo),lo1,wHi*hi1);
  p2 = madd(Vec3ff(wLo),lo2,wHi*hi2);
  p3 = madd(Vec3ff(wLo),lo3,wHi*hi3);
}
/*! Loads control points i..i+3 for `time`, for geometry with or without motion
 *  blur: without motion blur `time` is ignored and the first time step is used. */
__forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const
{
  if (!hasMotionBlur()) {
    gather(p0,p1,p2,p3,i);
    return;
  }
  gather(p0,p1,p2,p3,i,time);
}
/*! Loads control points and normals i..i+3 for a continuous time value by
 *  blending the two time steps that timeSegment() selects for `time`.
 *  \param p0,p1,p2,p3 receive the blended control points
 *  \param n0,n1,n2,n3 receive the blended normals */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
{
  /* time step index and blend factor within that step */
  float frac;
  const size_t step = timeSegment(time, frac);
  const float wLo = 1.0f - frac;
  const float wHi = frac;

  /* data at both ends of the time segment */
  Vec3ff loP0,loP1,loP2,loP3; Vec3fa loN0,loN1,loN2,loN3;
  gather(loP0,loP1,loP2,loP3,loN0,loN1,loN2,loN3,i,step);
  Vec3ff hiP0,hiP1,hiP2,hiP3; Vec3fa hiN0,hiN1,hiN2,hiN3;
  gather(hiP0,hiP1,hiP2,hiP3,hiN0,hiN1,hiN2,hiN3,i,step+1);

  /* wLo*lo + wHi*hi for positions ... */
  p0 = madd(Vec3ff(wLo),loP0,wHi*hiP0);
  p1 = madd(Vec3ff(wLo),loP1,wHi*hiP1);
  p2 = madd(Vec3ff(wLo),loP2,wHi*hiP2);
  p3 = madd(Vec3ff(wLo),loP3,wHi*hiP3);

  /* ... and for normals */
  n0 = madd(Vec3ff(wLo),loN0,wHi*hiN0);
  n1 = madd(Vec3ff(wLo),loN1,wHi*hiN1);
  n2 = madd(Vec3ff(wLo),loN2,wHi*hiN2);
  n3 = madd(Vec3ff(wLo),loN3,wHi*hiN3);
}
/*! Loads control points and normals i..i+3 for `time`, for geometry with or
 *  without motion blur: without motion blur `time` is ignored and the first
 *  time step is used. */
__forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const
{
  if (!hasMotionBlur()) {
    gather(p0,p1,p2,p3,n0,n1,n2,n3,i);
    return;
  }
  gather(p0,p1,p2,p3,n0,n1,n2,n3,i,time);
}
/*! Builds the normal-oriented surface of primitive primID at time step itime.
 *  The center curve is passed through enlargeRadiusToMinWidth() before being
 *  combined with the normal curve. */
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
{
  /* control points and normals of the curve */
  const unsigned int firstVertex = curve(primID);
  Vec3ff c0,c1,c2,c3;
  Vec3fa m0,m1,m2,m3;
  gather(c0,c1,c2,c3,m0,m1,m2,m3,firstVertex,itime);

  /* center curve with adjusted radii */
  SourceCurve3ff center(c0,c1,c2,c3);
  center = enlargeRadiusToMinWidth(context,this,ray_org,center);

  /* curve describing the orientation */
  SourceCurve3fa orientation(m0,m1,m2,m3);
  return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,orientation);
}
/*! Builds the normal-oriented surface of primitive primID for a continuous time
 *  value: the surfaces of the two time steps selected by timeSegment() are
 *  combined with clerp(). */
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
{
  float frac;
  const size_t step = timeSegment(time, frac);

  const TensorLinearCubicBezierSurface3fa lower =
    getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,step+0);
  const TensorLinearCubicBezierSurface3fa upper =
    getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,step+1);

  return clerp(lower,upper,frac);
}
/*! Builds the normal-oriented surface of primitive primID for `time`, for
 *  geometry with or without motion blur: without motion blur the surface of
 *  time step 0 is returned and `time` is ignored. */
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurveSafe(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
{
  /* static geometry: only time step 0 exists */
  if (!hasMotionBlur())
    return getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,size_t(0));

  /* motion blur: blend the two surfaces around `time` */
  float frac = 0.0f;
  const size_t step = timeSegment(time, frac);
  const TensorLinearCubicBezierSurface3fa lower =
    getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,step+0);
  const TensorLinearCubicBezierSurface3fa upper =
    getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,step+1);
  return clerp(lower,upper,frac);
}
/*! Loads the Hermite segment starting at vertex i of the first time step.
 *  \param p0,t0 receive position and tangent of vertex i
 *  \param p1,t1 receive position and tangent of vertex i+1 */
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const
{
  p0 = vertices0[i+0];
  p1 = vertices0[i+1];
  t0 = tangents0[i+0];
  t1 = tangents0[i+1];
}
/*! Loads the Hermite segment starting at vertex i of time step itime.
 *  \param p0,t0 receive position and tangent of vertex i
 *  \param p1,t1 receive position and tangent of vertex i+1 */
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const
{
  /* buffers of the requested time step */
  const BufferView<Vec3ff>& stepVertices = vertices[itime];
  const BufferView<Vec3ff>& stepTangents = tangents[itime];
  p0 = stepVertices[i+0];
  p1 = stepVertices[i+1];
  t0 = stepTangents[i+0];
  t1 = stepTangents[i+1];
}
/*! Loads the Hermite segment starting at vertex i for a continuous time value
 *  by blending the two time steps that timeSegment() selects for `time`.
 *  \param p0,t0 receive blended position and tangent of vertex i
 *  \param p1,t1 receive blended position and tangent of vertex i+1 */
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
{
  /* time step index and blend factor within that step */
  float frac;
  const size_t step = timeSegment(time, frac);
  const float wLo = 1.0f - frac, wHi = frac;

  /* segment data at both ends of the time segment */
  Vec3ff loP0,loT0,loP1,loT1;
  gather_hermite(loP0,loT0,loP1,loT1,i,step);
  Vec3ff hiP0,hiT0,hiP1,hiT1;
  gather_hermite(hiP0,hiT0,hiP1,hiT1,i,step+1);

  /* wLo*lo + wHi*hi */
  p0 = madd(Vec3ff(wLo),loP0,wHi*hiP0);
  t0 = madd(Vec3ff(wLo),loT0,wHi*hiT0);
  p1 = madd(Vec3ff(wLo),loP1,wHi*hiP1);
  t1 = madd(Vec3ff(wLo),loT1,wHi*hiT1);
}
/*! Loads the Hermite segment starting at vertex i for `time`, for geometry with
 *  or without motion blur: without motion blur `time` is ignored and the first
 *  time step is used. */
__forceinline void gather_hermite_safe(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const
{
  if (!hasMotionBlur()) {
    gather_hermite(p0,t0,p1,t1,i);
    return;
  }
  gather_hermite(p0,t0,p1,t1,i,time);
}
/*! Loads the oriented Hermite segment starting at vertex i of the first time step.
 *  \param p0,t0,n0,dn0 receive position, tangent, normal and normal derivative of vertex i
 *  \param p1,t1,n1,dn1 receive the same quantities of vertex i+1 */
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const
{
  /* positions and tangents */
  gather_hermite(p0,t0,p1,t1,i);

  /* normals and their derivatives */
  n0  = normals0[i+0];
  n1  = normals0[i+1];
  dn0 = dnormals0[i+0];
  dn1 = dnormals0[i+1];
}
/*! Loads the oriented Hermite segment starting at vertex i of time step itime.
 *  \param p0,t0,n0,dn0 receive position, tangent, normal and normal derivative of vertex i
 *  \param p1,t1,n1,dn1 receive the same quantities of vertex i+1 */
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const
{
  /* positions and tangents of the requested time step */
  gather_hermite(p0,t0,p1,t1,i,itime);

  /* normals and their derivatives of the requested time step */
  const BufferView<Vec3fa>& stepNormals  = normals[itime];
  const BufferView<Vec3fa>& stepDNormals = dnormals[itime];
  n0  = stepNormals[i+0];
  n1  = stepNormals[i+1];
  dn0 = stepDNormals[i+0];
  dn1 = stepDNormals[i+1];
}
/*! Loads the oriented Hermite segment starting at vertex i for a continuous
 *  time value by blending the two time steps that timeSegment() selects.
 *  \param p0,t0,n0,dn0 receive blended position, tangent, normal and normal derivative of vertex i
 *  \param p1,t1,n1,dn1 receive the same quantities of vertex i+1 */
__forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
{
  /* time step index and blend factor within that step */
  float frac;
  const size_t step = timeSegment(time, frac);
  const float wLo = 1.0f - frac, wHi = frac;

  /* segment data at both ends of the time segment */
  Vec3ff loP0,loT0,loP1,loT1; Vec3fa loN0,loDn0,loN1,loDn1;
  gather_hermite(loP0,loT0,loN0,loDn0,loP1,loT1,loN1,loDn1,i,step);
  Vec3ff hiP0,hiT0,hiP1,hiT1; Vec3fa hiN0,hiDn0,hiN1,hiDn1;
  gather_hermite(hiP0,hiT0,hiN0,hiDn0,hiP1,hiT1,hiN1,hiDn1,i,step+1);

  /* wLo*lo + wHi*hi, first vertex */
  p0 = madd(Vec3ff(wLo),loP0,wHi*hiP0);
  t0 = madd(Vec3ff(wLo),loT0,wHi*hiT0);
  n0 = madd(Vec3ff(wLo),loN0,wHi*hiN0);
  dn0= madd(Vec3ff(wLo),loDn0,wHi*hiDn0);

  /* wLo*lo + wHi*hi, second vertex */
  p1 = madd(Vec3ff(wLo),loP1,wHi*hiP1);
  t1 = madd(Vec3ff(wLo),loT1,wHi*hiT1);
  n1 = madd(Vec3ff(wLo),loN1,wHi*hiN1);
  dn1= madd(Vec3ff(wLo),loDn1,wHi*hiDn1);
}
/*! Loads the oriented Hermite segment starting at vertex i for `time`, for
 *  geometry with or without motion blur: without motion blur `time` is ignored
 *  and the first time step is used. */
__forceinline void gather_hermite_safe(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const
{
  if (!hasMotionBlur()) {
    gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,i);
    return;
  }
  gather_hermite(p0,t0,n0,dn0,p1,t1,n1,dn1,i,time);
}
/*! Builds the normal-oriented surface of Hermite primitive primID at time step
 *  itime. The center curve is passed through enlargeRadiusToMinWidth() before
 *  being combined with the normal curve. */
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const
{
  /* positions, tangents, normals and normal derivatives of the segment */
  const unsigned int firstVertex = curve(primID);
  Vec3ff c0,ct0,c1,ct1;
  Vec3fa m0,dm0,m1,dm1;
  gather_hermite(c0,ct0,m0,dm0,c1,ct1,m1,dm1,firstVertex,itime);

  /* center curve with adjusted radii */
  SourceCurve3ff center(c0,ct0,c1,ct1);
  center = enlargeRadiusToMinWidth(context,this,ray_org,center);

  /* curve describing the orientation */
  SourceCurve3fa orientation(m0,dm0,m1,dm1);
  return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,orientation);
}
/*! Builds the normal-oriented surface of Hermite primitive primID for a
 *  continuous time value: the surfaces of the two time steps selected by
 *  timeSegment() are combined with clerp(). */
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
{
  float frac;
  const size_t step = timeSegment(time, frac);

  const TensorLinearCubicBezierSurface3fa lower =
    getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,step+0);
  const TensorLinearCubicBezierSurface3fa upper =
    getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,step+1);

  return clerp(lower,upper,frac);
}
/*! Builds the normal-oriented surface of Hermite primitive primID for `time`,
 *  for geometry with or without motion blur: without motion blur the surface of
 *  time step 0 is returned and `time` is ignored. */
template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa>
__forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurveSafe(RayQueryContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const
{
  /* static geometry: only time step 0 exists */
  if (!hasMotionBlur())
    return getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,size_t(0));

  /* motion blur: blend the two surfaces around `time` */
  float frac = 0.0f;
  const size_t step = timeSegment(time, frac);
  const TensorLinearCubicBezierSurface3fa lower =
    getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,step+0);
  const TensorLinearCubicBezierSurface3fa upper =
    getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,step+1);
  return clerp(lower,upper,frac);
}
/*! Returns the projected area of primitive i. This implementation ignores i and
 *  always reports the constant 1.0f. */
__forceinline float projectedPrimitiveArea(const size_t i) const
{
  return 1.0f;
}
private:
void resizeBuffers(unsigned int numSteps);
public:
BufferView<unsigned int> curves; //!< array of curve indices
BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
BufferView<Vec3ff> tangents0; //!< fast access to first tangent buffer
BufferView<Vec3fa> dnormals0; //!< fast access to first normal derivative buffer
Device::vector<BufferView<Vec3ff>> vertices = device; //!< vertex array for each timestep
Device::vector<BufferView<Vec3fa>> normals = device; //!< normal array for each timestep
Device::vector<BufferView<Vec3ff>> tangents = device; //!< tangent array for each timestep
Device::vector<BufferView<Vec3fa>> dnormals = device; //!< normal derivative array for each timestep
BufferView<char> flags; //!< start, end flag per segment
Device::vector<BufferView<char>> vertexAttribs = device; //!< user buffers
int tessellationRate; //!< tessellation rate for flat curve
float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
};
namespace isa
{
template<template<typename Ty> class Curve>
struct CurveGeometryInterface : public CurveGeometry
{
typedef Curve<Vec3ff> Curve3ff;
typedef Curve<Vec3fa> Curve3fa;
/*! Forwards device and geometry type to the CurveGeometry base; adds no state of its own. */
CurveGeometryInterface (Device* device, Geometry::GType gtype)
: CurveGeometry(device,gtype) {}
/*! Returns curve i at time step itime with every control point radius (w)
 *  multiplied by maxRadiusScale.
 *  \param i     primitive index into the curve index buffer
 *  \param itime time step to read, defaults to 0 */
__forceinline const Curve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const
{
  const unsigned int first = curve(i);

  /* load each control point and scale its radius */
  Vec3ff c0 = vertex(first+0,itime);  c0.w *= maxRadiusScale;
  Vec3ff c1 = vertex(first+1,itime);  c1.w *= maxRadiusScale;
  Vec3ff c2 = vertex(first+2,itime);  c2.w *= maxRadiusScale;
  Vec3ff c3 = vertex(first+3,itime);  c3.w *= maxRadiusScale;

  return Curve3ff (c0,c1,c2,c3);
}
/*! Returns curve i at time step itime with its control points transformed by
 *  `space` (xfmPoint) and every radius multiplied by maxRadiusScale.
 *  \param space linear space the positions are transformed with
 *  \param i     primitive index into the curve index buffer
 *  \param itime time step to read, defaults to 0 */
__forceinline const Curve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  const unsigned int first = curve(i);

  /* untransformed control points */
  const Vec3ff c0 = vertex(first+0,itime);
  const Vec3ff c1 = vertex(first+1,itime);
  const Vec3ff c2 = vertex(first+2,itime);
  const Vec3ff c3 = vertex(first+3,itime);

  /* transformed position, scaled radius */
  const Vec3ff q0(xfmPoint(space,(Vec3fa)c0), maxRadiusScale*c0.w);
  const Vec3ff q1(xfmPoint(space,(Vec3fa)c1), maxRadiusScale*c1.w);
  const Vec3ff q2(xfmPoint(space,(Vec3fa)c2), maxRadiusScale*c2.w);
  const Vec3ff q3(xfmPoint(space,(Vec3fa)c3), maxRadiusScale*c3.w);

  return Curve3ff(q0,q1,q2,q3);
}
/*! Returns curve i at time step itime after an offset/scale/transform of the
 *  positions: each position becomes xfmPoint(space,(p-ofs)*scale), each radius
 *  becomes maxRadiusScale*r*(r_scale0*scale).
 *  \param ofs      offset subtracted from the positions
 *  \param scale    uniform scale applied to the positions (and to the radii)
 *  \param r_scale0 additional radius scale
 *  \param space    linear space applied after offset and scale
 *  \param i        primitive index into the curve index buffer
 *  \param itime    time step to read, defaults to 0 */
__forceinline const Curve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  const float radiusScale = r_scale0*scale;
  const unsigned int first = curve(i);

  /* untransformed control points */
  const Vec3ff c0 = vertex(first+0,itime);
  const Vec3ff c1 = vertex(first+1,itime);
  const Vec3ff c2 = vertex(first+2,itime);
  const Vec3ff c3 = vertex(first+3,itime);

  /* transformed position, scaled radius */
  const Vec3ff q0(xfmPoint(space,((Vec3fa)c0-ofs)*Vec3fa(scale)), maxRadiusScale*c0.w*radiusScale);
  const Vec3ff q1(xfmPoint(space,((Vec3fa)c1-ofs)*Vec3fa(scale)), maxRadiusScale*c1.w*radiusScale);
  const Vec3ff q2(xfmPoint(space,((Vec3fa)c2-ofs)*Vec3fa(scale)), maxRadiusScale*c2.w*radiusScale);
  const Vec3ff q3(xfmPoint(space,((Vec3fa)c3-ofs)*Vec3fa(scale)), maxRadiusScale*c3.w*radiusScale);

  return Curve3ff(q0,q1,q2,q3);
}
/*! Returns the curve formed by the four normals of primitive i at time step itime.
 *  \param i     primitive index into the curve index buffer
 *  \param itime time step to read, defaults to 0 */
__forceinline const Curve3fa getNormalCurve(size_t i, size_t itime = 0) const
{
  const unsigned int first = curve(i);
  const Vec3fa m0 = normal(first+0,itime);
  const Vec3fa m1 = normal(first+1,itime);
  const Vec3fa m2 = normal(first+2,itime);
  const Vec3fa m3 = normal(first+3,itime);
  return Curve3fa (m0,m1,m2,m3);
}
/*! Returns the oriented surface of primitive i at time step itime, built from
 *  the radius-scaled center curve and the normal curve.
 *  \param i     primitive index into the curve index buffer
 *  \param itime time step to read, defaults to 0 */
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const
{
  const Curve3ff centerCurve = getCurveScaledRadius(i,itime);
  const Curve3fa normalCurve = getNormalCurve(i,itime);
  return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(centerCurve,normalCurve);
}
/*! Returns the oriented surface of primitive i at time step itime, transformed
 *  by `space` through the surface's xfm(). */
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  return getOrientedCurveScaledRadius(i,itime).xfm(space);
}
/*! Returns the oriented surface of primitive i at time step itime, passed
 *  through the surface's xfm(space,ofs,scale). */
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  return getOrientedCurveScaledRadius(i,itime).xfm(space,ofs,scale);
}
/*! Checks whether primitive i is usable at every time step of itime_range
 *  (both ends inclusive).
 *
 *  The primitive is rejected when its four control points do not all lie inside
 *  the vertex buffer, or when at some time step a radius or a position fails
 *  isvalid(). For GTY_SUBTYPE_ORIENTED_CURVE the first two normals and the
 *  accurate bounds of the oriented surface have to pass isvalid() as well.
 *  \param ctype       curve subtype of the geometry
 *  \param i           primitive index into the curve index buffer
 *  \param itime_range time steps to test */
__forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const
{
  const unsigned int first = curve(i);
  if (first+3 >= numVertices()) return false;

  const bool oriented = (ctype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE);

  for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
  {
    /* radius and position of all four control points */
    for (unsigned int k = 0; k < 4; k++)
    {
      if (!isvalid(radius(first+k,itime)))
        return false;
      const Vec3fa position = vertex(first+k,itime);
      if (!isvalid(position))
        return false;
    }

    if (!oriented) continue;

    /* orientation data of oriented curves */
    const Vec3fa m0 = normal(first+0,itime);
    const Vec3fa m1 = normal(first+1,itime);
    if (!isvalid(m0) || !isvalid(m1))
      return false;

    const BBox3fa surfaceBounds = getOrientedCurveScaledRadius(i,itime).accurateBounds();
    if (!isvalid(surfaceBounds))
      return false;
  }
  return true;
}
/*! Evaluates a vertex or vertex-attribute buffer along the curve of one primitive.
 *
 *  Reads primID, u, bufferType, bufferSlot, valueCount and the output pointers
 *  P / dPdu / ddPdudu from `args`. The four control values of the curve are
 *  fetched from the selected buffer, N floats at a time, and for every non-null
 *  output pointer the curve value (eval), first derivative (eval_du) or second
 *  derivative (eval_dudu) at u is stored.
 *  \tparam N SIMD width used to process the valueCount floats
 *  \param args interpolation arguments; the buffer slot must be valid for the
 *              selected buffer type (checked by assertion only) */
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)
{
  unsigned int primID = args->primID;
  float u = args->u;
  RTCBufferType bufferType = args->bufferType;
  unsigned int bufferSlot = args->bufferSlot;
  float* P = args->P;
  float* dPdu = args->dPdu;
  float* ddPdudu = args->ddPdudu;
  unsigned int valueCount = args->valueCount;

  /* calculate base pointer and stride */
  /* the attribute slot is used as an index below, so it has to be strictly
     smaller than the number of attribute buffers */
  assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
         (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot < vertexAttribs.size()));
  const char* src = nullptr;
  size_t stride = 0;
  if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
    src = vertexAttribs[bufferSlot].getPtr();
    stride = vertexAttribs[bufferSlot].getStride();
  } else {
    src = vertices[bufferSlot].getPtr();
    stride = vertices[bufferSlot].getStride();
  }

  for (unsigned int i=0; i<valueCount; i+=N)
  {
    /* byte offset of the current group of floats inside one element */
    size_t ofs = i*sizeof(float);
    const size_t index = curves[primID];
    /* lanes at or beyond valueCount are masked out */
    const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
    const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+0)*stride+ofs]);
    const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+1)*stride+ofs]);
    const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+2)*stride+ofs]);
    const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+3)*stride+ofs]);
    const Curve<vfloat<N>> curve(p0,p1,p2,p3);
    if (P      ) mem<vfloat<N>>::storeu(valid,P+i,      curve.eval(u));
    if (dPdu   ) mem<vfloat<N>>::storeu(valid,dPdu+i,   curve.eval_du(u));
    if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u));
  }
}
/*! Interpolation entry point; runs interpolate_impl with a SIMD width of 4. */
void interpolate(const RTCInterpolateArguments* const args)
{
  interpolate_impl<4>(args);
}
};
template<template<typename Ty> class Curve>
struct HermiteCurveGeometryInterface : public CurveGeometry
{
typedef Curve<Vec3ff> HermiteCurve3ff;
typedef Curve<Vec3fa> HermiteCurve3fa;
/*! Forwards device and geometry type to the CurveGeometry base; adds no state of its own. */
HermiteCurveGeometryInterface (Device* device, Geometry::GType gtype)
: CurveGeometry(device,gtype) {}
/*! Returns Hermite segment i at time step itime with the w component of both
 *  positions and both tangents multiplied by maxRadiusScale.
 *  \param i     primitive index into the curve index buffer
 *  \param itime time step to read, defaults to 0 */
__forceinline const HermiteCurve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const
{
  const unsigned int first = curve(i);

  /* load positions and tangents, scaling the w component of each */
  Vec3ff pos0 = vertex(first+0,itime);   pos0.w *= maxRadiusScale;
  Vec3ff pos1 = vertex(first+1,itime);   pos1.w *= maxRadiusScale;
  Vec3ff tan0 = tangent(first+0,itime);  tan0.w *= maxRadiusScale;
  Vec3ff tan1 = tangent(first+1,itime);  tan1.w *= maxRadiusScale;

  return HermiteCurve3ff (pos0,tan0,pos1,tan1);
}
/*! Returns Hermite segment i at time step itime transformed by `space`:
 *  positions go through xfmPoint, tangents through xfmVector, and every w
 *  component is multiplied by maxRadiusScale.
 *  \param space linear space to transform with
 *  \param i     primitive index into the curve index buffer
 *  \param itime time step to read, defaults to 0 */
__forceinline const HermiteCurve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  const unsigned int first = curve(i);

  /* untransformed segment data */
  const Vec3ff pos0 = vertex(first+0,itime);
  const Vec3ff pos1 = vertex(first+1,itime);
  const Vec3ff tan0 = tangent(first+0,itime);
  const Vec3ff tan1 = tangent(first+1,itime);

  /* transformed xyz, scaled w */
  const Vec3ff outPos0(xfmPoint(space,(Vec3fa)pos0),maxRadiusScale*pos0.w);
  const Vec3ff outPos1(xfmPoint(space,(Vec3fa)pos1),maxRadiusScale*pos1.w);
  const Vec3ff outTan0(xfmVector(space,(Vec3fa)tan0),maxRadiusScale*tan0.w);
  const Vec3ff outTan1(xfmVector(space,(Vec3fa)tan1),maxRadiusScale*tan1.w);

  return HermiteCurve3ff(outPos0,outTan0,outPos1,outTan1);
}
/*! Returns the i'th Hermite segment at time step itime after subtracting ofs
 *  from the end points, scaling by scale and transforming by space. The w
 *  components are multiplied by maxRadiusScale and by r_scale0*scale. */
__forceinline const HermiteCurve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  /* combined factor applied to the w components */
  const float radius_scale = r_scale0*scale;
  const unsigned int first = curve(i);

  /* untransformed control values */
  const Vec3ff p0 = vertex(first+0,itime);
  const Vec3ff p1 = vertex(first+1,itime);
  const Vec3ff d0 = tangent(first+0,itime);
  const Vec3ff d1 = tangent(first+1,itime);

  /* only the end points are offset; tangents are just scaled */
  const Vec3ff P0(xfmPoint(space,(p0-ofs)*Vec3fa(scale)), maxRadiusScale*p0.w*radius_scale);
  const Vec3ff P1(xfmPoint(space,(p1-ofs)*Vec3fa(scale)), maxRadiusScale*p1.w*radius_scale);
  const Vec3ff D0(xfmVector(space,d0*Vec3fa(scale)), maxRadiusScale*d0.w*radius_scale);
  const Vec3ff D1(xfmVector(space,d1*Vec3fa(scale)), maxRadiusScale*d1.w*radius_scale);

  return HermiteCurve3ff(P0,D0,P1,D1);
}
/*! Returns the Hermite curve built from the normals and normal derivatives
 *  at both end points of the i'th segment at time step itime. */
__forceinline const HermiteCurve3fa getNormalCurve(size_t i, size_t itime = 0) const
{
  const unsigned int first = curve(i);

  const Vec3fa nrm0 = normal(first+0,itime);
  const Vec3fa nrm1 = normal(first+1,itime);
  const Vec3fa dnrm0 = dnormal(first+0,itime);
  const Vec3fa dnrm1 = dnormal(first+1,itime);

  return HermiteCurve3fa(nrm0,dnrm0,nrm1,dnrm1);
}
/*! Builds the oriented curve surface of the i'th segment at time step itime
 *  from its radius-scaled center curve and its normal curve. */
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const
{
  const HermiteCurve3ff center_curve = getCurveScaledRadius(i,itime);
  const HermiteCurve3fa normal_curve = getNormalCurve(i,itime);
  return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center_curve,normal_curve);
}
/*! Oriented curve surface of the i'th segment at time step itime, transformed by space. */
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  const TensorLinearCubicBezierSurface3fa surface = getOrientedCurveScaledRadius(i,itime);
  return surface.xfm(space);
}
/*! Oriented curve surface of the i'th segment at time step itime, passed through xfm(space,ofs,scale). */
__forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
  const TensorLinearCubicBezierSurface3fa surface = getOrientedCurveScaledRadius(i,itime);
  return surface.xfm(space,ofs,scale);
}
/*! Checks whether the i'th primitive is valid for every time step in
 *  itime_range (both ends inclusive). Oriented curves
 *  (ctype == GTY_SUBTYPE_ORIENTED_CURVE) additionally need valid normals,
 *  normal derivatives and valid bounds of the oriented surface. */
__forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const
{
  const unsigned int first = curve(i);

  /* both end points of the segment have to exist */
  if (first+1 >= numVertices()) return false;

  for (size_t t = itime_range.begin(); t <= itime_range.end(); t++)
  {
    /* end points */
    const Vec3ff p0 = vertex(first+0,t);
    const Vec3ff p1 = vertex(first+1,t);
    if (!(isvalid4(p0) && isvalid4(p1)))
      return false;

    /* tangents */
    const Vec3ff d0 = tangent(first+0,t);
    const Vec3ff d1 = tangent(first+1,t);
    if (!(isvalid4(d0) && isvalid4(d1)))
      return false;

    /* the remaining checks only apply to oriented curves */
    if (ctype != Geometry::GTY_SUBTYPE_ORIENTED_CURVE)
      continue;

    const Vec3fa nrm0 = normal(first+0,t);
    const Vec3fa nrm1 = normal(first+1,t);
    if (!(isvalid(nrm0) && isvalid(nrm1)))
      return false;

    const Vec3fa dnrm0 = dnormal(first+0,t);
    const Vec3fa dnrm1 = dnormal(first+1,t);
    if (!(isvalid(dnrm0) && isvalid(dnrm1)))
      return false;

    const BBox3fa surface_bounds = getOrientedCurveScaledRadius(i,t).accurateBounds();
    if (!isvalid(surface_bounds))
      return false;
  }
  return true;
}
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)
{
unsigned int primID = args->primID;
float u = args->u;
RTCBufferType bufferType = args->bufferType;
unsigned int bufferSlot = args->bufferSlot;
float* P = args->P;
float* dPdu = args->dPdu;
float* ddPdudu = args->ddPdudu;
unsigned int valueCount = args->valueCount;
/* we interpolate vertex attributes linearly for hermite basis */
if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
{
assert(bufferSlot <= vertexAttribs.size());
const char* vsrc = vertexAttribs[bufferSlot].getPtr();
const size_t vstride = vertexAttribs[bufferSlot].getStride();
for (unsigned int i=0; i<valueCount; i+=N)
{
const size_t ofs = i*sizeof(float);
const size_t index = curves[primID];
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]);
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]);
if (P ) mem<vfloat<N>>::storeu(valid,P+i, madd(1.0f-u,p0,u*p1));
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, p1-p0);
if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
}
}
/* interpolation for vertex buffers */
else
{
assert(bufferSlot < numTimeSteps);
const char* vsrc = vertices[bufferSlot].getPtr();
const char* tsrc = tangents[bufferSlot].getPtr();
const size_t vstride = vertices[bufferSlot].getStride();
const size_t tstride = vertices[bufferSlot].getStride();
for (unsigned int i=0; i<valueCount; i+=N)
{
const size_t ofs = i*sizeof(float);
const size_t index = curves[primID];
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount);
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]);
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]);
const vfloat<N> t0 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+0)*tstride+ofs]);
const vfloat<N> t1 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+1)*tstride+ofs]);
const HermiteCurveT<vfloat<N>> curve(p0,t0,p1,t1);
if (P ) mem<vfloat<N>>::storeu(valid,P+i, curve.eval(u));
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, curve.eval_du(u));
if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u));
}
}
}
/*! Entry point for buffer interpolation: runs interpolate_impl instantiated with N = 4. */
void interpolate(const RTCInterpolateArguments* const args) {
interpolate_impl<4>(args);
}
};
}
DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType);
}

View file

@ -0,0 +1,468 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "buffer.h"
namespace embree
{
/*! Grid Mesh */
struct GridMesh : public Geometry
{
/*! type of this geometry */
static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH;
/*! Description of one grid primitive: where its vertices start in the
 *  vertex buffer, the index distance between two vertex rows, and the number
 *  of vertices in x and y. */
struct Grid
{
  unsigned int startVtxID;     //!< vertex buffer index of grid vertex (0,0)
  unsigned int lineVtxOffset;  //!< index distance between consecutive vertex rows
  unsigned short resX;         //!< number of vertices per row
  unsigned short resY;         //!< number of vertex rows

  /*! Returns bit 15 when the 3x3 vertex pattern starting at column x reaches
   *  or passes the last vertex column, 0 otherwise. */
  __forceinline unsigned int get3x3FlagsX(const unsigned int x) const
  {
    const bool at_border = x + 2 >= (unsigned int)resX;
    return at_border ? (1<<15) : 0;
  }

  /*! Returns bit 15 when the 3x3 vertex pattern starting at row y reaches
   *  or passes the last vertex row, 0 otherwise. */
  __forceinline unsigned int get3x3FlagsY(const unsigned int y) const
  {
    const bool at_border = y + 2 >= (unsigned int)resY;
    return at_border ? (1<<15) : 0;
  }

  /*! outputs grid structure */
  __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) {
    return cout << "Grid { startVtxID " << t.startVtxID
                << ", lineVtxOffset " << t.lineVtxOffset
                << ", resX " << t.resX
                << ", resY " << t.resY
                << " }";
  }
};
public:
/*! grid mesh construction */
GridMesh (Device* device);
/* geometry interface */
public:
void setMask(unsigned mask);
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
void interpolate(const RTCInterpolateArguments* const args);
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)
{
unsigned int primID = args->primID;
float U = args->u;
float V = args->v;
/* clamp input u,v to [0;1] range */
U = max(min(U,1.0f),0.0f);
V = max(min(V,1.0f),0.0f);
RTCBufferType bufferType = args->bufferType;
unsigned int bufferSlot = args->bufferSlot;
float* P = args->P;
float* dPdu = args->dPdu;
float* dPdv = args->dPdv;
float* ddPdudu = args->ddPdudu;
float* ddPdvdv = args->ddPdvdv;
float* ddPdudv = args->ddPdudv;
unsigned int valueCount = args->valueCount;
/* calculate base pointer and stride */
assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
(bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
const char* src = nullptr;
size_t stride = 0;
if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
src = vertexAttribs[bufferSlot].getPtr();
stride = vertexAttribs[bufferSlot].getStride();
} else {
src = vertices[bufferSlot].getPtr();
stride = vertices[bufferSlot].getStride();
}
const Grid& grid = grids[primID];
const int grid_width = grid.resX-1;
const int grid_height = grid.resY-1;
const float rcp_grid_width = rcp(float(grid_width));
const float rcp_grid_height = rcp(float(grid_height));
const int iu = min((int)floor(U*grid_width ),grid_width);
const int iv = min((int)floor(V*grid_height),grid_height);
const float u = U*grid_width-float(iu);
const float v = V*grid_height-float(iv);
for (unsigned int i=0; i<valueCount; i+=N)
{
const size_t ofs = i*sizeof(float);
const unsigned int idx0 = grid.startVtxID + (iv+0)*grid.lineVtxOffset + iu;
const unsigned int idx1 = grid.startVtxID + (iv+1)*grid.lineVtxOffset + iu;
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+0)*stride+ofs]);
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+1)*stride+ofs]);
const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+1)*stride+ofs]);
const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+0)*stride+ofs]);
const vbool<N> left = u+v <= 1.0f;
const vfloat<N> Q0 = select(left,p0,p2);
const vfloat<N> Q1 = select(left,p1,p3);
const vfloat<N> Q2 = select(left,p3,p1);
const vfloat<N> U = select(left,u,vfloat<N>(1.0f)-u);
const vfloat<N> V = select(left,v,vfloat<N>(1.0f)-v);
const vfloat<N> W = 1.0f-U-V;
if (P) {
mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
}
if (dPdu) {
assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)*rcp_grid_width);
assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)*rcp_grid_height);
}
if (ddPdudu) {
assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
}
}
}
void addElementsToCount (GeometryCounts & counts) const;
/*! Returns the sum of getNumQuads over all numPrimitives grids. */
__forceinline unsigned int getNumTotalQuads() const
{
  size_t total = 0;
  size_t gridID = 0;
  while (gridID < numPrimitives) {
    total += getNumQuads(gridID);
    gridID++;
  }
  /* the sum is accumulated as size_t and narrowed to the return type */
  return (unsigned int) total;
}
/*! Returns (resX-1)*(resY-1) for the given grid, but at least 1. */
__forceinline unsigned int getNumQuads(const size_t gridID) const
{
  const Grid& g = grid(gridID);
  const int quads_x = (int)g.resX-1;
  const int quads_y = (int)g.resY-1;
  return (unsigned int) max((int)1,quads_x * quads_y);
}
/*! Returns (resX/2)*(resY/2) for the given grid, but at least 1. */
__forceinline unsigned int getNumSubGrids(const size_t gridID) const
{
  const Grid& g = grid(gridID);
  const unsigned int half_x = (unsigned int)g.resX >> 1;
  const unsigned int half_y = (unsigned int)g.resY >> 1;
  return max((unsigned int)1,half_x * half_y);
}
/*! get fast access to first vertex buffer: returns the base pointer of vertices0 reinterpreted as float* */
__forceinline float * getCompactVertexArray () const {
return (float*) vertices0.getPtr();
}
public:
/*! returns number of vertices (size of the vertex buffer of the first time step) */
__forceinline size_t numVertices() const {
return vertices[0].size();
}
/*! returns i'th grid */
__forceinline const Grid& grid(size_t i) const {
return grids[i];
}
/*! returns i'th vertex of the first time step (read through vertices0) */
__forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load
return vertices0[i];
}
/*! returns pointer to the i'th vertex of the first time step */
__forceinline const char* vertexPtr(size_t i) const {
return vertices0.getPtr(i);
}
/*! returns i'th vertex of itime'th timestep */
__forceinline const Vec3fa vertex(size_t i, size_t itime) const {
return vertices[itime][i];
}
/*! Returns the i'th vertex at the given time, linearly blended between the
 *  two time steps that bound the time segment reported by timeSegment. */
__forceinline const Vec3fa vertex(size_t i, float time) const
{
  /* timeSegment yields the segment index and the fraction inside it */
  float frac;
  const size_t seg = timeSegment(time, frac);

  const Vec3fa begin_vtx = vertex(i, seg+0);
  const Vec3fa end_vtx   = vertex(i, seg+1);

  const float w_begin = 1.0f - frac;
  const float w_end   = frac;
  return madd(Vec3fa(w_begin),begin_vtx,w_end*end_vtx);
}
/*! returns pointer to the i'th vertex of itime'th timestep */
__forceinline const char* vertexPtr(size_t i, size_t itime) const {
return vertices[itime].getPtr(i);
}
/*! returns the vertex buffer index of grid vertex (x,y): startVtxID + x + y*lineVtxOffset */
__forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const {
assert(x < (size_t)g.resX);
assert(y < (size_t)g.resY);
return g.startVtxID + x + y * g.lineVtxOffset;
}
/*! returns grid vertex (x,y) of the first timestep */
__forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const {
  return vertex(grid_vertex_index(g,x,y));
}
/*! returns grid vertex (x,y) of the itime'th timestep */
__forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const {
  return vertex(grid_vertex_index(g,x,y),itime);
}
/*! returns grid vertex (x,y) interpolated for the specified time */
__forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, float time) const {
  return vertex(grid_vertex_index(g,x,y),time);
}
/*! Gathers the four corners of the quad whose first corner is grid vertex
 *  (x,y), first timestep: v0=(x,y), v1=(x+1,y), v2=(x+1,y+1), v3=(x,y+1). */
__forceinline void gather_quad_vertices(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y) const
{
  const size_t x_next = x+1;
  const size_t y_next = y+1;
  v0 = grid_vertex(g,x     ,y     );
  v1 = grid_vertex(g,x_next,y     );
  v2 = grid_vertex(g,x_next,y_next);
  v3 = grid_vertex(g,x     ,y_next);
}
/*! Same corner order as above, with every corner evaluated for the specified time. */
__forceinline void gather_quad_vertices(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y, float time) const
{
  const size_t x_next = x+1;
  const size_t y_next = y+1;
  v0 = grid_vertex(g,x     ,y     ,time);
  v1 = grid_vertex(g,x_next,y     ,time);
  v2 = grid_vertex(g,x_next,y_next,time);
  v3 = grid_vertex(g,x     ,y_next,time);
}
/*! Gathers quad vertices for both kinds of meshes: the time-dependent gather
 *  is used when hasMotionBlur() is true, the first-timestep gather otherwise. */
__forceinline void gather_quad_vertices_safe(Vec3fa& v0, Vec3fa& v1, Vec3fa& v2, Vec3fa& v3, const Grid& g, size_t x, size_t y, float time) const
{
  if (!hasMotionBlur()) {
    gather_quad_vertices(v0,v1,v2,v3,g,x,y);
    return;
  }
  gather_quad_vertices(v0,v1,v2,v3,g,x,y,time);
}
/*! Calculates the bounds of the quad with first corner (sx,sy) over all time
 *  steps. Returns false, leaving bbox untouched, as soon as one of its
 *  vertices is invalid; otherwise stores the bounds in bbox and returns true. */
__forceinline bool buildBoundsQuad(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
{
  const size_t x_end = sx+2;
  const size_t y_end = sy+2;

  BBox3fa merged(empty);
  for (size_t t=0; t<numTimeSteps; t++)
    for (size_t y=sy; y<y_end; y++)
      for (size_t x=sx; x<x_end; x++)
      {
        const Vec3fa p = grid_vertex(g,x,y,t);
        if (unlikely(!isvalid(p)))
          return false;
        merged.extend(p);
      }

  bbox = merged;
  return true;
}
/*! Calculates the bounds of the up to 3x3 vertices starting at (sx,sy),
 *  clipped to the grid resolution, over all time steps. Returns false,
 *  leaving bbox untouched, as soon as one vertex is invalid; otherwise
 *  stores the bounds in bbox and returns true. */
__forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
{
  /* the 3x3 pattern must not run past the last row / column */
  const size_t y_end = min(sy+3,(size_t)g.resY);
  const size_t x_end = min(sx+3,(size_t)g.resX);

  BBox3fa merged(empty);
  for (size_t t=0; t<numTimeSteps; t++)
    for (size_t y=sy; y<y_end; y++)
      for (size_t x=sx; x<x_end; x++)
      {
        const Vec3fa p = grid_vertex(g,x,y,t);
        if (unlikely(!isvalid(p)))
          return false;
        merged.extend(p);
      }

  bbox = merged;
  return true;
}
/*! Calculates the bounds of the up to 3x3 vertices starting at (sx,sy),
 *  clipped to the grid resolution, for time step itime only. Returns false,
 *  leaving bbox untouched, as soon as one vertex is invalid; otherwise
 *  stores the bounds in bbox and returns true. */
__forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const
{
  assert(itime < numTimeSteps);

  /* the 3x3 pattern must not run past the last row / column */
  const size_t y_end = min(sy+3,(size_t)g.resY);
  const size_t x_end = min(sx+3,(size_t)g.resX);

  BBox3fa merged(empty);
  for (size_t y=sy; y<y_end; y++)
    for (size_t x=sx; x<x_end; x++)
    {
      const Vec3fa p = grid_vertex(g,x,y,itime);
      if (unlikely(!isvalid(p)))
        return false;
      merged.extend(p);
    }

  bbox = merged;
  return true;
}
/*! check if the gridID'th primitive is valid at the itime'th time step (forwards the single-step range [itime,itime]) */
__forceinline bool valid(size_t gridID, size_t itime=0) const {
return valid(gridID, make_range(itime, itime));
}
/*! Checks whether grid gridID is valid for every time step in itime_range
 *  (both ends inclusive): the grid ID and the first and last vertex index
 *  must be in range, and every grid vertex must pass isvalid. */
__forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const
{
  if (unlikely(gridID >= grids.size()))
    return false;

  const Grid &g = grid(gridID);

  /* first and last vertex referenced by the grid have to exist */
  if (unlikely(g.startVtxID + 0 >= vertices0.size()))
    return false;
  if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size()))
    return false;

  /* every vertex has to be valid at every requested time step */
  for (size_t y=0; y<g.resY; y++) {
    for (size_t x=0; x<g.resX; x++) {
      for (size_t t = itime_range.begin(); t <= itime_range.end(); t++) {
        if (!isvalid(grid_vertex(g,x,y,t)))
          return false;
      }
    }
  }
  return true;
}
/*! Returns the bounds computed by buildBounds for the subgrid at (sx,sy) and
 *  time step itime. The result of buildBounds is ignored, so the returned
 *  box stays empty when buildBounds reports an invalid vertex. */
__forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const
{
  BBox3fa result(empty);
  buildBounds(g,sx,sy,itime,result);
  return result;
}
/*! Returns the linear bounds of the subgrid at (sx,sy) built from the bounds
 *  at time steps itime and itime+1. The results of buildBounds are ignored. */
__forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const {
  BBox3fa begin_bounds, end_bounds;
  buildBounds(g,sx,sy,itime+0,begin_bounds);
  buildBounds(g,sx,sy,itime+1,end_bounds);
  return LBBox3fa(begin_bounds,end_bounds);
}
/*! Calculates the linear bounds of the subgrid at (sx,sy) for the time range
 *  dt, using the per-time-step bounds of this mesh. */
__forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const {
  /* bounds of the subgrid at a single time step */
  const auto bounds_at = [&] (size_t itime) { return bounds(g,sx,sy,itime); };
  return LBBox3fa(bounds_at, dt, time_range, fnumTimeSegments);
}
/*! returns pos_inf for every primitive; the index i is not used */
__forceinline float projectedPrimitiveArea(const size_t i) const {
return pos_inf;
}
public:
BufferView<Grid> grids; //!< array of grids
BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
Device::vector<BufferView<Vec3fa>> vertices = device; //!< vertex array for each timestep
Device::vector<RawBufferView> vertexAttribs = device; //!< vertex attributes
#if defined(EMBREE_SYCL_SUPPORT)
public:
struct PrimID_XY { uint32_t primID; uint16_t x,y; };
Device::vector<PrimID_XY> quadID_to_primID_xy = device; //!< maps a quad to the primitive ID and grid coordinates
#endif
};
namespace isa
{
struct GridMeshISA : public GridMesh
{
/*! Constructs the ISA-specific grid mesh by forwarding device to GridMesh. */
GridMeshISA (Device* device)
: GridMesh(device) {}
/*! Returns the linear bounds over time_range of the subgrid stored at
 *  sgrids[buildID]; the entry supplies the grid (primID) and the subgrid
 *  position (x(), y()). */
LBBox3fa vlinearBounds(size_t buildID, const BBox1f& time_range, const SubGridBuildData * const sgrids) const override {
  const SubGridBuildData &entry = sgrids[buildID];
  const unsigned int gridID = entry.primID;
  const size_t sx = entry.x();
  const size_t sy = entry.y();
  return linearBounds(grid(gridID),sx,sy,time_range);
}
#if defined(EMBREE_SYCL_SUPPORT)
/*! Creates one PrimRef per valid quad with index in r, writing them to prims
 *  starting at position k. Quads are looked up through quadID_to_primID_xy;
 *  quads for which buildBoundsQuad fails are skipped. */
PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const override
{
  PrimInfo pinfo(empty);
  for (size_t quadID=r.begin(); quadID<r.end(); quadID++)
  {
    BBox3fa box = empty;
    const PrimID_XY& quad = quadID_to_primID_xy[quadID];
    if (buildBoundsQuad(grids[quad.primID],quad.x,quad.y,box))
    {
      const PrimRef prim(box,geomID,unsigned(quadID));
      pinfo.add_center2(prim);
      prims[k++] = prim;
    }
  }
  return pinfo;
}
#endif
/*! Creates one PrimRef and one SubGridBuildData entry per subgrid of every
 *  valid grid with index in r, writing both at position k onwards. Subgrids
 *  start at every second vertex in x and y; subgrids for which buildBounds
 *  fails are skipped. */
PrimInfo createPrimRefArray(mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, const range<size_t>& r, size_t k, unsigned int geomID) const override
{
  PrimInfo pinfo(empty);
  for (size_t gridID=r.begin(); gridID<r.end(); gridID++)
  {
    if (!valid(gridID)) continue;
    const GridMesh::Grid &g = grid(gridID);

    for (unsigned int y=0; y<g.resY-1u; y+=2)
      for (unsigned int x=0; x<g.resX-1u; x+=2)
      {
        /* bounds of the subgrid starting at (x,y) */
        BBox3fa box = empty;
        if (!buildBounds(g,x,y,box)) continue;

        const PrimRef prim(box,(unsigned)geomID,(unsigned)k);
        pinfo.add_center2(prim);

        /* subgrid position is stored together with the 3x3 border flags */
        sgrids[k] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(gridID));
        prims[k++] = prim;
      }
  }
  return pinfo;
}
#if defined(EMBREE_SYCL_SUPPORT)
/*! Creates one PrimRef per quad with index in r, writing them to prims
 *  starting at position k. Each PrimRef bounds the quad's linear bounds over
 *  the intersection of time_range with the time range of this geometry. */
PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const override
{
  const BBox1f clipped_range = BBox1f::intersect(getTimeRange(), time_range);

  PrimInfo pinfo(empty);
  for (size_t quadID=r.begin(); quadID<r.end(); quadID++)
  {
    const PrimID_XY& quad = quadID_to_primID_xy[quadID];
    const LBBox3fa quad_lbounds = linearBounds(grids[quad.primID],quad.x,quad.y,clipped_range);
    const PrimRef prim(quad_lbounds.bounds(), unsigned(geomID), unsigned(quadID));
    pinfo.add_center2(prim);
    prims[k++] = prim;
  }
  return pinfo;
}
#endif
/*! Creates one PrimRefMB and one SubGridBuildData entry per subgrid of every
 *  grid with index in r that is valid over the time segments of t0t1,
 *  writing both at position k onwards. Subgrids start at every second vertex
 *  in x and y. */
PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const override
{
  PrimInfoMB pinfoMB(empty);
  for (size_t gridID=r.begin(); gridID<r.end(); gridID++)
  {
    if (!valid(gridID, timeSegmentRange(t0t1))) continue;
    const GridMesh::Grid &g = grid(gridID);

    for (unsigned int y=0; y<g.resY-1u; y+=2)
      for (unsigned int x=0; x<g.resX-1u; x+=2)
      {
        const PrimRefMB prim(linearBounds(g,x,y,t0t1),numTimeSegments(),time_range,numTimeSegments(),unsigned(geomID),unsigned(k));
        pinfoMB.add_primref(prim);

        /* subgrid position is stored together with the 3x3 border flags */
        sgrids[k] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(gridID));
        prims[k++] = prim;
      }
  }
  return pinfoMB;
}
};
}
DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*);
}

View file

@ -0,0 +1,302 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "accel.h"
namespace embree
{
struct MotionDerivativeCoefficients;
/*! Instanced acceleration structure */
struct Instance : public Geometry
{
//ALIGNED_STRUCT_(16);
static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE;
public:
Instance (Device* device, Accel* object = nullptr, unsigned int numTimeSteps = 1);
~Instance();
private:
Instance (const Instance& other) DELETED; // do not implement
Instance& operator= (const Instance& other) DELETED; // do not implement
private:
LBBox3fa nonlinearBounds(const BBox1f& time_range_in,
const BBox1f& geom_time_range,
float geom_time_segments) const;
BBox3fa boundSegment(size_t itime,
BBox3fa const& obbox0, BBox3fa const& obbox1,
BBox3fa const& bbox0, BBox3fa const& bbox1,
float t_min, float t_max) const;
/*! Calculates the instance bounds at fraction f between time steps itime0
 *  and itime1. The object bounds are interpolated linearly; the transform is
 *  interpolated with slerp for quaternion instances and with lerp otherwise. */
__forceinline BBox3fa bounds(size_t itime0, size_t itime1, float f) const
{
  const BBox3fa object_bounds = lerp(getObjectBounds(itime0), getObjectBounds(itime1), f);

  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return xfmBounds(slerp(local2world[itime0], local2world[itime1], f), object_bounds);

  return xfmBounds(lerp(local2world[itime0], local2world[itime1], f), object_bounds);
}
public:
virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
virtual void setInstancedScene(const Ref<Scene>& scene) override;
virtual void setTransform(const AffineSpace3fa& local2world, unsigned int timeStep) override;
virtual void setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) override;
virtual AffineSpace3fa getTransform(float time) override;
virtual AffineSpace3fa getTransform(size_t, float time) override;
virtual void setMask (unsigned mask) override;
virtual void build() {}
virtual void addElementsToCount (GeometryCounts & counts) const override;
virtual void commit() override;
public:
/*! Calculates the bounds of the instance: the bounds of the instanced object
 *  transformed by the local-to-world transformation of time step 0. */
__forceinline BBox3fa bounds(size_t i) const {
  assert(i == 0);
  const BBox3fa object_bounds = object->bounds.bounds();

  /* quaternion instances store a decomposition that has to be converted first */
  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return xfmBounds(quaternionDecompositionToAffineSpace(local2world[0]),object_bounds);

  return xfmBounds(local2world[0],object_bounds);
}
/*! gets the bounds of the instanced scene at the time returned by timeStep(itime) */
__forceinline BBox3fa getObjectBounds(size_t itime) const {
return object->getBounds(timeStep(itime));
}
/*! Calculates the bounds of the instance at time step itime: the object
 *  bounds of that time step transformed by local2world[itime]. */
__forceinline BBox3fa bounds(size_t i, size_t itime) const {
  assert(i == 0);
  const BBox3fa object_bounds = getObjectBounds(itime);

  /* quaternion instances store a decomposition that has to be converted first */
  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return xfmBounds(quaternionDecompositionToAffineSpace(local2world[itime]),object_bounds);

  return xfmBounds(local2world[itime],object_bounds);
}
/*! Calculates the linear bounds of the instance for the time range dt by
 *  forwarding to nonlinearBounds with this geometry's time range and
 *  segment count. */
__forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
  assert(i == 0);
  return nonlinearBounds(dt, time_range, fnumTimeSegments);
}
/*! Calculates the bounds of the instance, stores them in *bbox when bbox is
 *  non-null, and returns whether the bounds are valid. */
__forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
{
  assert(i==0);
  const BBox3fa instance_bounds = bounds(i);
  if (bbox != nullptr)
    *bbox = instance_bounds;
  return isvalid(instance_bounds);
}
/*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid */
/* NOTE(review): the linearBounds overload defined in this struct takes a BBox1f
 * time range as its second argument, while an integer itime is passed here;
 * presumably it converts implicitly to a BBox1f - confirm that this yields the
 * intended time range. */
__forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
{
assert(i==0);
const LBBox3fa bounds = linearBounds(i,itime);
/* bbox receives the merged box of the linear bounds; validity is checked on the linear bounds */
bbox = bounds.bounds ();
return isvalid(bounds);
}
/* gets version info of topology; numPrimitives serves as the version value */
unsigned int getTopologyVersion() const {
return numPrimitives;
}
/* returns true if topology changed, i.e. numPrimitives differs from otherVersion */
bool topologyChanged(unsigned int otherVersion) const {
return numPrimitives != otherVersion;
}
/*! Checks whether the instance bounds are valid at every time step in
 *  itime_range (both ends inclusive). */
__forceinline bool valid(size_t i, const range<size_t>& itime_range) const
{
  assert(i == 0);
  for (size_t t = itime_range.begin(); t <= itime_range.end(); t++)
  {
    const bool step_ok = isvalid(bounds(i,t));
    if (!step_ok)
      return false;
  }
  return true;
}
/*! Returns the local-to-world transformation of time step 0. Quaternion
 *  instances convert their stored decomposition to an affine space first. */
__forceinline AffineSpace3fa getLocal2World() const
{
  const bool is_quaternion = (gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION);
  if (unlikely(is_quaternion))
    return quaternionDecompositionToAffineSpace(local2world[0]);

  return local2world[0];
}
/*! Returns the local-to-world transformation at time t. Without time
 *  segments this is the transformation of time step 0; otherwise the two
 *  transformations bounding the time segment of t are interpolated, with
 *  slerp for quaternion instances and lerp otherwise. */
__forceinline AffineSpace3fa getLocal2World(float t) const
{
  /* no time segments: fall back to the static transformation */
  if (!(numTimeSegments() > 0))
    return getLocal2World();

  float frac;
  const unsigned int seg = timeSegment(t, frac);

  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return slerp(local2world[seg+0],local2world[seg+1],frac);

  return lerp(local2world[seg+0],local2world[seg+1],frac);
}
/*! Returns the stored world-to-local transformation of time step 0. */
__forceinline AffineSpace3fa getWorld2Local() const {
  return world2local0;
}
/*! Returns the world-to-local transformation at time t: the stored
 *  transformation when there are no time segments, otherwise the inverse of
 *  the interpolated local-to-world transformation. */
__forceinline AffineSpace3fa getWorld2Local(float t) const {
  if (!(numTimeSegments() > 0))
    return getWorld2Local();
  return rcp(getLocal2World(t));
}
/*! Returns the world-to-local transformations for a packet of K times t,
 *  dispatching to the slerp variant for quaternion instances and to the
 *  lerp variant otherwise. */
template<int K>
__forceinline AffineSpace3vf<K> getWorld2Local(const vbool<K>& valid, const vfloat<K>& t) const
{
  const bool is_quaternion = (gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION);
  if (unlikely(is_quaternion))
    return getWorld2LocalSlerp<K>(valid, t);

  return getWorld2LocalLerp<K>(valid, t);
}
/*! returns the area of the instance bounds of time step 0 (see bounds(i)) */
__forceinline float projectedPrimitiveArea(const size_t i) const {
return area(bounds(i));
}
private:
/*! Returns the world-to-local transformations for a packet of K times t,
 *  using slerp between the local2world entries that bound each lane's time
 *  segment and inverting the result. */
template<int K>
__forceinline AffineSpace3vf<K> getWorld2LocalSlerp(const vbool<K>& valid, const vfloat<K>& t) const
{
/* per-lane time segment index (itime_k) and fraction inside the segment (ftime) */
vfloat<K> ftime;
const vint<K> itime_k = timeSegment<K>(t, ftime);
assert(any(valid));
/* take the time segment of the first active lane as candidate for the whole packet */
const size_t index = bsf(movemask(valid));
const int itime = itime_k[index];
/* fast path: all active lanes fall into the same time segment */
if (likely(all(valid, itime_k == vint<K>(itime)))) {
return rcp(slerp(AffineSpace3vff<K>(local2world[itime+0]),
AffineSpace3vff<K>(local2world[itime+1]),
ftime));
}
else {
/* slow path: fill space0/space1 one distinct time segment at a time.
 * NOTE(review): next_unique is defined elsewhere; presumably it removes the
 * handled lanes from valid1 and reports them in valid2 - confirm. */
AffineSpace3vff<K> space0,space1;
vbool<K> valid1 = valid;
while (any(valid1)) {
vbool<K> valid2;
const int itime = next_unique(valid1, itime_k, valid2);
space0 = select(valid2, AffineSpace3vff<K>(local2world[itime+0]), space0);
space1 = select(valid2, AffineSpace3vff<K>(local2world[itime+1]), space1);
}
return rcp(slerp(space0, space1, ftime));
}
}
/*! Returns the world-to-local transformations for a packet of K times t,
 *  using lerp between the local2world entries that bound each lane's time
 *  segment and inverting the result. */
template<int K>
__forceinline AffineSpace3vf<K> getWorld2LocalLerp(const vbool<K>& valid, const vfloat<K>& t) const
{
/* per-lane time segment index (itime_k) and fraction inside the segment (ftime) */
vfloat<K> ftime;
const vint<K> itime_k = timeSegment<K>(t, ftime);
assert(any(valid));
/* take the time segment of the first active lane as candidate for the whole packet */
const size_t index = bsf(movemask(valid));
const int itime = itime_k[index];
/* fast path: all active lanes fall into the same time segment */
if (likely(all(valid, itime_k == vint<K>(itime)))) {
return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]),
AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]),
ftime));
} else {
/* slow path: fill space0/space1 one distinct time segment at a time.
 * NOTE(review): next_unique is defined elsewhere; presumably it removes the
 * handled lanes from valid1 and reports them in valid2 - confirm. */
AffineSpace3vf<K> space0,space1;
vbool<K> valid1 = valid;
while (any(valid1)) {
vbool<K> valid2;
const int itime = next_unique(valid1, itime_k, valid2);
space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+0]), space0);
space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)local2world[itime+1]), space1);
}
return rcp(lerp(space0, space1, ftime));
}
}
public:
Accel* object; //!< pointer to instanced acceleration structure
AffineSpace3ff* local2world; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
AffineSpace3fa world2local0; //!< transformation from world space to local space for timestep 0
};
namespace isa
{
struct InstanceISA : public Instance
{
/*! Constructs the ISA-specific instance by forwarding device to Instance. */
InstanceISA (Device* device)
: Instance(device) {}
/*! Returns the linear bounds of primitive primID for time_range by forwarding to linearBounds. */
LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
return linearBounds(primID,time_range);
}
/*! Creates the single PrimRef of the instance at prims[k] and returns its
 *  PrimInfo. When the instance bounds are invalid nothing is written and an
 *  empty PrimInfo is returned. The range r is expected to be [0,1). */
PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  assert(r.begin() == 0);
  assert(r.end() == 1);

  PrimInfo pinfo(empty);
  BBox3fa box = empty;
  if (buildBounds(0,&box))
  {
    const PrimRef prim(box,geomID,unsigned(0));
    pinfo.add_center2(prim);
    prims[k++] = prim;
  }
  return pinfo;
}
/*! Creates the single PrimRef of the instance at prims[k] and returns its
 *  PrimInfo. The bounds come from buildBounds(0,...); the itime argument is
 *  not used by this implementation. When the bounds are invalid nothing is
 *  written and an empty PrimInfo is returned. The range r is expected to be [0,1). */
PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  assert(r.begin() == 0);
  assert(r.end() == 1);

  PrimInfo pinfo(empty);
  BBox3fa box = empty;
  if (buildBounds(0,&box))
  {
    const PrimRef prim(box,geomID,unsigned(0));
    pinfo.add_center2(prim);
    prims[k++] = prim;
  }
  return pinfo;
}
/*! Creates the single PrimRef of the instance at prims[k], bounding the
 *  linear bounds over the intersection of time_range with this geometry's
 *  time range. When that intersection is empty nothing is written and an
 *  empty PrimInfo is returned. The range r is expected to be [0,1). */
PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  assert(r.begin() == 0);
  assert(r.end() == 1);

  PrimInfo pinfo(empty);
  const BBox1f clipped_range = intersect(getTimeRange(), time_range);
  if (!clipped_range.empty())
  {
    const BBox3fa box = linearBounds(0, clipped_range).bounds();
    const PrimRef prim(box, geomID, unsigned(0));
    pinfo.add_center2(prim);
    prims[k++] = prim;
  }
  return pinfo;
}
/*! Creates the single PrimRefMB of the instance at prims[k] for the time
 *  range t0t1. When the instance is not valid over the time segments of t0t1
 *  nothing is written and an empty PrimInfoMB is returned. The range r is
 *  expected to be [0,1). */
PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  assert(r.begin() == 0);
  assert(r.end() == 1);

  PrimInfoMB pinfo(empty);
  if (valid(0, timeSegmentRange(t0t1)))
  {
    const PrimRefMB prim(linearBounds(0,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(0));
    pinfo.add_primref(prim);
    prims[k++] = prim;
  }
  return pinfo;
}
};
}
DECLARE_ISA_FUNCTION(Instance*, createInstance, Device*);
}

View file

@ -0,0 +1,385 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "accel.h"
namespace embree
{
struct MotionDerivativeCoefficients;
/*! Instanced acceleration structure */
struct InstanceArray : public Geometry
{
//ALIGNED_STRUCT_(16);
static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE_ARRAY;
public:
InstanceArray (Device* device, unsigned int numTimeSteps = 1);
~InstanceArray();
private:
InstanceArray (const InstanceArray& other) DELETED; // do not implement
InstanceArray& operator= (const InstanceArray& other) DELETED; // do not implement
private:
LBBox3fa nonlinearBounds(size_t i,
const BBox1f& time_range_in,
const BBox1f& geom_time_range,
float geom_time_segments) const;
BBox3fa boundSegment(size_t i, size_t itime,
BBox3fa const& obbox0, BBox3fa const& obbox1,
BBox3fa const& bbox0, BBox3fa const& bbox1,
float t_min, float t_max) const;
/*! calculates the bounds of instance i interpolated between time steps itime0 and itime1 with weight f;
    quaternion instances interpolate the transformation with slerp, all other instances with lerp */
__forceinline BBox3fa bounds(size_t i, size_t itime0, size_t itime1, float f) const
{
  /* the object space bounds are interpolated linearly in both cases */
  const auto objectBounds = lerp(getObjectBounds(i, itime0), getObjectBounds(i, itime1), f);

  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return xfmBounds(slerp(l2w(i, itime0), l2w(i, itime1), f), objectBounds);

  return xfmBounds(lerp(l2w(i, itime0), l2w(i, itime1), f), objectBounds);
}
public:
virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) override;
virtual void* getBuffer(RTCBufferType type, unsigned int slot) override;
virtual void updateBuffer(RTCBufferType type, unsigned int slot) override;
virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
virtual void setInstancedScene(const Ref<Scene>& scene) override;
virtual void setInstancedScenes(const RTCScene* scenes, size_t numScenes) override;
virtual AffineSpace3fa getTransform(size_t, float time) override;
virtual void setMask (unsigned mask) override;
virtual void build() {}
virtual void addElementsToCount (GeometryCounts & counts) const override;
virtual void commit() override;
public:
/*! calculates the bounds of the i'th instance using the transformation of the first time step;
    returns a default constructed box if the instance is not valid */
__forceinline BBox3fa bounds(size_t i) const {
  if (!valid(i))
    return BBox3fa();

  const auto objectBounds = getObject(i)->bounds.bounds();
  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return xfmBounds(quaternionDecompositionToAffineSpace(l2w(i, 0)), objectBounds);

  return xfmBounds(l2w(i, 0), objectBounds);
}
/*! gets the bounds of the scene instanced by the i'th instance at time step itime;
    returns a default constructed box if the instance is not valid */
__forceinline BBox3fa getObjectBounds(size_t i, size_t itime) const {
  if (valid(i)) {
    const Accel* const scene = getObject(i);
    return scene->getBounds(timeStep(itime));
  }
  return BBox3fa();
}
/*! calculates the bounds of the i'th instance at time step itime;
    returns a default constructed box if the instance is not valid */
__forceinline BBox3fa bounds(size_t i, size_t itime) const {
  if (!valid(i))
    return BBox3fa();

  const BBox3fa objectBounds = getObjectBounds(i, itime);
  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return xfmBounds(quaternionDecompositionToAffineSpace(l2w(i, itime)), objectBounds);

  return xfmBounds(l2w(i, itime), objectBounds);
}
/*! calculates the linear bounds of the i'th instance for time range dt;
    returns default constructed linear bounds if the instance is not valid */
__forceinline LBBox3fa linearBounds(size_t i, const BBox1f& dt) const {
  if (valid(i))
    return nonlinearBounds(i, dt, time_range, fnumTimeSegments);
  return LBBox3fa();
}
/*! calculates the build bounds of the i'th instance; the bounds are stored to bbox when a
    pointer is passed, and the return value tells whether the instance and its bounds are valid */
__forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
{
  if (!valid(i))
    return false;

  const BBox3fa box = bounds(i);
  if (bbox != nullptr)
    *bbox = box;
  return isvalid(box);
}
/*! calculates the build bounds of the i'th item at the itime'th time segment, if it's valid.
    On success path bbox receives bounds() of the linear bounds; bbox is left untouched
    when valid(i) fails. Returns isvalid() of the linear bounds. */
__forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
{
if (!valid(i))
return false;
/* NOTE(review): the only linearBounds overload declared in this struct takes a BBox1f
   time range, so itime is presumably converted implicitly to a BBox1f here - confirm
   that this is the intended time range */
const LBBox3fa bounds = linearBounds(i,itime);
bbox = bounds.bounds ();
return isvalid(bounds);
}
/* gets version info of topology; the number of primitives is used as the version */
unsigned int getTopologyVersion() const {
return numPrimitives;
}
/* returns true if topology changed, i.e. if the current number of primitives
   differs from the passed version */
bool topologyChanged(unsigned int otherVersion) const {
return numPrimitives != otherVersion;
}
/*! check if the i'th instance references a scene: always true when a single
    scene is instanced (object is set), otherwise the i'th entry of object_ids
    must differ from (unsigned int)(-1) */
__forceinline bool valid(size_t i) const
{
if (object) return true;
return (object_ids[i] != (unsigned int)(-1));
}
/*! check if the i'th primitive is valid between the specified time range:
    the bounds at every time step in [itime_range.begin(), itime_range.end()]
    (end inclusive) have to be valid */
__forceinline bool valid(size_t i, const range<size_t>& itime_range) const
{
for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
if (!isvalid(bounds(i,itime))) return false;
return true;
}
/*! returns the local to world transformation of the i'th instance at the first time step;
    quaternion decompositions are converted to an affine space first */
__forceinline AffineSpace3fa getLocal2World(size_t i) const
{
  const bool isQuaternion = (gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION);
  if (unlikely(isQuaternion))
    return quaternionDecompositionToAffineSpace(l2w(i,0));

  return l2w(i, 0);
}
/*! returns the local to world transformation of the i'th instance at time t */
__forceinline AffineSpace3fa getLocal2World(size_t i, float t) const
{
  /* without time segments there is only the transformation of the first time step */
  if (!(numTimeSegments() > 0))
    return getLocal2World(i);

  /* blend the transformations at both ends of the time segment containing t */
  float ftime;
  const unsigned int itime = timeSegment(t, ftime);
  if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
    return slerp(l2w(i, itime+0),l2w(i, itime+1),ftime);

  return lerp(l2w(i, itime+0),l2w(i, itime+1),ftime);
}
/*! returns the world to local transformation of the i'th instance, computed
    as rcp() of the local to world transformation */
__forceinline AffineSpace3fa getWorld2Local(size_t i) const {
return rcp(getLocal2World(i));
}
/*! returns the world to local transformation of the i'th instance at time t,
    computed as rcp() of the local to world transformation at that time */
__forceinline AffineSpace3fa getWorld2Local(size_t i, float t) const {
return rcp(getLocal2World(i, t));
}
/*! returns the world to local transformation of the i'th instance for K times t at once;
    dispatches to the slerp variant for quaternion instances and to the lerp variant otherwise */
template<int K>
__forceinline AffineSpace3vf<K> getWorld2Local(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
{
if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
return getWorld2LocalSlerp<K>(i, valid, t);
return getWorld2LocalLerp<K>(i, valid, t);
}
/*! returns area() of the bounds of the i'th instance at the first time step */
__forceinline float projectedPrimitiveArea(const size_t i) const {
return area(bounds(i));
}
/*! returns the scene instanced by the i'th instance, or nullptr if its id is (unsigned int)(-1) */
inline Accel* getObject(size_t i) const {
  /* fast path: all instances share a single scene */
  if (object)
    return object;

  assert(objects);
  assert(i < numPrimitives);

  const uint32_t id = object_ids[i];
  if (id == (unsigned int)(-1))
    return nullptr;

  assert(id < numObjects);
  return objects[id];
}
private:
/*! returns the world to local transformation of the i'th quaternion instance for the K times t;
    the transformations of the two time steps enclosing each time are blended with slerp and the
    result is inverted with rcp(). At least one lane of valid has to be active. */
template<int K>
__forceinline AffineSpace3vf<K> getWorld2LocalSlerp(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
{
vfloat<K> ftime;
const vint<K> itime_k = timeSegment<K>(t, ftime);
assert(any(valid));
/* time segment of the first active lane */
const size_t index = bsf(movemask(valid));
const int itime = itime_k[index];
/* fast path: all active lanes share that time segment, thus one pair of transformations suffices */
if (likely(all(valid, itime_k == vint<K>(itime)))) {
return rcp(slerp(AffineSpace3vff<K>(l2w(i, itime+0)),
AffineSpace3vff<K>(l2w(i, itime+1)),
ftime));
}
else {
/* slow path: fill space0/space1 per group of lanes selected by valid2;
   presumably next_unique returns one not yet handled time segment, marks its
   lanes in valid2 and removes them from valid1 - TODO confirm */
AffineSpace3vff<K> space0,space1;
vbool<K> valid1 = valid;
while (any(valid1)) {
vbool<K> valid2;
const int itime = next_unique(valid1, itime_k, valid2);
space0 = select(valid2, AffineSpace3vff<K>(l2w(i, itime+0)), space0);
space1 = select(valid2, AffineSpace3vff<K>(l2w(i, itime+1)), space1);
}
return rcp(slerp(space0, space1, ftime));
}
}
/*! returns the world to local transformation of the i'th instance for the K times t;
    the transformations of the two time steps enclosing each time are blended with lerp and the
    result is inverted with rcp(). At least one lane of valid has to be active. */
template<int K>
__forceinline AffineSpace3vf<K> getWorld2LocalLerp(size_t i, const vbool<K>& valid, const vfloat<K>& t) const
{
vfloat<K> ftime;
const vint<K> itime_k = timeSegment<K>(t, ftime);
assert(any(valid));
/* time segment of the first active lane */
const size_t index = bsf(movemask(valid));
const int itime = itime_k[index];
/* fast path: all active lanes share that time segment, thus one pair of transformations suffices */
if (likely(all(valid, itime_k == vint<K>(itime)))) {
return rcp(lerp(AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+0)),
AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+1)),
ftime));
} else {
/* slow path: fill space0/space1 per group of lanes selected by valid2;
   presumably next_unique returns one not yet handled time segment, marks its
   lanes in valid2 and removes them from valid1 - TODO confirm */
AffineSpace3vf<K> space0,space1;
vbool<K> valid1 = valid;
while (any(valid1)) {
vbool<K> valid2;
const int itime = next_unique(valid1, itime_k, valid2);
space0 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+0)), space0);
space1 = select(valid2, AffineSpace3vf<K>((AffineSpace3fa)l2w(i, itime+1)), space1);
}
return rcp(lerp(space0, space1, ftime));
}
}
private:
/*! loads the local to world transformation of the i'th instance at time step itime
    from l2w_buf[itime] and returns it as AffineSpace3ff. The buffer format selects
    how the i'th element is decoded; an unhandled format asserts and returns a
    default constructed AffineSpace3ff. */
__forceinline AffineSpace3ff l2w(size_t i, size_t itime) const {
/* 4x4 column major: the element is reinterpreted as AffineSpace3ff directly */
if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR) {
return *(AffineSpace3ff*)(l2w_buf[itime].getPtr(i));
}
/* quaternion decomposition: all fields are packed into a single AffineSpace3ff */
else if(l2w_buf[itime].getFormat() == RTC_FORMAT_QUATERNION_DECOMPOSITION) {
AffineSpace3ff transform;
QuaternionDecomposition* qd = (QuaternionDecomposition*)l2w_buf[itime].getPtr(i);
/* scale goes to the diagonal of the linear part */
transform.l.vx.x = qd->scale_x;
transform.l.vy.y = qd->scale_y;
transform.l.vz.z = qd->scale_z;
/* skew goes to vy.x, vz.x and vz.y */
transform.l.vy.x = qd->skew_xy;
transform.l.vz.x = qd->skew_xz;
transform.l.vz.y = qd->skew_yz;
/* translation goes to vx.y, vx.z and vy.z */
transform.l.vx.y = qd->translation_x;
transform.l.vx.z = qd->translation_y;
transform.l.vy.z = qd->translation_z;
/* shift goes to the xyz components of p */
transform.p.x = qd->shift_x;
transform.p.y = qd->shift_y;
transform.p.z = qd->shift_z;
// normalize quaternion
Quaternion3f q(qd->quaternion_r, qd->quaternion_i, qd->quaternion_j, qd->quaternion_k);
q = normalize(q);
/* the normalized quaternion is stored in the w components (i,j,k in l, r in p) */
transform.l.vx.w = q.i;
transform.l.vy.w = q.j;
transform.l.vz.w = q.k;
transform.p.w = q.r;
return transform;
}
/* 3x4 column major: the element is reinterpreted as AffineSpace3f and converted */
else if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR) {
AffineSpace3f* l2w = reinterpret_cast<AffineSpace3f*>(l2w_buf[itime].getPtr(i));
return AffineSpace3ff(*l2w);
}
/* 3x4 row major: 12 floats, each row of 4 holds one component (x, then y, then z) of vx, vy, vz and p */
else if (l2w_buf[itime].getFormat() == RTC_FORMAT_FLOAT3X4_ROW_MAJOR) {
float* data = reinterpret_cast<float*>(l2w_buf[itime].getPtr(i));
AffineSpace3f l2w;
l2w.l.vx.x = data[0]; l2w.l.vy.x = data[1]; l2w.l.vz.x = data[2]; l2w.p.x = data[3];
l2w.l.vx.y = data[4]; l2w.l.vy.y = data[5]; l2w.l.vz.y = data[6]; l2w.p.y = data[7];
l2w.l.vx.z = data[8]; l2w.l.vy.z = data[9]; l2w.l.vz.z = data[10]; l2w.p.z = data[11];
return l2w;
}
/* no other format is handled here */
assert(false);
return AffineSpace3ff();
}
/*! loads the local to world transformation of the i'th instance at the first time step */
inline AffineSpace3ff l2w(size_t i) const {
return l2w(i, 0);
}
private:
Accel* object; //!< fast path if only one scene is instanced
Accel** objects;
uint32_t numObjects;
Device::vector<RawBufferView> l2w_buf = device; //!< transformation from local space to world space for each timestep (either normal matrix or quaternion decomposition)
BufferView<uint32_t> object_ids; //!< array of scene ids per instance array primitive
};
namespace isa
{
struct InstanceArrayISA : public InstanceArray
{
InstanceArrayISA (Device* device)
: InstanceArray(device) {}
/*! forwards to linearBounds(primID,time_range) */
LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
return linearBounds(primID,time_range);
}
/*! creates a PrimRef for each valid instance of range r, writes them to prims starting
    at index k, and returns the accumulated PrimInfo */
PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfo info(empty);
  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    BBox3fa box = empty;
    const bool usable = buildBounds(primID, &box) && valid(primID);
    if (usable)
    {
      const PrimRef ref(box, geomID, unsigned(primID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
/*! creates a PrimRef for each instance of range r that has valid build bounds for itime,
    writes them to prims starting at index k, and returns the accumulated PrimInfo */
PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfo info(empty);
  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    BBox3fa box = empty;
    if (buildBounds(primID, itime, box))
    {
      const PrimRef ref(box, geomID, unsigned(primID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
/*! creates a PrimRef for each instance of range r whose bounds over the part of time_range
    overlapping the geometry's time range are valid; the PrimRefs are written to prims
    starting at index k and the accumulated PrimInfo is returned */
PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfo info(empty);
  const BBox1f clipped = BBox1f::intersect(getTimeRange(), time_range);
  if (clipped.empty())
    return info;

  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    /* collapse the linear bounds over the clipped time range into a single box */
    const BBox3fa box = linearBounds(primID, clipped).bounds();
    if (isvalid(box))
    {
      const PrimRef ref(box, geomID, unsigned(primID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
/*! creates a PrimRefMB for each instance of range r that is valid over the time segments of
    t0t1, writes them to prims starting at index k, and returns the accumulated PrimInfoMB */
PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfoMB info(empty);
  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    if (valid(primID, timeSegmentRange(t0t1)))
    {
      const PrimRefMB ref(linearBounds(primID, t0t1),
                          this->numTimeSegments(), this->time_range, this->numTimeSegments(),
                          geomID, unsigned(primID));
      info.add_primref(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
};
}
DECLARE_ISA_FUNCTION(InstanceArray*, createInstanceArray, Device*);
}

View file

@ -0,0 +1,634 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
#include "geometry.h"
#include "buffer.h"
namespace embree
{
/*! represents an array of line segments */
struct LineSegments : public Geometry
{
/*! type of this geometry */
static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE2;
public:
/*! line segments construction */
LineSegments (Device* device, Geometry::GType gtype);
public:
void setMask (unsigned mask);
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify ();
void interpolate(const RTCInterpolateArguments* const args);
void setTessellationRate(float N);
void setMaxRadiusScale(float s);
void addElementsToCount (GeometryCounts & counts) const;
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)
{
unsigned int primID = args->primID;
float u = args->u;
RTCBufferType bufferType = args->bufferType;
unsigned int bufferSlot = args->bufferSlot;
float* P = args->P;
float* dPdu = args->dPdu;
float* ddPdudu = args->ddPdudu;
unsigned int valueCount = args->valueCount;
/* calculate base pointer and stride */
assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
(bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
const char* src = nullptr;
size_t stride = 0;
if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
src = vertexAttribs[bufferSlot].getPtr();
stride = vertexAttribs[bufferSlot].getStride();
} else {
src = vertices[bufferSlot].getPtr();
stride = vertices[bufferSlot].getStride();
}
for (unsigned int i=0; i<valueCount; i+=N)
{
const size_t ofs = i*sizeof(float);
const size_t segment = segments[primID];
const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+0)*stride+ofs]);
const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+1)*stride+ofs]);
if (P ) mem<vfloat<N>>::storeu(valid,P+i,lerp(p0,p1,u));
if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i,p1-p0);
if (ddPdudu) mem<vfloat<N>>::storeu(valid,dPdu+i,vfloat<N>(zero));
}
}
public:
/*! returns the number of vertices */
__forceinline size_t numVertices() const {
return vertices[0].size();
}
/*! returns the i'th segment */
__forceinline const unsigned int& segment(size_t i) const {
return segments[i];
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
/*! returns the i'th segment */
template<int M>
__forceinline const vuint<M> vsegment(const vuint<M>& i) const {
return segments[i.v];
}
#endif
/*! returns true if the RTC_CURVE_FLAG_NEIGHBOR_LEFT flag is set for the i'th segment;
    requires the flags buffer to be set */
__forceinline bool segmentLeftExists(size_t i) const {
assert (flags);
return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_LEFT) != 0;
}
/*! returns true if the RTC_CURVE_FLAG_NEIGHBOR_RIGHT flag is set for the i'th segment;
    requires the flags buffer to be set */
__forceinline bool segmentRightExists(size_t i) const {
assert (flags);
return (flags[i] & RTC_CURVE_FLAG_NEIGHBOR_RIGHT) != 0;
}
/*! returns i'th vertex of the first time step */
__forceinline Vec3ff vertex(size_t i) const {
return vertices0[i];
}
/*! returns pointer to the i'th vertex of the first time step */
__forceinline const char* vertexPtr(size_t i) const {
return vertices0.getPtr(i);
}
/*! returns i'th normal of the first time step */
__forceinline Vec3fa normal(size_t i) const {
return normals0[i];
}
/*! returns i'th radius of the first time step */
__forceinline float radius(size_t i) const {
return vertices0[i].w;
}
/*! returns i'th vertex of itime'th timestep */
__forceinline Vec3ff vertex(size_t i, size_t itime) const {
return vertices[itime][i];
}
/*! returns pointer to the i'th vertex of itime'th timestep */
__forceinline const char* vertexPtr(size_t i, size_t itime) const {
return vertices[itime].getPtr(i);
}
/*! returns i'th normal of itime'th timestep */
__forceinline Vec3fa normal(size_t i, size_t itime) const {
return normals[itime][i];
}
/*! returns i'th radius of itime'th timestep */
__forceinline float radius(size_t i, size_t itime) const {
return vertices[itime][i].w;
}
/*! gathers the curve starting with i'th vertex */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid) const
{
p0 = vertex(vid+0);
p1 = vertex(vid+1);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid) const
{
p0 = vertex(vid.v+0);
p1 = vertex(vid.v+1);
}
#endif
/*! gathers the curve starting with i'th vertex of itime'th timestep */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid, size_t itime) const
{
p0 = vertex(vid+0,itime);
p1 = vertex(vid+1,itime);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid, const vint<M>& itime) const
{
p0 = vertex(vid.v+0,itime.v);
p1 = vertex(vid.v+1,itime.v);
}
#endif
/*! loads both end points of the segment starting at vertex vid for the specified time by
    blending the end points at the two time steps of the time segment that contains time */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, unsigned int vid, float time) const
{
  float frac;
  const size_t itime = timeSegment(time, frac);

  /* end points at the begin and at the end of the time segment */
  Vec3ff begin0,begin1; gather(begin0,begin1,vid,itime);
  Vec3ff end0,end1;     gather(end0,end1,vid,itime+1);

  const float wBegin = 1.0f - frac;
  const float wEnd   = frac;
  p0 = madd(Vec3ff(wBegin),begin0,wEnd*end0);
  p1 = madd(Vec3ff(wBegin),begin1,wEnd*end1);
}
/*! loads curve vertices for the specified time; the time is ignored for geometry without motion blur */
__forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, unsigned int vid, float time) const
{
  if (!hasMotionBlur()) {
    gather(p0,p1,vid);
    return;
  }
  gather(p0,p1,vid,time);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, const vuint<M>& vid, const vfloat<M>& time) const
{
vfloat<M> ftime;
const vint<M> itime = timeSegment<M>(time, ftime);
const vfloat<M> t0 = 1.0f - ftime;
const vfloat<M> t1 = ftime;
Vec4vf<M> a0,a1; vgather<M>(a0,a1,vid,itime);
Vec4vf<M> b0,b1; vgather<M>(b0,b1,vid,itime+1);
p0 = madd(Vec4vf<M>(t0),a0,t1*b0);
p1 = madd(Vec4vf<M>(t0),a1,t1*b1);
}
#endif
/*! gathers the cone curve starting with i'th vertex */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, unsigned int vid) const
{
gather(p0,p1,vid);
cL = !segmentLeftExists (primID);
cR = !segmentRightExists(primID);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, vbool<M>& cL, vbool<M>& cR, const vuint<M>& primID, const vuint<M>& vid) const
{
vgather<M>(p0,p1,vid);
cL = !segmentLeftExists (primID.v);
cR = !segmentRightExists(primID.v);
}
#endif
/*! gathers the cone curve starting with i'th vertex of itime'th timestep */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, size_t itime) const
{
gather(p0,p1,vid,itime);
cL = !segmentLeftExists (primID);
cR = !segmentRightExists(primID);
}
/*! loads cone curve vertices for specified time */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, float time) const
{
gather(p0,p1,vid,time);
cL = !segmentLeftExists (primID);
cR = !segmentRightExists(primID);
}
/*! loads cone curve vertices and end cap flags for the specified time; the time is ignored
    for geometry without motion blur */
__forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, bool& cL, bool& cR, unsigned int primID, size_t vid, float time) const
{
  if (!hasMotionBlur()) {
    gather(p0,p1,cL,cR,primID,vid);
    return;
  }
  gather(p0,p1,cL,cR,primID,vid,time);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, vbool<M>& cL, vbool<M>& cR, const vuint<M>& primID, const vuint<M>& vid, const vfloat<M>& time) const
{
vgather<M>(p0,p1,vid,time);
cL = !segmentLeftExists (primID.v);
cR = !segmentRightExists(primID.v);
}
#endif
/*! gathers the curve starting with vertex vid of the first time step: p0 and p1 are
    the vertices vid and vid+1, p2 is vertex vid-1 if segment primID has a left
    neighbor and p3 is vertex vid+2 if it has a right neighbor; a missing neighbor
    yields Vec3ff(inf) */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid) const
{
p0 = vertex(vid+0);
p1 = vertex(vid+1);
p2 = segmentLeftExists (primID) ? vertex(vid-1) : Vec3ff(inf);
p3 = segmentRightExists(primID) ? vertex(vid+2) : Vec3ff(inf);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid) const
{
p0 = vertex(vid.v+0);
p1 = vertex(vid.v+1);
vbool<M> left = segmentLeftExists (primID.v);
vbool<M> right = segmentRightExists(primID.v);
vuint<M> i2 = select(left, vid-1,vid+0);
vuint<M> i3 = select(right,vid+2,vid+1);
p2 = vertex(i2.v);
p3 = vertex(i3.v);
p2 = select(left, p2,Vec4vf<M>(inf));
p3 = select(right,p3,Vec4vf<M>(inf));
}
#endif
/*! gathers the curve starting with vertex vid of itime'th timestep: p0 and p1 are
    the vertices vid and vid+1, p2 is vertex vid-1 if segment primID has a left
    neighbor and p3 is vertex vid+2 if it has a right neighbor; a missing neighbor
    yields Vec3ff(inf) */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, size_t itime) const
{
p0 = vertex(vid+0,itime);
p1 = vertex(vid+1,itime);
p2 = segmentLeftExists (primID) ? vertex(vid-1,itime) : Vec3ff(inf);
p3 = segmentRightExists(primID) ? vertex(vid+2,itime) : Vec3ff(inf);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid, const vint<M>& itime) const
{
p0 = vertex(vid.v+0, itime.v);
p1 = vertex(vid.v+1, itime.v);
vbool<M> left = segmentLeftExists (primID.v);
vbool<M> right = segmentRightExists(primID.v);
vuint<M> i2 = select(left, vid-1,vid+0);
vuint<M> i3 = select(right,vid+2,vid+1);
p2 = vertex(i2.v, itime.v);
p3 = vertex(i3.v, itime.v);
p2 = select(left, p2,Vec4vf<M>(inf));
p3 = select(right,p3,Vec4vf<M>(inf));
}
#endif
/*! loads the four curve vertices of segment primID for the specified time by blending the
    vertices at the two time steps of the time segment that contains time */
__forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, float time) const
{
  float frac;
  const size_t itime = timeSegment(time, frac);

  /* vertices at the begin and at the end of the time segment */
  Vec3ff begin0,begin1,begin2,begin3; gather(begin0,begin1,begin2,begin3,primID,vid,itime);
  Vec3ff end0,end1,end2,end3;         gather(end0,end1,end2,end3,primID,vid,itime+1);

  const float wBegin = 1.0f - frac;
  const float wEnd   = frac;
  p0 = madd(Vec3ff(wBegin),begin0,wEnd*end0);
  p1 = madd(Vec3ff(wBegin),begin1,wEnd*end1);
  p2 = madd(Vec3ff(wBegin),begin2,wEnd*end2);
  p3 = madd(Vec3ff(wBegin),begin3,wEnd*end3);
}
/*! loads the four curve vertices for the specified time; the time is ignored for geometry
    without motion blur */
__forceinline void gather_safe(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, unsigned int primID, size_t vid, float time) const
{
  if (!hasMotionBlur()) {
    gather(p0,p1,p2,p3,primID,vid);
    return;
  }
  gather(p0,p1,p2,p3,primID,vid,time);
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
template<int M>
__forceinline void vgather(Vec4vf<M>& p0, Vec4vf<M>& p1, Vec4vf<M>& p2, Vec4vf<M>& p3, const vuint<M>& primID, const vuint<M>& vid, const vfloat<M>& time) const
{
vfloat<M> ftime;
const vint<M> itime = timeSegment<M>(time, ftime);
const vfloat<M> t0 = 1.0f - ftime;
const vfloat<M> t1 = ftime;
Vec4vf<M> a0,a1,a2,a3; vgather<M>(a0,a1,a2,a3,primID,vid,itime);
Vec4vf<M> b0,b1,b2,b3; vgather<M>(b0,b1,b2,b3,primID,vid,itime+1);
p0 = madd(Vec4vf<M>(t0),a0,t1*b0);
p1 = madd(Vec4vf<M>(t0),a1,t1*b1);
p2 = madd(Vec4vf<M>(t0),a2,t1*b2);
p3 = madd(Vec4vf<M>(t0),a3,t1*b3);
}
#endif
/*! calculates bounding box of a line segment with end points v0 and v1: the box
    around both points, enlarged by maxRadiusScale times the larger of the two
    radii stored in the w components */
__forceinline BBox3fa bounds(const Vec3ff& v0, const Vec3ff& v1) const
{
const BBox3ff b = merge(BBox3ff(v0),BBox3ff(v1));
return enlarge((BBox3fa)b,maxRadiusScale*Vec3fa(max(v0.w,v1.w)));
}
/*! calculates bounding box of the i'th line segment at the first time step */
__forceinline BBox3fa bounds(size_t i) const
{
  const unsigned int first = segment(i);
  return bounds(vertex(first+0), vertex(first+1));
}
/*! calculates bounding box of the i'th line segment at the itime'th time step */
__forceinline BBox3fa bounds(size_t i, size_t itime) const
{
  const unsigned int first = segment(i);
  return bounds(vertex(first+0,itime), vertex(first+1,itime));
}
/*! calculates bounding box of the i'th line segment at the first time step after
    transforming its end points with space; the radii are kept unchanged */
__forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
{
  const unsigned int first = segment(i);
  const Vec3ff a = vertex(first+0);
  const Vec3ff b = vertex(first+1);
  const Vec3ff xa(xfmVector(space,(Vec3fa)a),a.w);
  const Vec3ff xb(xfmVector(space,(Vec3fa)b),b.w);
  return bounds(xa,xb);
}
/*! calculates bounding box of the i'th line segment at the itime'th time step after
    transforming its end points with space; the radii are kept unchanged */
__forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
{
  const unsigned int first = segment(i);
  const Vec3ff a = vertex(first+0,itime);
  const Vec3ff b = vertex(first+1,itime);
  const Vec3ff xa(xfmVector(space,(Vec3fa)a),a.w);
  const Vec3ff xb(xfmVector(space,(Vec3fa)b),b.w);
  return bounds(xa,xb);
}
/*! calculates bounding box of i'th segment at the itime'th time step after
    subtracting ofs from its end points, scaling them by scale and transforming
    them with space; the radii are scaled by maxRadiusScale*r_scale0*scale */
__forceinline BBox3fa bounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const
{
const float r_scale = r_scale0*scale;
const unsigned int index = segment(i);
const Vec3ff v0 = vertex(index+0,itime);
const Vec3ff v1 = vertex(index+1,itime);
/* NOTE(review): maxRadiusScale is applied to the radii here and once more inside
   bounds(w0,w1), so the enlargement uses maxRadiusScale twice - confirm this
   is intended */
const Vec3ff w0(xfmVector(space,(v0-ofs)*Vec3fa(scale)),maxRadiusScale*v0.w*r_scale);
const Vec3ff w1(xfmVector(space,(v1-ofs)*Vec3fa(scale)),maxRadiusScale*v1.w*r_scale);
return bounds(w0,w1);
}
/*! check if the i'th primitive is valid at the itime'th timestep, by testing
    the time range that consists of just that timestep */
__forceinline bool valid(size_t i, size_t itime) const {
return valid(i, make_range(itime, itime));
}
/*! check if the i'th primitive is valid between the specified time range: both vertex
    indices of the segment have to address existing vertices and, for every time step in
    [itime_range.begin(), itime_range.end()] (end inclusive), both vertices have to pass
    isvalid4 and must not have a negative radius */
__forceinline bool valid(size_t i, const range<size_t>& itime_range) const
{
  const unsigned int index = segment(i);

  /* index and index+1 have to be smaller than the vertex count; the test is written
     without forming index+1, as that sum wraps around to 0 for index 0xFFFFFFFF and
     would let such an index pass */
  const size_t numVerts = numVertices();
  if (numVerts < 2 || size_t(index) > numVerts-2) return false;

  /* the per vertex checks are compiled out for SYCL device code */
#if !defined(__SYCL_DEVICE_ONLY__)
  for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
  {
    const Vec3ff v0 = vertex(index+0,itime); if (unlikely(!isvalid4(v0))) return false;
    const Vec3ff v1 = vertex(index+1,itime); if (unlikely(!isvalid4(v1))) return false;
    if (min(v0.w,v1.w) < 0.0f) return false;
  }
#endif
  return true;
}
/*! calculates the linear bounds of the i'th primitive at the itime'th time segment,
    built from the bounds at the time steps itime and itime+1 */
__forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
}
/*! calculates the build bounds of the i'th primitive at the first time step; the bounds
    are written to bbox only if the primitive is valid, which is also the return value */
__forceinline bool buildBounds(size_t i, BBox3fa* bbox) const
{
  const bool ok = valid(i,0);
  if (ok)
    *bbox = bounds(i);
  return ok;
}
/*! calculates the build bounds of the i'th primitive at the itime'th time segment; the
    primitive has to be valid at both time steps of the segment, otherwise false is
    returned and bbox is left untouched */
__forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
{
  const bool ok = valid(i,itime+0) && valid(i,itime+1);
  if (ok)
    bbox = bounds(i,itime); // use bounds of first time step in builder
  return ok;
}
/*! calculates the linear bounds of the primID'th primitive for the specified time range dt;
    the LBBox3fa is constructed from a callback that returns the bounds at a time step,
    dt, the time range of the geometry and its number of time segments */
__forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
}
/*! calculates the linear bounds of the primID'th primitive for the specified time range dt,
    using the bounds in the passed space at each time step */
__forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
return LBBox3fa([&] (size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
}
/*! calculates the linear bounds of the primID'th primitive for the specified time range dt,
    using the offset, scaled and transformed bounds at each time step */
__forceinline LBBox3fa linearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
return LBBox3fa([&] (size_t itime) { return bounds(ofs, scale, r_scale0, space, primID, itime); }, dt, this->time_range, fnumTimeSegments);
}
/*! calculates the linear bounds of the i'th primitive for the specified time range; the
    bounds are written to bbox only if the primitive is valid over all time segments of
    that range, which is also the return value */
__forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
{
  const bool ok = valid(i, timeSegmentRange(time_range));
  if (ok)
    bbox = linearBounds(i, time_range);
  return ok;
}
/*! get fast access to first vertex buffer: returns the base pointer of vertices0 as float* */
__forceinline float * getCompactVertexArray () const {
return (float*) vertices0.getPtr();
}
public:
BufferView<unsigned int> segments; //!< array of line segment indices
BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer
BufferView<Vec3fa> normals0; //!< fast access to first normal buffer
BufferView<char> flags; //!< start, end flag per segment
Device::vector<BufferView<Vec3ff>> vertices = device; //!< vertex array for each timestep
Device::vector<BufferView<Vec3fa>> normals = device; //!< normal array for each timestep
Device::vector<BufferView<char>> vertexAttribs = device; //!< user buffers
int tessellationRate; //!< tessellation rate for bezier curve
float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii
};
namespace isa
{
struct LineSegmentsISA : public LineSegments
{
LineSegmentsISA (Device* device, Geometry::GType gtype)
: LineSegments(device,gtype) {}
/*! computes a space aligned to the direction of segment primID; falls back to the
    identity space if the normalized direction is not finite */
LinearSpace3fa computeAlignedSpace(const size_t primID) const
{
  const Vec3fa axis = normalize(computeDirection(primID));
  if (!is_finite(axis))
    return LinearSpace3fa(one);
  return frame(axis);
}
/*! computes a space aligned to the direction of segment primID at the middle time step of
    the time segments covered by time_range; returns frame() of the z axis if time_range
    covers no time segment, and the identity space if the normalized direction is not finite */
LinearSpace3fa computeAlignedSpaceMB(const size_t primID, const BBox1f time_range) const
{
  const Vec3fa axisz(0,0,1);

  const range<int> tbounds = this->timeSegmentRange(time_range);
  if (tbounds.size() == 0) return frame(axisz);

  /* use the direction at the time step in the middle of the range */
  const size_t itime = (tbounds.begin()+tbounds.end())/2;
  const Vec3fa dir = normalize(computeDirection(primID,itime));
  if (is_finite(dir)) return frame(dir);
  else return LinearSpace3fa(one);
}
/*! returns the vector from the first to the second vertex of segment primID at the first time step */
Vec3fa computeDirection(unsigned int primID) const
{
  const unsigned first = segment(primID);
  const Vec3fa from = vertex(first+0);
  const Vec3fa to   = vertex(first+1);
  const Vec3fa direction = to-from;
  return direction;
}
/*! returns the vector from the first to the second vertex of segment primID at time step time */
Vec3fa computeDirection(unsigned int primID, size_t time) const
{
  const unsigned first = segment(primID);
  const Vec3fa from = vertex(first+0,time);
  const Vec3fa to   = vertex(first+1,time);
  const Vec3fa direction = to-from;
  return direction;
}
/*! creates a PrimRef for each valid segment of range r, writes them to prims starting
    at index k, and returns the accumulated PrimInfo */
PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfo info(empty);
  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    BBox3fa box = empty;
    if (buildBounds(primID,&box))
    {
      const PrimRef ref(box,geomID,unsigned(primID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
/*! creates a PrimRef for each segment of range r that is valid at the itime'th time
    segment, writes them to prims starting at index k, and returns the accumulated PrimInfo */
PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfo info(empty);
  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    BBox3fa box = empty;
    if (buildBounds(primID,itime,box))
    {
      const PrimRef ref(box,geomID,unsigned(primID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
/*! creates a PrimRef for each segment of range r that is valid over the part of time_range
    overlapping the geometry's time range; the PrimRefs are written to prims starting at
    index k and the accumulated PrimInfo is returned */
PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfo info(empty);
  const BBox1f clipped = BBox1f::intersect(getTimeRange(), time_range);
  if (clipped.empty())
    return info;

  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    LBBox3fa lbox = empty;
    if (linearBounds(primID, clipped, lbox))
    {
      /* collapse the linear bounds into a single box */
      const PrimRef ref(lbox.bounds(), geomID, unsigned(primID));
      info.add_center2(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
/*! creates a PrimRefMB for each segment of range r that is valid over the time segments of
    t0t1, writes them to prims starting at index k, and returns the accumulated PrimInfoMB */
PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
{
  PrimInfoMB info(empty);
  for (size_t primID = r.begin(); primID < r.end(); primID++)
  {
    if (valid(primID, timeSegmentRange(t0t1)))
    {
      const PrimRefMB ref(linearBounds(primID,t0t1),
                          this->numTimeSegments(),this->time_range,this->numTimeSegments(),
                          geomID,unsigned(primID));
      info.add_primref(ref);
      prims[k++] = ref;
    }
  }
  return info;
}
BBox3fa vbounds(size_t i) const {
return bounds(i);
}
BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const {
return bounds(space,i);
}
BBox3fa vbounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const {
return bounds(ofs,scale,r_scale0,space,i,itime);
}
LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
return linearBounds(primID,time_range);
}
LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
return linearBounds(space,primID,time_range);
}
LBBox3fa vlinearBounds(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const {
return linearBounds(ofs,scale,r_scale0,space,primID,time_range);
}
};
}
DECLARE_ISA_FUNCTION(LineSegments*, createLineSegments, Device* COMMA Geometry::GType);
}

View file

@ -0,0 +1,361 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "buffer.h"
#include "default.h"
#include "geometry.h"
namespace embree
{
/*! Geometry made of an array of points. Every vertex is a Vec3ff whose xyz
 *  components hold the point position and whose w component holds the point
 *  radius. Vertex and normal buffers are stored per time step. */
struct Points : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_POINTS;

public:
  /*! points construction */
  Points(Device* device, Geometry::GType gtype);

public:
  void setMask(unsigned mask);
  void setNumTimeSteps(unsigned int numTimeSteps);
  void setVertexAttributeCount(unsigned int N);
  void setBuffer(RTCBufferType type,
                 unsigned int slot,
                 RTCFormat format,
                 const Ref<Buffer>& buffer,
                 size_t offset,
                 size_t stride,
                 unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify();
  void setMaxRadiusScale(float s);
  void addElementsToCount (GeometryCounts & counts) const;

public:
  /*! returns the number of vertices (size of the first time step's vertex buffer) */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns i'th vertex of the first time step */
  __forceinline Vec3ff vertex(size_t i) const {
    return vertices0[i];
  }

  /*! returns a pointer to the i'th vertex of the first time step */
  __forceinline const char* vertexPtr(size_t i) const {
    return vertices0.getPtr(i);
  }

  /*! returns i'th normal of the first time step */
  __forceinline Vec3fa normal(size_t i) const {
    return normals0[i];
  }

  /*! returns i'th radius of the first time step (w component of the vertex) */
  __forceinline float radius(size_t i) const {
    return vertices0[i].w;
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline Vec3ff vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns i'th vertex for the specified time, linearly interpolated
   *  between the two time steps enclosing that time */
  __forceinline Vec3ff vertex(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3ff v0 = vertex(i, itime+0);
    Vec3ff v1 = vertex(i, itime+1);
    return madd(Vec3ff(t0),v0,t1*v1);
  }

  /*! returns i'th vertex for the specified time; falls back to the first
   *  time step when the geometry has no motion blur */
  __forceinline Vec3ff vertex_safe(size_t i, float time) const
  {
    if (hasMotionBlur()) return vertex(i,time);
    else                 return vertex(i);
  }

  /*! returns a pointer to the i'th vertex of itime'th timestep */
  __forceinline const char* vertexPtr(size_t i, size_t itime) const {
    return vertices[itime].getPtr(i);
  }

  /*! returns i'th normal of itime'th timestep */
  __forceinline Vec3fa normal(size_t i, size_t itime) const {
    return normals[itime][i];
  }

  /*! returns i'th normal for the specified time, linearly interpolated
   *  between the two time steps enclosing that time (not renormalized) */
  __forceinline Vec3fa normal(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3fa n0 = normal(i, itime+0);
    Vec3fa n1 = normal(i, itime+1);
    return madd(Vec3fa(t0),n0,t1*n1);
  }

  /*! returns i'th normal for the specified time; falls back to the first
   *  time step when the geometry has no motion blur */
  __forceinline Vec3fa normal_safe(size_t i, float time) const
  {
    if (hasMotionBlur()) return normal(i,time);
    else                 return normal(i);
  }

  /*! returns i'th radius of itime'th timestep */
  __forceinline float radius(size_t i, size_t itime) const {
    return vertices[itime][i].w;
  }

  /*! returns i'th radius for the specified time, linearly interpolated
   *  between the two time steps enclosing that time */
  __forceinline float radius(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    float r0 = radius(i, itime+0);
    float r1 = radius(i, itime+1);
    return madd(t0,r0,t1*r1);
  }

  /*! returns i'th radius for the specified time; falls back to the first
   *  time step when the geometry has no motion blur */
  __forceinline float radius_safe(size_t i, float time) const
  {
    if (hasMotionBlur()) return radius(i,time);
    else                 return radius(i);
  }

  /*! calculates the bounding box of a point given as position (xyz) and
   *  radius (w): the position enlarged by maxRadiusScale*radius per axis */
  __forceinline BBox3fa bounds(const Vec3ff& v0) const {
    return enlarge(BBox3fa(v0), maxRadiusScale*Vec3fa(v0.w));
  }

  /*! calculates bounding box of i'th point */
  __forceinline BBox3fa bounds(size_t i) const
  {
    const Vec3ff v0 = vertex(i);
    return bounds(v0);
  }

  /*! calculates bounding box of i'th point for the itime'th time step */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const
  {
    const Vec3ff v0 = vertex(i, itime);
    return bounds(v0);
  }

  /*! calculates bounding box of i'th point with its position transformed
   *  into the given space (the radius is left untransformed) */
  __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i) const
  {
    const Vec3ff v0 = vertex(i);
    const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
    return bounds(w0);
  }

  /*! calculates bounding box of i'th point for the itime'th time step with
   *  its position transformed into the given space */
  __forceinline BBox3fa bounds(const LinearSpace3fa& space, size_t i, size_t itime) const
  {
    const Vec3ff v0 = vertex(i, itime);
    const Vec3ff w0(xfmVector(space, (Vec3fa)v0), v0.w);
    return bounds(w0);
  }

  /*! check if the i'th primitive is valid at the itime'th timestep */
  __forceinline bool valid(size_t i, size_t itime) const {
    return valid(i, make_range(itime, itime));
  }

  /*! check if the i'th primitive is valid for all time steps of the
   *  (inclusive) range: the index must be in range and every vertex must
   *  pass isvalid4() and have a non-negative radius */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    /* range-check the full-width index; narrowing it to 32 bits first would
       let indices >= 2^32 wrap around and pass the check */
    if (i >= numVertices())
      return false;

    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) {
      const Vec3ff v0 = vertex(i, itime);
      if (unlikely(!isvalid4(v0)))
        return false;
      if (v0.w < 0.0f)
        return false;
    }
    return true;
  }

  /*! calculates the linear bounds of the i'th primitive at the itime'th time
   *  segment, i.e. between time steps itime and itime+1 */
  __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
    return LBBox3fa(bounds(i, itime + 0), bounds(i, itime + 1));
  }

  /*! calculates the build bounds of the i'th primitive, if it's valid.
   *  bbox may be null, in which case only the validity test is performed
   *  (same contract as QuadMesh::buildBounds). */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
  {
    if (!valid(i, 0))
      return false;
    if (bbox)
      *bbox = bounds(i);
    return true;
  }

  /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    if (!valid(i, itime + 0) || !valid(i, itime + 1))
      return false;
    bbox = bounds(i, itime); // use bounds of first time step in builder
    return true;
  }

  /*! calculates the linear bounds of the primID'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&](size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the primID'th primitive for the
   *  specified time range with positions transformed into the given space */
  __forceinline LBBox3fa linearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&](size_t itime) { return bounds(space, primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified
   *  time range; returns false (leaving bbox untouched) if the primitive is
   *  invalid in any time segment of that range */
  __forceinline bool linearBounds(size_t i, const BBox1f& time_range, LBBox3fa& bbox) const
  {
    if (!valid(i, timeSegmentRange(time_range))) return false;
    bbox = linearBounds(i, time_range);
    return true;
  }

  /*! get fast access to first vertex buffer */
  __forceinline float * getCompactVertexArray () const {
    return (float*) vertices0.getPtr();
  }

  /*! returns the area estimate 1 + 2*pi*R^2 for the i'th point, R being its
   *  radius in the first time step */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    const float R = radius(i);
    return 1 + 2*M_PI*R*R;
  }

public:
  BufferView<Vec3ff> vertices0;                            //!< fast access to first vertex buffer
  BufferView<Vec3fa> normals0;                             //!< fast access to first normal buffer
  Device::vector<BufferView<Vec3ff>> vertices = device;    //!< vertex array for each timestep
  Device::vector<BufferView<Vec3fa>> normals = device;     //!< normal array for each timestep
  Device::vector<BufferView<char>> vertexAttribs = device; //!< user buffers
  float maxRadiusScale = 1.0;                              //!< factor applied to the radius when computing bounds
};
namespace isa
{
  /*! ISA-specific extension of Points that provides the primitive reference
   *  generation used by the builders and the v*-prefixed bounds accessors. */
  struct PointsISA : public Points
  {
    PointsISA(Device* device, Geometry::GType gtype) : Points(device, gtype) {}

    /*! Returns a fixed direction (1,0,0) regardless of the primitive. */
    Vec3fa computeDirection(unsigned int primID) const
    {
      return Vec3fa(1, 0, 0);
    }

    /*! Returns a fixed direction (1,0,0) regardless of primitive and time. */
    Vec3fa computeDirection(unsigned int primID, size_t time) const
    {
      return Vec3fa(1, 0, 0);
    }

    /*! Emits one PrimRef for every primitive of r for which buildBounds()
     *  succeeds, storing them in prims from index k on. Returns the
     *  accumulated PrimInfo. */
    PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
    {
      PrimInfo info(empty);
      for (size_t primID = r.begin(); primID < r.end(); ++primID)
      {
        BBox3fa box = empty;
        if (buildBounds(primID, &box))
        {
          const PrimRef ref(box, geomID, unsigned(primID));
          info.add_center2(ref);
          prims[k++] = ref;
        }
      }
      return info;
    }

    /*! Same as createPrimRefArray(), but uses the build bounds of time
     *  segment itime. */
    PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
    {
      PrimInfo info(empty);
      for (size_t primID = r.begin(); primID < r.end(); ++primID)
      {
        BBox3fa box = empty;
        if (buildBounds(primID, itime, box))
        {
          const PrimRef ref(box, geomID, unsigned(primID));
          info.add_center2(ref);
          prims[k++] = ref;
        }
      }
      return info;
    }

    /*! Emits PrimRefs bounded over the intersection of time_range with the
     *  geometry's own time range. Nothing is emitted when that intersection
     *  is empty; primitives whose linear bounds cannot be computed are
     *  skipped. */
    PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
    {
      PrimInfo info(empty);
      const BBox1f clipped = BBox1f::intersect(getTimeRange(), time_range);
      if (!clipped.empty())
      {
        for (size_t primID = r.begin(); primID < r.end(); ++primID)
        {
          LBBox3fa lbox = empty;
          if (linearBounds(primID, clipped, lbox))
          {
            const PrimRef ref(lbox.bounds(), geomID, unsigned(primID));
            info.add_center2(ref);
            prims[k++] = ref;
          }
        }
      }
      return info;
    }

    /*! Emits one PrimRefMB for every primitive of r that is valid over the
     *  time segments covered by t0t1, storing them in prims from index k on.
     *  Returns the accumulated PrimInfoMB. */
    PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims,
                                    const BBox1f& t0t1,
                                    const range<size_t>& r,
                                    size_t k,
                                    unsigned int geomID) const
    {
      PrimInfoMB info(empty);
      for (size_t primID = r.begin(); primID < r.end(); ++primID)
      {
        if (valid(primID, timeSegmentRange(t0t1)))
        {
          const PrimRefMB ref(linearBounds(primID, t0t1),
                              this->numTimeSegments(),
                              this->time_range,
                              this->numTimeSegments(),
                              geomID,
                              unsigned(primID));
          info.add_primref(ref);
          prims[k++] = ref;
        }
      }
      return info;
    }

    /*! Bounds of primitive i in the first time step. */
    BBox3fa vbounds(size_t i) const
    {
      return bounds(i);
    }

    /*! Bounds of primitive i in the first time step, in the given space. */
    BBox3fa vbounds(const LinearSpace3fa& space, size_t i) const
    {
      return bounds(space, i);
    }

    /*! Linear bounds of primitive primID over time_range. */
    LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const
    {
      return linearBounds(primID, time_range);
    }

    /*! Linear bounds of primitive primID over time_range, in the given space. */
    LBBox3fa vlinearBounds(const LinearSpace3fa& space, size_t primID, const BBox1f& time_range) const
    {
      return linearBounds(space, primID, time_range);
    }
  };
} // namespace isa
DECLARE_ISA_FUNCTION(Points*, createPoints, Device* COMMA Geometry::GType);
} // namespace embree

View file

@ -0,0 +1,376 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "buffer.h"
namespace embree
{
/*! Quad mesh geometry: an index buffer of quads (four vertex indices each)
 *  plus one vertex buffer per time step and optional vertex attribute
 *  buffers. */
struct QuadMesh : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_QUAD_MESH;

  /*! quad indices */
  struct Quad
  {
    /*! leaves the indices uninitialized */
    Quad() {}

    Quad (uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) {
      v[0] = v0; v[1] = v1; v[2] = v2; v[3] = v3;
    }

    /*! outputs quad indices */
    __forceinline friend embree_ostream operator<<(embree_ostream cout, const Quad& q) {
      return cout << "Quad {" << q.v[0] << ", " << q.v[1] << ", " << q.v[2] << ", " << q.v[3] << " }";
    }

    uint32_t v[4];
  };

public:
  /*! quad mesh construction */
  QuadMesh (Device* device);

  /* geometry interface */
public:
  void setMask(unsigned mask);
  void setNumTimeSteps (unsigned int numTimeSteps);
  void setVertexAttributeCount (unsigned int N);
  void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify();
  void interpolate(const RTCInterpolateArguments* const args);
  void addElementsToCount (GeometryCounts & counts) const;

  /*! Interpolates vertex or vertex attribute data of quad args->primID at
   *  (args->u, args->v), processing N floats per iteration.
   *
   *  The quad is evaluated as two triangles split along the v1-v3 diagonal:
   *  (v0,v1,v3) when u+v <= 1, otherwise (v2,v3,v1) with mirrored
   *  coordinates (1-u,1-v). Writes the position to P, the first derivatives
   *  to dPdu/dPdv (both written when dPdu is non-null) and zero second
   *  derivatives to ddPdudu/ddPdvdv/ddPdudv (all written when ddPdudu is
   *  non-null). */
  template<int N>
  void interpolate_impl(const RTCInterpolateArguments* const args)
  {
    unsigned int primID = args->primID;
    float u = args->u;
    float v = args->v;
    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* dPdv = args->dPdv;
    float* ddPdudu = args->ddPdudu;
    float* ddPdvdv = args->ddPdvdv;
    float* ddPdudv = args->ddPdudv;
    unsigned int valueCount = args->valueCount;

    /* calculate base pointer and stride; the slot must be a valid index,
       hence strictly smaller than the number of buffers */
    assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
           (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot < vertexAttribs.size()));
    const char* src = nullptr;
    size_t stride = 0;
    if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
      src    = vertexAttribs[bufferSlot].getPtr();
      stride = vertexAttribs[bufferSlot].getStride();
    } else {
      src    = vertices[bufferSlot].getPtr();
      stride = vertices[bufferSlot].getStride();
    }

    /* quad indices and triangle selection do not depend on the value index */
    const Quad& q = quad(primID);
    const vbool<N> left = u+v <= 1.0f;

    for (unsigned int i=0; i<valueCount; i+=N)
    {
      /* mask off lanes beyond valueCount in the last iteration */
      const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
      const size_t ofs = i*sizeof(float);
      const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[q.v[0]*stride+ofs]);
      const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[q.v[1]*stride+ofs]);
      const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[q.v[2]*stride+ofs]);
      const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[q.v[3]*stride+ofs]);
      const vfloat<N> Q0 = select(left,p0,p2);
      const vfloat<N> Q1 = select(left,p1,p3);
      const vfloat<N> Q2 = select(left,p3,p1);
      const vfloat<N> U  = select(left,u,vfloat<N>(1.0f)-u);
      const vfloat<N> V  = select(left,v,vfloat<N>(1.0f)-v);
      const vfloat<N> W  = 1.0f-U-V;
      if (P) {
        mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
      }
      if (dPdu) {
        /* derivatives flip sign on the mirrored triangle */
        mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1));
        assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2));
      }
      if (ddPdudu) {
        mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
        assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
        assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
      }
    }
  }

public:
  /*! returns number of vertices (size of the first time step's vertex buffer) */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns i'th quad */
  __forceinline const Quad& quad(size_t i) const {
    return quads[i];
  }

  /*! returns i'th vertex of the first time step */
  __forceinline const Vec3fa vertex(size_t i) const {
    return vertices0[i];
  }

  /*! returns a pointer to the i'th vertex of the first time step */
  __forceinline const char* vertexPtr(size_t i) const {
    return vertices0.getPtr(i);
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns a pointer to the i'th vertex of itime'th timestep */
  __forceinline const char* vertexPtr(size_t i, size_t itime) const {
    return vertices[itime].getPtr(i);
  }

  /*! returns i'th vertex for the specified time, linearly interpolated
   *  between the two time steps enclosing that time */
  __forceinline Vec3fa vertex(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3fa v0 = vertex(i, itime+0);
    Vec3fa v1 = vertex(i, itime+1);
    return madd(Vec3fa(t0),v0,t1*v1);
  }

  /*! calculates the bounds of the i'th quad */
  __forceinline BBox3fa bounds(size_t i) const
  {
    const Quad& q = quad(i);
    const Vec3fa v0 = vertex(q.v[0]);
    const Vec3fa v1 = vertex(q.v[1]);
    const Vec3fa v2 = vertex(q.v[2]);
    const Vec3fa v3 = vertex(q.v[3]);
    return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
  }

  /*! calculates the bounds of the i'th quad at the itime'th timestep */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const
  {
    const Quad& q = quad(i);
    const Vec3fa v0 = vertex(q.v[0],itime);
    const Vec3fa v1 = vertex(q.v[1],itime);
    const Vec3fa v2 = vertex(q.v[2],itime);
    const Vec3fa v3 = vertex(q.v[3],itime);
    return BBox3fa(min(v0,v1,v2,v3),max(v0,v1,v2,v3));
  }

  /*! check if the i'th primitive is valid at the itime'th timestep */
  __forceinline bool valid(size_t i, size_t itime) const {
    return valid(i, make_range(itime, itime));
  }

  /*! check if the i'th primitive is valid for all time steps of the
   *  (inclusive) range: all four indices must be in range and all four
   *  vertices must pass isvalid() in every time step */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    const Quad& q = quad(i);
    if (unlikely(q.v[0] >= numVertices())) return false;
    if (unlikely(q.v[1] >= numVertices())) return false;
    if (unlikely(q.v[2] >= numVertices())) return false;
    if (unlikely(q.v[3] >= numVertices())) return false;

    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
    {
      if (!isvalid(vertex(q.v[0],itime))) return false;
      if (!isvalid(vertex(q.v[1],itime))) return false;
      if (!isvalid(vertex(q.v[2],itime))) return false;
      if (!isvalid(vertex(q.v[3],itime))) return false;
    }
    return true;
  }

  /*! calculates the linear bounds of the i'th quad at the itime'th time
   *  segment, i.e. between time steps itime and itime+1 */
  __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
    return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
  }

  /*! calculates the build bounds of the i'th primitive, if it's valid in
   *  all time steps; bbox may be null to perform the validity test only */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
  {
    const Quad& q = quad(i);
    if (q.v[0] >= numVertices()) return false;
    if (q.v[1] >= numVertices()) return false;
    if (q.v[2] >= numVertices()) return false;
    if (q.v[3] >= numVertices()) return false;

    for (size_t t=0; t<numTimeSteps; t++)
    {
      const Vec3fa v0 = vertex(q.v[0],t);
      const Vec3fa v1 = vertex(q.v[1],t);
      const Vec3fa v2 = vertex(q.v[2],t);
      const Vec3fa v3 = vertex(q.v[3],t);

      if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3)))
        return false;
    }

    if (bbox)
      *bbox = bounds(i);

    return true;
  }

  /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    const Quad& q = quad(i);
    if (unlikely(q.v[0] >= numVertices())) return false;
    if (unlikely(q.v[1] >= numVertices())) return false;
    if (unlikely(q.v[2] >= numVertices())) return false;
    if (unlikely(q.v[3] >= numVertices())) return false;

    assert(itime+1 < numTimeSteps);
    const Vec3fa a0 = vertex(q.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
    const Vec3fa a1 = vertex(q.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
    const Vec3fa a2 = vertex(q.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
    const Vec3fa a3 = vertex(q.v[3],itime+0); if (unlikely(!isvalid(a3))) return false;
    const Vec3fa b0 = vertex(q.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
    const Vec3fa b1 = vertex(q.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
    const Vec3fa b2 = vertex(q.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;
    const Vec3fa b3 = vertex(q.v[3],itime+1); if (unlikely(!isvalid(b3))) return false;

    /* use bounds of first time step in builder */
    bbox = BBox3fa(min(a0,a1,a2,a3),max(a0,a1,a2,a3));
    return true;
  }

  /*! calculates the linear bounds of the primID'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified
   *  time range; returns false (leaving bbox untouched) if the primitive is
   *  invalid in any time segment of that range */
  __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const
  {
    if (!valid(i, timeSegmentRange(dt))) return false;
    bbox = linearBounds(i, dt);
    return true;
  }

  /*! get fast access to first vertex buffer */
  __forceinline float * getCompactVertexArray () const {
    return (float*) vertices0.getPtr();
  }

  /* gets version info of topology */
  unsigned int getTopologyVersion() const {
    return quads.modCounter;
  }

  /* returns true if topology changed */
  bool topologyChanged(unsigned int otherVersion) const {
    return quads.isModified(otherVersion); // || numPrimitivesChanged;
  }

  /* returns the projected area, summed over the triangles (v0,v1,v3) and (v1,v2,v3) */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    const Quad& q = quad(i);
    const Vec3fa v0 = vertex(q.v[0]);
    const Vec3fa v1 = vertex(q.v[1]);
    const Vec3fa v2 = vertex(q.v[2]);
    const Vec3fa v3 = vertex(q.v[3]);
    return areaProjectedTriangle(v0,v1,v3) +
           areaProjectedTriangle(v1,v2,v3);
  }

public:
  BufferView<Quad> quads;                                 //!< array of quads
  BufferView<Vec3fa> vertices0;                           //!< fast access to first vertex buffer
  Device::vector<BufferView<Vec3fa>> vertices = device;   //!< vertex array for each timestep
  Device::vector<RawBufferView> vertexAttribs = device;   //!< vertex attribute buffers
};
namespace isa
{
  /*! ISA-specific extension of QuadMesh that provides the primitive
   *  reference generation used by the builders. */
  struct QuadMeshISA : public QuadMesh
  {
    QuadMeshISA (Device* device)
      : QuadMesh(device) {}

    /*! Linear bounds of primitive primID over time_range. */
    LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const
    {
      return linearBounds(primID, time_range);
    }

    /*! Emits one PrimRef for every quad of r for which buildBounds()
     *  succeeds, storing them in prims from index k on. Returns the
     *  accumulated PrimInfo. */
    PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
    {
      PrimInfo info(empty);
      for (size_t primID = r.begin(); primID < r.end(); ++primID)
      {
        BBox3fa box = empty;
        if (buildBounds(primID, &box))
        {
          const PrimRef ref(box, geomID, unsigned(primID));
          info.add_center2(ref);
          prims[k++] = ref;
        }
      }
      return info;
    }

    /*! Same as createPrimRefArray(), but uses the build bounds of time
     *  segment itime. */
    PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
    {
      PrimInfo info(empty);
      for (size_t primID = r.begin(); primID < r.end(); ++primID)
      {
        BBox3fa box = empty;
        if (buildBounds(primID, itime, box))
        {
          const PrimRef ref(box, geomID, unsigned(primID));
          info.add_center2(ref);
          prims[k++] = ref;
        }
      }
      return info;
    }

    /*! Emits PrimRefs bounded over the intersection of time_range with the
     *  geometry's own time range. Nothing is emitted when that intersection
     *  is empty; quads whose linear bounds cannot be computed are skipped. */
    PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
    {
      PrimInfo info(empty);
      const BBox1f clipped = BBox1f::intersect(getTimeRange(), time_range);
      if (!clipped.empty())
      {
        for (size_t primID = r.begin(); primID < r.end(); ++primID)
        {
          LBBox3fa lbox = empty;
          if (linearBounds(primID, clipped, lbox))
          {
            const PrimRef ref(lbox.bounds(), geomID, unsigned(primID));
            info.add_center2(ref);
            prims[k++] = ref;
          }
        }
      }
      return info;
    }

    /*! Emits one PrimRefMB for every quad of r that is valid over the time
     *  segments covered by t0t1, storing them in prims from index k on.
     *  Returns the accumulated PrimInfoMB. */
    PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
    {
      PrimInfoMB info(empty);
      for (size_t primID = r.begin(); primID < r.end(); ++primID)
      {
        if (valid(primID, timeSegmentRange(t0t1)))
        {
          const PrimRefMB ref(linearBounds(primID, t0t1),
                              this->numTimeSegments(),
                              this->time_range,
                              this->numTimeSegments(),
                              geomID,
                              unsigned(primID));
          info.add_primref(ref);
          prims[k++] = ref;
        }
      }
      return info;
    }
  };
}
DECLARE_ISA_FUNCTION(QuadMesh*, createQuadMesh, Device*);
}

View file

@ -0,0 +1,329 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "buffer.h"
#include "../subdiv/half_edge.h"
#include "../subdiv/tessellation_cache.h"
#include "../subdiv/catmullclark_coefficients.h"
#include "../subdiv/patch.h"
namespace embree
{
struct HoleSet;
struct VertexCreaseMap;
struct EdgeCreaseMap;
class SubdivMesh : public Geometry
{
ALIGNED_CLASS_(16);
public:
typedef HalfEdge::Edge Edge;
/*! type of this geometry */
static const Geometry::GTypeMask geom_type = Geometry::MTY_SUBDIV_MESH;
/*! structure used to sort half edges using radix sort by their key */
struct KeyHalfEdge
{
KeyHalfEdge() {}
KeyHalfEdge (uint64_t key, HalfEdge* edge)
: key(key), edge(edge) {}
__forceinline operator uint64_t() const {
return key;
}
friend __forceinline bool operator<(const KeyHalfEdge& e0, const KeyHalfEdge& e1) {
return e0.key < e1.key;
}
public:
uint64_t key;
HalfEdge* edge;
};
public:
/*! subdiv mesh construction */
SubdivMesh(Device* device);
~SubdivMesh();
public:
void setMask (unsigned mask);
void setSubdivisionMode (unsigned int topologyID, RTCSubdivisionMode mode);
void setVertexAttributeTopology(unsigned int vertexAttribID, unsigned int topologyID);
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setTopologyCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void updateBuffer(RTCBufferType type, unsigned int slot);
void setTessellationRate(float N);
bool verify();
void commit();
void addElementsToCount (GeometryCounts & counts) const;
void setDisplacementFunction (RTCDisplacementFunctionN func);
unsigned int getFirstHalfEdge(unsigned int faceID);
unsigned int getFace(unsigned int edgeID);
unsigned int getNextHalfEdge(unsigned int edgeID);
unsigned int getPreviousHalfEdge(unsigned int edgeID);
unsigned int getOppositeHalfEdge(unsigned int topologyID, unsigned int edgeID);
public:
/*! return the number of faces */
size_t numFaces() const {
return faceVertices.size();
}
/*! return the number of edges */
size_t numEdges() const {
return topology[0].vertexIndices.size();
}
/*! return the number of vertices */
size_t numVertices() const {
return vertices[0].size();
}
/*! calculates the bounds of the i'th subdivision patch at the j'th timestep */
__forceinline BBox3fa bounds(size_t i, size_t j = 0) const {
return topology[0].getHalfEdge(i)->bounds(vertices[j]);
}
/*! check if the i'th primitive is valid */
__forceinline bool valid(size_t i) const {
return topology[0].valid(i) && !invalidFace(i);
}
/*! check if the i'th primitive is valid for the j'th time range */
__forceinline bool valid(size_t i, size_t j) const {
return topology[0].valid(i) && !invalidFace(i,j);
}
/*! prints some statistics */
void printStatistics();
/*! initializes the half edge data structure */
void initializeHalfEdgeStructures ();
public:
/*! returns the vertex buffer for some time step */
__forceinline const BufferView<Vec3fa>& getVertexBuffer( const size_t t = 0 ) const {
return vertices[t];
}
/* returns tessellation level of edge */
__forceinline float getEdgeLevel(const size_t i) const
{
if (levels) return clamp(levels[i],1.0f,4096.0f); // FIXME: do we want to limit edge level?
else return clamp(tessellationRate,1.0f,4096.0f); // FIXME: do we want to limit edge level?
}
public:
RTCDisplacementFunctionN displFunc; //!< displacement function
/*! all buffers in this section are provided by the application */
public:
/*! the topology contains all data that may differ when
* interpolating different user data buffers */
struct Topology
{
public:
/*! Default topology construction */
Topology () : halfEdges(nullptr,0) {}
/*! Topology initialization */
Topology (SubdivMesh* mesh);
/*! make the class movable */
public:
Topology (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
: mesh(std::move(other.mesh)),
vertexIndices(std::move(other.vertexIndices)),
subdiv_mode(std::move(other.subdiv_mode)),
halfEdges(std::move(other.halfEdges)),
halfEdges0(std::move(other.halfEdges0)),
halfEdges1(std::move(other.halfEdges1)) {}
Topology& operator= (Topology&& other) // FIXME: this is only required to workaround compilation issues under Windows
{
mesh = std::move(other.mesh);
vertexIndices = std::move(other.vertexIndices);
subdiv_mode = std::move(other.subdiv_mode);
halfEdges = std::move(other.halfEdges);
halfEdges0 = std::move(other.halfEdges0);
halfEdges1 = std::move(other.halfEdges1);
return *this;
}
public:
/*! check if the i'th primitive is valid in this topology */
__forceinline bool valid(size_t i) const
{
if (unlikely(subdiv_mode == RTC_SUBDIVISION_MODE_NO_BOUNDARY)) {
if (getHalfEdge(i)->faceHasBorder()) return false;
}
return true;
}
/*! updates the interpolation mode for the topology */
void setSubdivisionMode (RTCSubdivisionMode mode);
/*! marks all buffers as modified */
void update ();
/*! verifies index array */
bool verify (size_t numVertices);
/*! initializes the half edge data structure */
void initializeHalfEdgeStructures ();
private:
/*! recalculates the half edges */
void calculateHalfEdges();
/*! updates half edges when recalculation is not necessary */
void updateHalfEdges();
/*! user input data */
public:
SubdivMesh* mesh;
/*! indices of the vertices composing each face */
BufferView<unsigned int> vertexIndices;
/*! subdiv interpolation mode */
RTCSubdivisionMode subdiv_mode;
/*! generated data */
public:
/*! returns the start half edge for face f */
__forceinline const HalfEdge* getHalfEdge ( const size_t f ) const {
return &halfEdges[mesh->faceStartEdge[f]];
}
/*! Half edge structure, generated by initHalfEdgeStructures */
mvector<HalfEdge> halfEdges;
/*! the following data is only required during construction of the
* half edge structure and can be cleared for static scenes */
private:
/*! two arrays used to sort the half edges */
std::vector<KeyHalfEdge> halfEdges0;
std::vector<KeyHalfEdge> halfEdges1;
};
/*! returns the start half edge for topology t and face f */
__forceinline const HalfEdge* getHalfEdge ( const size_t t , const size_t f ) const {
return topology[t].getHalfEdge(f);
}
/*! buffer containing the number of vertices for each face */
BufferView<unsigned int> faceVertices;
/*! array of topologies */
vector<Topology> topology;
/*! vertex buffer (one buffer for each time step) */
vector<BufferView<Vec3fa>> vertices;
/*! user data buffers */
vector<RawBufferView> vertexAttribs;
/*! edge crease buffer containing edges (pairs of vertices) that carry edge crease weights */
BufferView<Edge> edge_creases;
/*! edge crease weights for each edge of the edge_creases buffer */
BufferView<float> edge_crease_weights;
/*! vertex crease buffer containing all vertices that carry vertex crease weights */
BufferView<unsigned int> vertex_creases;
/*! vertex crease weights for each vertex of the vertex_creases buffer */
BufferView<float> vertex_crease_weights;
/*! subdivision level for each half edge of the vertexIndices buffer */
BufferView<float> levels;
float tessellationRate; // constant rate that is used when levels is not set
/*! buffer that marks specific faces as holes */
BufferView<unsigned> holes;
/*! all data in this section is generated by initializeHalfEdgeStructures function */
private:
/*! number of half edges used by faces */
size_t numHalfEdges;
/*! fast lookup table to find the first half edge for some face */
mvector<uint32_t> faceStartEdge;
/*! fast lookup table to find the face for some half edge */
mvector<uint32_t> halfEdgeFace;
/*! set with all holes */
std::unique_ptr<HoleSet> holeSet;
/*! fast lookup table to detect invalid faces */
mvector<char> invalid_face;
/*! test if face i is invalid in timestep j */
__forceinline char& invalidFace(size_t i, size_t j = 0) { return invalid_face[i*numTimeSteps+j]; }
__forceinline const char& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; }
/*! interpolation cache */
public:
static __forceinline size_t numInterpolationSlots4(size_t stride) { return (stride+15)/16; }
static __forceinline size_t numInterpolationSlots8(size_t stride) { return (stride+31)/32; }
static __forceinline size_t interpolationSlot(size_t prim, size_t slot, size_t stride) {
const size_t slots = numInterpolationSlots4(stride);
assert(slot < slots);
return slots*prim+slot;
}
std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_buffer_tags;
std::vector<std::vector<SharedLazyTessellationCache::CacheEntry>> vertex_attrib_buffer_tags;
std::vector<Patch3fa::Ref> patch_eval_trees;
/*! the following data is only required during construction of the
* half edge structure and can be cleared for static scenes */
private:
/*! map with all vertex creases */
std::unique_ptr<VertexCreaseMap> vertexCreaseMap;
/*! map with all edge creases */
std::unique_ptr<EdgeCreaseMap> edgeCreaseMap;
protected:
/*! counts number of geometry commits */
size_t commitCounter;
};
namespace isa
{
/*! ISA-specific extension of SubdivMesh that declares the interpolation
 *  entry points; their definitions are not part of this header. */
struct SubdivMeshISA : public SubdivMesh
{
/*! forwards the device to the SubdivMesh constructor */
SubdivMeshISA (Device* device)
: SubdivMesh(device) {}
/*! interpolation for the query described by args */
void interpolate(const RTCInterpolateArguments* const args);
/*! interpolation for the batch of queries described by args */
void interpolateN(const RTCInterpolateNArguments* const args);
};
}
DECLARE_ISA_FUNCTION(SubdivMesh*, createSubdivMesh, Device*);
};

View file

@ -0,0 +1,194 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "scene_triangle_mesh.h"
#include "scene.h"
namespace embree
{
#if defined(EMBREE_LOWEST_ISA)
/*! Constructs a triangle mesh geometry and sizes the per-time-step vertex
 *  buffer array to numTimeSteps.
 *  NOTE(review): the trailing Geometry arguments (0,1) are presumably the
 *  initial primitive count and time step count - confirm against geometry.h. */
TriangleMesh::TriangleMesh (Device* device)
: Geometry(device,GTY_TRIANGLE_MESH,0,1)
{
vertices.resize(numTimeSteps);
}
/*! Stores the new mask and notifies the base class through Geometry::update(). */
void TriangleMesh::setMask (unsigned mask)
{
this->mask = mask;
Geometry::update();
}
/*! Resizes the per-time-step vertex buffer array to numTimeSteps entries and
 *  then forwards the new count to Geometry::setNumTimeSteps(). */
void TriangleMesh::setNumTimeSteps (unsigned int numTimeSteps)
{
vertices.resize(numTimeSteps);
Geometry::setNumTimeSteps(numTimeSteps);
}
/*! Resizes the vertex attribute buffer array to N slots and notifies the
 *  base class through Geometry::update(). */
void TriangleMesh::setVertexAttributeCount (unsigned int N)
{
vertexAttribs.resize(N);
Geometry::update();
}
/*! Binds a buffer to the vertex, vertex attribute, or index slot of the mesh.
 *  Throws an RTC error on misaligned data, unsupported format, invalid slot,
 *  or unknown buffer type. */
void TriangleMesh::setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num)
{
  /* verify that all accesses are 4 bytes aligned: first element address and stride */
  const size_t firstElement = size_t(buffer->getPtr()) + offset;
  const bool misaligned = (firstElement & 0x3) || (stride & 0x3);
  if (misaligned)
    throw_RTCError(RTC_ERROR_INVALID_OPERATION, "data must be 4 bytes aligned");

  switch (type)
  {
  case RTC_BUFFER_TYPE_VERTEX:
  {
    if (format != RTC_FORMAT_FLOAT3)
      throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex buffer format");

    /* if buffer is larger than 16GB the premultiplied index optimization does not work */
    if (stride*num > 16ll*1024ll*1024ll*1024ll)
      throw_RTCError(RTC_ERROR_INVALID_OPERATION, "vertex buffer can be at most 16GB large");

    if (slot >= vertices.size())
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid vertex buffer slot");

    vertices[slot].set(buffer, offset, stride, num, format);
    vertices[slot].checkPadding16();

    /* keep the fast-access view of time step 0 in sync */
    vertices0 = vertices[0];
    break;
  }
  case RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE:
  {
    if (format < RTC_FORMAT_FLOAT || format > RTC_FORMAT_FLOAT16)
      throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer format");

    if (slot >= vertexAttribs.size())
      throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid vertex attribute buffer slot");

    vertexAttribs[slot].set(buffer, offset, stride, num, format);
    vertexAttribs[slot].checkPadding16();
    break;
  }
  case RTC_BUFFER_TYPE_INDEX:
  {
    if (slot != 0)
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");

    if (format != RTC_FORMAT_UINT3)
      throw_RTCError(RTC_ERROR_INVALID_OPERATION, "invalid index buffer format");

    triangles.set(buffer, offset, stride, num, format);

    /* the index buffer defines the primitive count */
    setNumPrimitives(num);
    break;
  }
  default:
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
  }
}
/*! Returns the data pointer of the buffer bound to the given type and slot.
 *  Throws RTC_ERROR_INVALID_ARGUMENT for an invalid slot or unknown buffer type. */
void* TriangleMesh::getBuffer(RTCBufferType type, unsigned int slot)
{
  switch (type)
  {
  case RTC_BUFFER_TYPE_INDEX:
    /* there is only a single index buffer */
    if (slot != 0)
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
    return triangles.getPtr();

  case RTC_BUFFER_TYPE_VERTEX:
    if (slot >= vertices.size())
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
    return vertices[slot].getPtr();

  case RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE:
    if (slot >= vertexAttribs.size())
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
    return vertexAttribs[slot].getPtr();

  default:
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
    return nullptr;
  }
}
/*! Marks the buffer bound to the given type and slot as modified and flags
 *  the geometry as updated. Throws RTC_ERROR_INVALID_ARGUMENT for an invalid
 *  slot or unknown buffer type. */
void TriangleMesh::updateBuffer(RTCBufferType type, unsigned int slot)
{
  switch (type)
  {
  case RTC_BUFFER_TYPE_INDEX:
    /* there is only a single index buffer */
    if (slot != 0)
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
    triangles.setModified();
    break;

  case RTC_BUFFER_TYPE_VERTEX:
    if (slot >= vertices.size())
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
    vertices[slot].setModified();
    break;

  case RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE:
    if (slot >= vertexAttribs.size())
      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
    vertexAttribs[slot].setModified();
    break;

  default:
    throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
  }

  Geometry::update();
}
/*! Checks that every time step uses the same vertex stride as time step 0,
 *  then commits through the Geometry base class. */
void TriangleMesh::commit()
{
  for (unsigned int step = 0; step < numTimeSteps; ++step)
  {
    const bool sameStride = vertices[step].getStride() == vertices[0].getStride();
    if (sameStride) continue;
    throw_RTCError(RTC_ERROR_INVALID_OPERATION,"stride of vertex buffers have to be identical for each time step");
  }

  Geometry::commit();
}
/*! adds the primitive count to the motion blur counter when there is more
 *  than one time step, otherwise to the static triangle counter */
void TriangleMesh::addElementsToCount (GeometryCounts & counts) const
{
  if (numTimeSteps != 1) {
    counts.numMBTriangles += numPrimitives;
  } else {
    counts.numTriangles += numPrimitives;
  }
}
bool TriangleMesh::verify()
{
/*! verify size of vertex arrays */
if (vertices.size() == 0) return false;
for (const auto& buffer : vertices)
if (buffer.size() != numVertices())
return false;
/*! verify size of user vertex arrays */
for (const auto& buffer : vertexAttribs)
if (buffer.size() != numVertices())
return false;
/*! verify triangle indices */
for (size_t i=0; i<size(); i++) {
if (triangles[i].v[0] >= numVertices()) return false;
if (triangles[i].v[1] >= numVertices()) return false;
if (triangles[i].v[2] >= numVertices()) return false;
}
/*! verify vertices */
for (const auto& buffer : vertices)
for (size_t i=0; i<buffer.size(); i++)
if (!isvalid(buffer[i]))
return false;
return true;
}
/*! interpolation entry point; uses the 4-wide instantiation of interpolate_impl */
void TriangleMesh::interpolate(const RTCInterpolateArguments* const args)
{
  interpolate_impl<4>(args);
}
#endif
namespace isa
{
/*! factory: allocates the ISA-specific triangle mesh for the given device */
TriangleMesh* createTriangleMesh(Device* device)
{
  TriangleMesh* const mesh = new TriangleMeshISA(device);
  return mesh;
}
}
}

View file

@ -0,0 +1,351 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "geometry.h"
#include "buffer.h"
namespace embree
{
/*! Triangle mesh geometry: one index buffer, one vertex buffer per time
 *  step, and an arbitrary number of vertex attribute buffers. */
struct TriangleMesh : public Geometry
{
  /*! type of this geometry */
  static const Geometry::GTypeMask geom_type = Geometry::MTY_TRIANGLE_MESH;

  /*! triangle indices */
  struct Triangle
  {
    uint32_t v[3];

    /*! outputs triangle indices */
    __forceinline friend embree_ostream operator<<(embree_ostream cout, const Triangle& t) {
      return cout << "Triangle { " << t.v[0] << ", " << t.v[1] << ", " << t.v[2] << " }";
    }
  };

public:

  /*! triangle mesh construction */
  TriangleMesh (Device* device);

  /* geometry interface */
public:
  void setMask(unsigned mask);
  void setNumTimeSteps (unsigned int numTimeSteps);
  void setVertexAttributeCount (unsigned int N);
  void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
  void* getBuffer(RTCBufferType type, unsigned int slot);
  void updateBuffer(RTCBufferType type, unsigned int slot);
  void commit();
  bool verify();
  void interpolate(const RTCInterpolateArguments* const args);
  void addElementsToCount (GeometryCounts & counts) const;

  /*! Barycentric interpolation of a vertex or vertex attribute buffer over
   *  triangle args->primID at (args->u, args->v), processing N floats per
   *  iteration. Writes the value to P, the first derivatives to dPdu/dPdv
   *  and zeros to the second derivatives; P, the dPdu/dPdv pair and the
   *  second-derivative triple are each optional (selected by P, dPdu and
   *  ddPdudu being non-null). */
  template<int N>
  void interpolate_impl(const RTCInterpolateArguments* const args)
  {
    unsigned int primID = args->primID;
    float u = args->u;
    float v = args->v;
    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* dPdv = args->dPdv;
    float* ddPdudu = args->ddPdudu;
    float* ddPdvdv = args->ddPdvdv;
    float* ddPdudv = args->ddPdudv;
    unsigned int valueCount = args->valueCount;

    /* calculate base pointer and stride; the slot must be a valid index,
     * i.e. strictly smaller than the number of buffers of that type */
    assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
           (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot < vertexAttribs.size()));
    const char* src = nullptr;
    size_t stride = 0;
    if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
      src = vertexAttribs[bufferSlot].getPtr();
      stride = vertexAttribs[bufferSlot].getStride();
    } else {
      src = vertices[bufferSlot].getPtr();
      stride = vertices[bufferSlot].getStride();
    }

    for (unsigned int i=0; i<valueCount; i+=N)
    {
      size_t ofs = i*sizeof(float);
      const float w = 1.0f-u-v;
      const Triangle& tri = triangle(primID);

      /* mask off the lanes past valueCount in the last iteration */
      const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
      const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]);
      const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]);
      const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]);

      if (P) {
        /* P = w*p0 + u*p1 + v*p2 */
        mem<vfloat<N>>::storeu(valid,P+i,madd(w,p0,madd(u,p1,v*p2)));
      }
      if (dPdu) {
        /* both first derivatives are written together, so dPdv must be set too */
        mem<vfloat<N>>::storeu(valid,dPdu+i,p1-p0);
        assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,p2-p0);
      }
      if (ddPdudu) {
        /* the interpolation is linear in u and v: all second derivatives are zero */
        mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
        assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
        assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
      }
    }
  }

public:

  /*! returns number of vertices (size of the first vertex buffer) */
  __forceinline size_t numVertices() const {
    return vertices[0].size();
  }

  /*! returns i'th triangle*/
  __forceinline const Triangle& triangle(size_t i) const {
    return triangles[i];
  }

  /*! returns i'th vertex of the first time step */
  __forceinline const Vec3fa vertex(size_t i) const {
    return vertices0[i];
  }

  /*! returns pointer to the i'th vertex of the first time step */
  __forceinline const char* vertexPtr(size_t i) const {
    return vertices0.getPtr(i);
  }

  /*! returns i'th vertex of itime'th timestep */
  __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
    return vertices[itime][i];
  }

  /*! returns pointer to the i'th vertex of itime'th timestep */
  __forceinline const char* vertexPtr(size_t i, size_t itime) const {
    return vertices[itime].getPtr(i);
  }

  /*! returns i'th vertex for the specified time, linearly interpolated
   *  between the two time steps enclosing the time segment */
  __forceinline Vec3fa vertex(size_t i, float time) const
  {
    float ftime;
    const size_t itime = timeSegment(time, ftime);
    const float t0 = 1.0f - ftime;
    const float t1 = ftime;
    Vec3fa v0 = vertex(i, itime+0);
    Vec3fa v1 = vertex(i, itime+1);
    return madd(Vec3fa(t0),v0,t1*v1);
  }

  /*! calculates the bounds of the i'th triangle (first time step) */
  __forceinline BBox3fa bounds(size_t i) const
  {
    const Triangle& tri = triangle(i);
    const Vec3fa v0 = vertex(tri.v[0]);
    const Vec3fa v1 = vertex(tri.v[1]);
    const Vec3fa v2 = vertex(tri.v[2]);
    return BBox3fa(min(v0,v1,v2),max(v0,v1,v2));
  }

  /*! calculates the bounds of the i'th triangle at the itime'th timestep */
  __forceinline BBox3fa bounds(size_t i, size_t itime) const
  {
    const Triangle& tri = triangle(i);
    const Vec3fa v0 = vertex(tri.v[0],itime);
    const Vec3fa v1 = vertex(tri.v[1],itime);
    const Vec3fa v2 = vertex(tri.v[2],itime);
    return BBox3fa(min(v0,v1,v2),max(v0,v1,v2));
  }

  /*! check if the i'th primitive is valid at the itime'th timestep */
  __forceinline bool valid(size_t i, size_t itime) const {
    return valid(i, make_range(itime, itime));
  }

  /*! check if the i'th primitive is valid between the specified time range
   *  (both ends of the range are inclusive) */
  __forceinline bool valid(size_t i, const range<size_t>& itime_range) const
  {
    const Triangle& tri = triangle(i);
    if (unlikely(tri.v[0] >= numVertices())) return false;
    if (unlikely(tri.v[1] >= numVertices())) return false;
    if (unlikely(tri.v[2] >= numVertices())) return false;

    for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
    {
      if (!isvalid(vertex(tri.v[0],itime))) return false;
      if (!isvalid(vertex(tri.v[1],itime))) return false;
      if (!isvalid(vertex(tri.v[2],itime))) return false;
    }

    return true;
  }

  /*! calculates the linear bounds of the i'th primitive at the itime'th time segment */
  __forceinline LBBox3fa linearBounds(size_t i, size_t itime) const {
    return LBBox3fa(bounds(i,itime+0),bounds(i,itime+1));
  }

  /*! calculates the build bounds of the i'th primitive, if it's valid;
   *  validity is checked for all time steps, the bounds are those of the
   *  first time step. Passing bbox == nullptr only performs the check. */
  __forceinline bool buildBounds(size_t i, BBox3fa* bbox = nullptr) const
  {
    const Triangle& tri = triangle(i);
    if (unlikely(tri.v[0] >= numVertices())) return false;
    if (unlikely(tri.v[1] >= numVertices())) return false;
    if (unlikely(tri.v[2] >= numVertices())) return false;

    for (size_t t=0; t<numTimeSteps; t++)
    {
      const Vec3fa v0 = vertex(tri.v[0],t);
      const Vec3fa v1 = vertex(tri.v[1],t);
      const Vec3fa v2 = vertex(tri.v[2],t);
      if (unlikely(!isvalid(v0) || !isvalid(v1) || !isvalid(v2)))
        return false;
    }

    if (likely(bbox))
      *bbox = bounds(i);

    return true;
  }

  /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
  __forceinline bool buildBounds(size_t i, size_t itime, BBox3fa& bbox) const
  {
    const Triangle& tri = triangle(i);
    if (unlikely(tri.v[0] >= numVertices())) return false;
    if (unlikely(tri.v[1] >= numVertices())) return false;
    if (unlikely(tri.v[2] >= numVertices())) return false;

    /* the segment needs both of its end points: time steps itime and itime+1 */
    assert(itime+1 < numTimeSteps);
    const Vec3fa a0 = vertex(tri.v[0],itime+0); if (unlikely(!isvalid(a0))) return false;
    const Vec3fa a1 = vertex(tri.v[1],itime+0); if (unlikely(!isvalid(a1))) return false;
    const Vec3fa a2 = vertex(tri.v[2],itime+0); if (unlikely(!isvalid(a2))) return false;
    const Vec3fa b0 = vertex(tri.v[0],itime+1); if (unlikely(!isvalid(b0))) return false;
    const Vec3fa b1 = vertex(tri.v[1],itime+1); if (unlikely(!isvalid(b1))) return false;
    const Vec3fa b2 = vertex(tri.v[2],itime+1); if (unlikely(!isvalid(b2))) return false;

    /* use bounds of first time step in builder */
    bbox = BBox3fa(min(a0,a1,a2),max(a0,a1,a2));
    return true;
  }

  /*! calculates the linear bounds of the primID'th primitive for the specified time range */
  __forceinline LBBox3fa linearBounds(size_t primID, const BBox1f& dt) const {
    return LBBox3fa([&] (size_t itime) { return bounds(primID, itime); }, dt, time_range, fnumTimeSegments);
  }

  /*! calculates the linear bounds of the i'th primitive for the specified
   *  time range; returns false and leaves bbox untouched if the primitive
   *  is not valid in that range */
  __forceinline bool linearBounds(size_t i, const BBox1f& dt, LBBox3fa& bbox) const {
    if (!valid(i, timeSegmentRange(dt))) return false;
    bbox = linearBounds(i, dt);
    return true;
  }

  /*! get fast access to first vertex buffer */
  __forceinline float * getCompactVertexArray () const {
    return (float*) vertices0.getPtr();
  }

  /* gets version info of topology (modification counter of the index buffer) */
  unsigned int getTopologyVersion() const {
    return triangles.modCounter;
  }

  /* returns true if topology changed */
  bool topologyChanged(unsigned int otherVersion) const {
    return triangles.isModified(otherVersion); // || numPrimitivesChanged;
  }

  /* returns the projected area of the i'th triangle (first time step) */
  __forceinline float projectedPrimitiveArea(const size_t i) const {
    const Triangle& tri = triangle(i);
    const Vec3fa v0 = vertex(tri.v[0]);
    const Vec3fa v1 = vertex(tri.v[1]);
    const Vec3fa v2 = vertex(tri.v[2]);
    return areaProjectedTriangle(v0,v1,v2);
  }

public:
  BufferView<Triangle> triangles; //!< array of triangles
  BufferView<Vec3fa> vertices0; //!< fast access to first vertex buffer
  Device::vector<BufferView<Vec3fa>> vertices = device; //!< vertex array for each timestep
  Device::vector<RawBufferView> vertexAttribs = device; //!< vertex attributes
};
namespace isa
{
/*! ISA-specific triangle mesh: adds the primitive reference generation
 *  used by the builders on top of TriangleMesh. */
struct TriangleMeshISA : public TriangleMesh
{
  TriangleMeshISA (Device* device)
    : TriangleMesh(device) {}

  /*! forwards to linearBounds for the given primitive and time range */
  LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
    return linearBounds(primID,time_range);
  }

  /*! Writes a PrimRef for every valid primitive of range r into prims,
   *  starting at index k, and returns the accumulated PrimInfo.
   *  Primitives rejected by buildBounds are skipped. */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID=r.begin(); primID<r.end(); primID++)
    {
      BBox3fa box = empty;
      if (buildBounds(primID,&box))
      {
        const PrimRef ref(box,geomID,unsigned(primID));
        info.add_center2(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }

  /*! Same as createPrimRefArray, but uses the build bounds of the
   *  itime'th time segment. */
  PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID=r.begin(); primID<r.end(); primID++)
    {
      BBox3fa box = empty;
      if (buildBounds(primID,itime,box))
      {
        const PrimRef ref(box,geomID,unsigned(primID));
        info.add_center2(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }

  /*! Writes a PrimRef bounding each valid primitive of range r over the
   *  intersection of time_range with the time range of the geometry.
   *  Returns an empty PrimInfo if that intersection is empty. */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
    if (t0t1.empty()) return info;

    for (size_t primID = r.begin(); primID < r.end(); primID++)
    {
      LBBox3fa lbox = empty;
      if (linearBounds(primID, t0t1, lbox))
      {
        const PrimRef ref(lbox.bounds(), geomID, unsigned(primID));
        info.add_center2(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }

  /*! Writes a PrimRefMB for every primitive of range r that is valid over
   *  the time segments covered by t0t1, and returns the accumulated PrimInfoMB. */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfoMB info(empty);
    for (size_t primID=r.begin(); primID<r.end(); primID++)
    {
      if (valid(primID, timeSegmentRange(t0t1)))
      {
        const PrimRefMB ref(linearBounds(primID,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(primID));
        info.add_primref(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }
};
}
DECLARE_ISA_FUNCTION(TriangleMesh*, createTriangleMesh, Device*);
}

View file

@ -0,0 +1,96 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "accelset.h"
namespace embree
{
/*! User geometry with user defined intersection functions.
 *  Everything except build() and projectedPrimitiveArea() is only declared here. */
struct UserGeometry : public AccelSet
{
/*! type of this geometry */
static const Geometry::GTypeMask geom_type = Geometry::MTY_USER_GEOMETRY;
public:
/*! constructs a user geometry; items defaults to 0 and numTimeSteps to 1 */
UserGeometry (Device* device, unsigned int items = 0, unsigned int numTimeSteps = 1);
virtual void setMask (unsigned mask);
/*! sets the user supplied bounds callback and the pointer stored alongside it */
virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr);
/*! sets the user supplied intersect callback */
virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect);
/*! sets the user supplied occluded callback */
virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded);
/*! intentionally empty */
virtual void build() {}
virtual void addElementsToCount (GeometryCounts & counts) const;
/*! always returns 0 for user geometry; the index i is ignored */
__forceinline float projectedPrimitiveArea(const size_t i) const { return 0.0f; }
};
namespace isa
{
/*! ISA-specific user geometry: adds the primitive reference generation
 *  used by the builders on top of UserGeometry. */
struct UserGeometryISA : public UserGeometry
{
  UserGeometryISA (Device* device)
    : UserGeometry(device) {}

  /*! Writes a PrimRef for every valid primitive of range r into prims,
   *  starting at index k, and returns the accumulated PrimInfo.
   *  Primitives rejected by buildBounds are skipped. */
  PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID=r.begin(); primID<r.end(); primID++)
    {
      BBox3fa box = empty;
      if (buildBounds(primID,&box))
      {
        const PrimRef ref(box,geomID,unsigned(primID));
        info.add_center2(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }

  /*! Same as createPrimRefArray, but uses the build bounds of the
   *  itime'th time segment. */
  PrimInfo createPrimRefArrayMB(mvector<PrimRef>& prims, size_t itime, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    for (size_t primID=r.begin(); primID<r.end(); primID++)
    {
      BBox3fa box = empty;
      if (buildBounds(primID,itime,box))
      {
        const PrimRef ref(box,geomID,unsigned(primID));
        info.add_center2(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }

  /*! Writes a PrimRef bounding each valid primitive of range r over the
   *  intersection of time_range with the time range of the geometry.
   *  Returns an empty PrimInfo if that intersection is empty. */
  PrimInfo createPrimRefArrayMB(PrimRef* prims, const BBox1f& time_range, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfo info(empty);
    const BBox1f t0t1 = BBox1f::intersect(getTimeRange(), time_range);
    if (t0t1.empty()) return info;

    for (size_t primID = r.begin(); primID < r.end(); primID++)
    {
      LBBox3fa lbox = empty;
      if (linearBounds(primID, t0t1, lbox))
      {
        const PrimRef ref(lbox.bounds(), geomID, unsigned(primID));
        info.add_center2(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }

  /*! Writes a PrimRefMB for every primitive of range r that is valid over
   *  the time segments covered by t0t1, and returns the accumulated PrimInfoMB. */
  PrimInfoMB createPrimRefMBArray(mvector<PrimRefMB>& prims, const BBox1f& t0t1, const range<size_t>& r, size_t k, unsigned int geomID) const
  {
    PrimInfoMB info(empty);
    for (size_t primID=r.begin(); primID<r.end(); primID++)
    {
      if (valid(primID, timeSegmentRange(t0t1)))
      {
        const PrimRefMB ref(linearBounds(primID,t0t1),this->numTimeSegments(),this->time_range,this->numTimeSegments(),geomID,unsigned(primID));
        info.add_primref(ref);
        prims[k++] = ref;
      }
    }
    return info;
  }
};
}
DECLARE_ISA_FUNCTION(UserGeometry*, createUserGeometry, Device*);
}

View file

@ -0,0 +1,24 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "scene.h"
#include "../../common/algorithms/parallel_any_of.h"
namespace embree
{
void Scene::checkIfModifiedAndSet ()
{
if (isModified ()) return;
auto geometryIsModified = [this](size_t geomID)->bool {
return isGeometryModified(geomID);
};
if (parallel_any_of (size_t(0), geometries.size (), geometryIsModified)) {
setModified ();
}
}
}

View file

@ -0,0 +1,125 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
namespace embree
{
/*! An item on the stack holds the node ID and distance of that node.
 *  Items are 16 bytes aligned and moved around with 16-byte vector
 *  loads/stores, which is why T is limited to 12 bytes. */
template<typename T>
struct __aligned(16) StackItemT
{
  /*! assert that the xchg function works */
  static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");

  __forceinline StackItemT() {}

  __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}

  /*! use SSE instructions to swap stack items */
  __forceinline static void xchg(StackItemT& a, StackItemT& b)
  {
    const vfloat4 sse_a = vfloat4::load((float*)&a);
    const vfloat4 sse_b = vfloat4::load((float*)&b);
    vfloat4::store(&a,sse_b);
    vfloat4::store(&b,sse_a);
  }

  /*! Sort 2 stack items by ascending dist (s1 ends up with the smaller one). */
  __forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
    if (s2.dist < s1.dist) xchg(s2,s1);
  }

  /*! Sort 3 stack items by ascending dist. */
  __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
  {
    if (s2.dist < s1.dist) xchg(s2,s1);
    if (s3.dist < s2.dist) xchg(s3,s2);
    if (s2.dist < s1.dist) xchg(s2,s1);
  }

  /*! Sort 4 stack items by ascending dist (5 compare-exchange steps). */
  __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
  {
    if (s2.dist < s1.dist) xchg(s2,s1);
    if (s4.dist < s3.dist) xchg(s4,s3);
    if (s3.dist < s1.dist) xchg(s3,s1);
    if (s4.dist < s2.dist) xchg(s4,s2);
    if (s3.dist < s2.dist) xchg(s3,s2);
  }

  /*! Exchanges a and b if lane 2 of b is smaller than lane 2 of a.
   *  NOTE(review): lane 2 presumably holds dist of a packed stack item
   *  (8-byte ptr followed by dist) - confirm against callers. */
  __forceinline static void cmp_xchg(vint4& a, vint4& b)
  {
#if defined(__AVX512VL__)
    const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
#else
    const vboolf4 mask0(b < a);
    const vboolf4 mask(shuffle<2,2,2,2>(mask0));
#endif
    const vint4 c = select(mask,b,a);
    const vint4 d = select(mask,a,b);
    a = c;
    b = d;
  }

  /*! Sort 3 stack items. */
  __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
  {
    cmp_xchg(s2,s1);
    cmp_xchg(s3,s2);
    cmp_xchg(s2,s1);
  }

  /*! Sort 4 stack items. */
  __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
  {
    cmp_xchg(s2,s1);
    cmp_xchg(s4,s3);
    cmp_xchg(s3,s1);
    cmp_xchg(s4,s2);
    cmp_xchg(s3,s2);
  }

  /*! Sort N stack items in [begin,end) with an insertion sort. Unlike the
   *  fixed-size overloads this orders by descending dist, i.e. the item
   *  with the largest dist ends up at begin. */
  __forceinline friend void sort(StackItemT* begin, StackItemT* end)
  {
    /* an empty range would otherwise start at begin+1, never meet end,
     * and run out of bounds */
    if (begin == end) return;

    for (StackItemT* i = begin+1; i != end; ++i)
    {
      const vfloat4 item = vfloat4::load((float*)i);
      const unsigned dist = i->dist;
      StackItemT* j = i;

      /* shift items with smaller dist one position up to make room */
      while ((j != begin) && ((j-1)->dist < dist))
      {
        vfloat4::store(j, vfloat4::load((float*)(j-1)));
        --j;
      }

      vfloat4::store(j, item);
    }
  }

public:
  T ptr;
  unsigned dist;
};
/*! An item on the stack holds the node ID and active ray mask. */
template<typename T>
struct __aligned(8) StackItemMaskT
{
T ptr; // node reference
size_t mask; // active ray mask
};
/*! Stack item made of three machine words.
 *  NOTE(review): presumably an active ray mask plus parent/child node
 *  references for coherent traversal - confirm against the traversal code. */
struct __aligned(8) StackItemMaskCoherent
{
size_t mask;
size_t parent;
size_t child;
};
}

View file

@ -0,0 +1,128 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "stat.h"
namespace embree
{
Stat Stat::instance;
/*! nothing to do here: the Counters member clears itself in its own constructor */
Stat::Stat ()
{
}
/*! prints the gathered statistics to std::cout on destruction, but only
 *  when statistics counters are compiled in */
Stat::~Stat ()
{
#if defined(EMBREE_STAT_COUNTERS)
  print(std::cout);
#endif
}
/*! Prints the counters of the global Stat instance to cout in three
 *  sections: absolute numbers (in millions), per traversal numbers with the
 *  percentage of active SIMD lanes, and the user counters. */
void Stat::print(embree_ostream cout)
{
Counters& cntrs = instance.cntrs;
/* the absolute section reports the "code" counters */
Counters::Data& data = instance.cntrs.code;
//Counters::Data& data = instance.cntrs.active;
/* print absolute numbers */
cout << "--------- ABSOLUTE ---------" << std::endl;
cout << " #normal_travs = " << float(data.normal.travs )*1E-6 << "M" << std::endl;
cout << " #nodes = " << float(data.normal.trav_nodes )*1E-6 << "M" << std::endl;
cout << " #nodes_xfm = " << float(data.normal.trav_xfm_nodes )*1E-6 << "M" << std::endl;
cout << " #leaves = " << float(data.normal.trav_leaves )*1E-6 << "M" << std::endl;
cout << " #prims = " << float(data.normal.trav_prims )*1E-6 << "M" << std::endl;
cout << " #prim_hits = " << float(data.normal.trav_prim_hits )*1E-6 << "M" << std::endl;
cout << " #stack nodes = " << float(data.normal.trav_stack_nodes )*1E-6 << "M" << std::endl;
cout << " #stack pop = " << float(data.normal.trav_stack_pop )*1E-6 << "M" << std::endl;
/* total of the box hit histogram, plain and weighted by the bin index */
size_t normal_box_hits = 0;
size_t weighted_box_hits = 0;
for (size_t i=0;i<SIZE_HISTOGRAM;i++) {
normal_box_hits += data.normal.trav_hit_boxes[i];
weighted_box_hits += data.normal.trav_hit_boxes[i]*i;
}
cout << " #hit_boxes = " << normal_box_hits << " (total) distribution: ";
/* NOTE(review): the divisions below are floating point divisions by the
 * totals above; when nothing was recorded the totals are 0 - confirm the
 * resulting nan/inf output is acceptable */
float average = 0.0f;
for (size_t i=0;i<SIZE_HISTOGRAM;i++)
{
/* percentage of all box hits that fall into bin i */
float value = 100.0f * data.normal.trav_hit_boxes[i] / normal_box_hits;
cout << "[" << i << "] " << value << " ";
average += (float)i*data.normal.trav_hit_boxes[i] / normal_box_hits;
}
cout << " average = " << average << std::endl;
/* second line: share of each bin in the index-weighted total */
for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.normal.trav_hit_boxes[i]*i / weighted_box_hits << " ";
cout << std::endl;
/* shadow ray numbers are only printed if shadow traversals were counted */
if (data.shadow.travs) {
cout << " #shadow_travs = " << float(data.shadow.travs )*1E-6 << "M" << std::endl;
cout << " #nodes = " << float(data.shadow.trav_nodes )*1E-6 << "M" << std::endl;
cout << " #nodes_xfm = " << float(data.shadow.trav_xfm_nodes)*1E-6 << "M" << std::endl;
cout << " #leaves = " << float(data.shadow.trav_leaves )*1E-6 << "M" << std::endl;
cout << " #prims = " << float(data.shadow.trav_prims )*1E-6 << "M" << std::endl;
cout << " #prim_hits = " << float(data.shadow.trav_prim_hits)*1E-6 << "M" << std::endl;
cout << " #stack nodes = " << float(data.shadow.trav_stack_nodes )*1E-6 << "M" << std::endl;
cout << " #stack pop = " << float(data.shadow.trav_stack_pop )*1E-6 << "M" << std::endl;
size_t shadow_box_hits = 0;
size_t weighted_shadow_box_hits = 0;
for (size_t i=0;i<SIZE_HISTOGRAM;i++) {
shadow_box_hits += data.shadow.trav_hit_boxes[i];
weighted_shadow_box_hits += data.shadow.trav_hit_boxes[i]*i;
}
cout << " #hit_boxes = ";
for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i] / shadow_box_hits << " ";
cout << std::endl;
for (size_t i=0;i<SIZE_HISTOGRAM;i++) cout << "[" << i << "] " << 100.0f * data.shadow.trav_hit_boxes[i]*i / weighted_shadow_box_hits << " ";
cout << std::endl;
}
cout << std::endl;
/* print per traversal numbers */
cout << "--------- PER TRAVERSAL ---------" << std::endl;
/* ratio of the "active" to the "all" counters, printed as "% active" below */
float active_normal_travs = float(cntrs.active.normal.travs )/float(cntrs.all.normal.travs );
float active_normal_trav_nodes = float(cntrs.active.normal.trav_nodes )/float(cntrs.all.normal.trav_nodes );
float active_normal_trav_xfm_nodes = float(cntrs.active.normal.trav_xfm_nodes )/float(cntrs.all.normal.trav_xfm_nodes );
float active_normal_trav_leaves = float(cntrs.active.normal.trav_leaves)/float(cntrs.all.normal.trav_leaves);
float active_normal_trav_prims = float(cntrs.active.normal.trav_prims )/float(cntrs.all.normal.trav_prims );
float active_normal_trav_prim_hits = float(cntrs.active.normal.trav_prim_hits )/float(cntrs.all.normal.trav_prim_hits );
float active_normal_trav_stack_pop = float(cntrs.active.normal.trav_stack_pop )/float(cntrs.all.normal.trav_stack_pop );
/* every "code" counter is normalized by the number of normal traversals */
cout << " #normal_travs = " << float(cntrs.code.normal.travs )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_travs << "% active" << std::endl;
cout << " #nodes = " << float(cntrs.code.normal.trav_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_nodes << "% active" << std::endl;
cout << " #node_xfm = " << float(cntrs.code.normal.trav_xfm_nodes )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_xfm_nodes << "% active" << std::endl;
cout << " #leaves = " << float(cntrs.code.normal.trav_leaves)/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_leaves << "% active" << std::endl;
cout << " #prims = " << float(cntrs.code.normal.trav_prims )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prims << "% active" << std::endl;
cout << " #prim_hits = " << float(cntrs.code.normal.trav_prim_hits )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_prim_hits << "% active" << std::endl;
cout << " #stack_pop = " << float(cntrs.code.normal.trav_stack_pop )/float(cntrs.code.normal.travs) << ", " << 100.0f*active_normal_trav_stack_pop << "% active" << std::endl;
if (cntrs.all.shadow.travs) {
float active_shadow_travs = float(cntrs.active.shadow.travs )/float(cntrs.all.shadow.travs );
float active_shadow_trav_nodes = float(cntrs.active.shadow.trav_nodes )/float(cntrs.all.shadow.trav_nodes );
float active_shadow_trav_xfm_nodes = float(cntrs.active.shadow.trav_xfm_nodes )/float(cntrs.all.shadow.trav_xfm_nodes );
float active_shadow_trav_leaves = float(cntrs.active.shadow.trav_leaves)/float(cntrs.all.shadow.trav_leaves);
float active_shadow_trav_prims = float(cntrs.active.shadow.trav_prims )/float(cntrs.all.shadow.trav_prims );
float active_shadow_trav_prim_hits = float(cntrs.active.shadow.trav_prim_hits )/float(cntrs.all.shadow.trav_prim_hits );
cout << " #shadow_travs = " << float(cntrs.code.shadow.travs )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_travs << "% active" << std::endl;
cout << " #nodes = " << float(cntrs.code.shadow.trav_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_nodes << "% active" << std::endl;
cout << " #nodes_xfm = " << float(cntrs.code.shadow.trav_xfm_nodes )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_xfm_nodes << "% active" << std::endl;
cout << " #leaves = " << float(cntrs.code.shadow.trav_leaves)/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_leaves << "% active" << std::endl;
cout << " #prims = " << float(cntrs.code.shadow.trav_prims )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prims << "% active" << std::endl;
cout << " #prim_hits = " << float(cntrs.code.shadow.trav_prim_hits )/float(cntrs.code.shadow.travs) << ", " << 100.0f*active_shadow_trav_prim_hits << "% active" << std::endl;
}
cout << std::endl;
/* print user counters for performance tuning */
cout << "--------- USER ---------" << std::endl;
/* each user counter is normalized by the number of normal plus shadow traversals in "all" */
for (size_t i=0; i<10; i++)
cout << "#user" << i << " = " << float(cntrs.user[i])/float(cntrs.all.normal.travs+cntrs.all.shadow.travs) << " per traversal" << std::endl;
/* user counters 5, 6 and 7 as a percentage of user counter 3 */
cout << "#user5/user3 " << 100.0f*float(cntrs.user[5])/float(cntrs.user[3]) << "%" << std::endl;
cout << "#user6/user3 " << 100.0f*float(cntrs.user[6])/float(cntrs.user[3]) << "%" << std::endl;
cout << "#user7/user3 " << 100.0f*float(cntrs.user[7])/float(cntrs.user[3]) << "%" << std::endl;
cout << std::endl;
}
}

View file

@ -0,0 +1,116 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
/* Macros to gather statistics. With EMBREE_STAT_COUNTERS defined:
 *   STAT(x)          expands to x
 *   STAT3(s,x,y,z)   adds x, y and z to counter s of the code, active and
 *                    all counter sets
 *   STAT_USER(i,x)   adds x to user counter i
 * Without it all three expand to nothing.
 * Note: STAT3 expands to three separate statements and STAT3/STAT_USER
 * carry their own trailing semicolon, so they are not safe as the unbraced
 * body of an if/else. */
#ifdef EMBREE_STAT_COUNTERS
# define STAT(x) x
# define STAT3(s,x,y,z) \
STAT(Stat::get().code .s+=x); \
STAT(Stat::get().active.s+=y); \
STAT(Stat::get().all .s+=z);
# define STAT_USER(i,x) Stat::get().user[i]+=x;
#else
# define STAT(x)
# define STAT3(s,x,y,z)
# define STAT_USER(i,x)
#endif
namespace embree
{
/*! Gathers ray tracing statistics. We count 1) how often a code
* location is reached, 2) how many SIMD lanes are active, 3) how
* many SIMD lanes reach the code location */
class Stat
{
public:

  /*! number of hit box histogram bins iterated when printing */
  static const size_t SIZE_HISTOGRAM = 64+1;

  /*! constructs stat counter class */
  Stat ();

  /*! destructs stat counter class */
  ~Stat ();

  /*! all counters of the statistics: three counter sets plus the user counters */
  class Counters
  {
  public:

    /*! starts with all counters set to zero */
    Counters () { clear(); }

    /*! resets every counter to zero */
    void clear()
    {
      all.clear();
      active.clear();
      code.clear();
      for (size_t i=0; i<sizeof(user)/sizeof(user[0]); i++)
        user[i].store(0);
    }

  public:

    /* per packet and per ray statistics */
    struct Data
    {
      /*! resets the counters of all three ray kinds */
      void clear ()
      {
        normal.clear();
        shadow.clear();
        point_query.clear();
      }

      /* normal and shadow ray statistics */
      struct
      {
        /*! resets every traversal counter and histogram bin to zero */
        void clear()
        {
          travs.store(0);
          trav_nodes.store(0);
          trav_leaves.store(0);
          trav_prims.store(0);
          trav_prim_hits.store(0);
          for (size_t i=0; i<SIZE_HISTOGRAM+1; i++)
            trav_hit_boxes[i].store(0);
          trav_stack_pop.store(0);
          trav_stack_nodes.store(0);
          trav_xfm_nodes.store(0);
        }

      public:
        std::atomic<size_t> travs;
        std::atomic<size_t> trav_nodes;
        std::atomic<size_t> trav_leaves;
        std::atomic<size_t> trav_prims;
        std::atomic<size_t> trav_prim_hits;
        std::atomic<size_t> trav_hit_boxes[SIZE_HISTOGRAM+1];
        std::atomic<size_t> trav_stack_pop;
        std::atomic<size_t> trav_stack_nodes;
        std::atomic<size_t> trav_xfm_nodes;
      } normal, shadow, point_query;
    } all, active, code;

    /*! free-form counters for performance tuning */
    std::atomic<size_t> user[10];
  };

public:

  /*! returns the counters of the global instance */
  static __forceinline Counters& get() {
    return instance.cntrs;
  }

  /*! resets the counters of the global instance */
  static void clear() {
    instance.cntrs.clear();
  }

  /*! prints the counters of the global instance */
  static void print(embree_ostream cout);

private:
  Counters cntrs;

private:
  static Stat instance;
};
}

View file

@ -0,0 +1,541 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "state.h"
#include "../../common/lexers/streamfilters.h"
namespace embree
{
/* global mutex used to make printing to cout thread safe */
MutexSys g_printMutex;
/* definition of the static error handler member shared by all State objects */
State::ErrorHandler State::g_errorHandler;
/*! Creates the thread-local storage key for the per-thread error slot.
 *  The slots themselves are allocated lazily by error(). */
State::ErrorHandler::ErrorHandler()
  : thread_error(createTls())
{
}
/*! Frees every per-thread error slot that error() handed out and
 *  releases the thread-local storage key. */
State::ErrorHandler::~ErrorHandler()
{
  /* keep the slot list locked for the whole teardown */
  Lock<MutexSys> guard(errors_mutex);

  for (RTCError* slot : thread_errors)
    delete slot;

  destroyTls(thread_error);
  thread_errors.clear();
}
/*! Returns the error slot of the calling thread, creating it on first use.
 *  A new slot starts as RTC_ERROR_NONE, is recorded in thread_errors (so the
 *  destructor can free it) and is stored in the thread-local storage. */
RTCError* State::ErrorHandler::error()
{
  /* fast path: this thread already has a slot */
  if (RTCError* const existing = (RTCError*) getTls(thread_error))
    return existing;

  /* slow path: allocate and register a slot under the mutex */
  Lock<MutexSys> guard(errors_mutex);
  RTCError* const fresh = new RTCError(RTC_ERROR_NONE);
  thread_errors.push_back(fresh);
  setTls(thread_error,fresh);
  return fresh;
}
/*! Initializes every setting to its built-in default. The enabled CPU
 *  features start as whatever getCPUFeatures() reports and the builder
 *  features mirror them; parse() may override any of these values later. */
State::State ()
  : enabled_cpu_features(getCPUFeatures()),
    enabled_builder_cpu_features(enabled_cpu_features),
    frequency_level(FREQUENCY_SIMD256)
{
  /* every accel/builder/traverser selector starts out as "default" */
  std::string* const selectors[] = {
    &tri_accel,     &tri_builder,     &tri_traverser,
    &tri_accel_mb,  &tri_builder_mb,  &tri_traverser_mb,
    &quad_accel,    &quad_builder,    &quad_traverser,
    &quad_accel_mb, &quad_builder_mb, &quad_traverser_mb,
    &line_accel,    &line_builder,    &line_traverser,
    &line_accel_mb, &line_builder_mb, &line_traverser_mb,
    &hair_accel,    &hair_builder,    &hair_traverser,
    &hair_accel_mb, &hair_builder_mb, &hair_traverser_mb,
    &object_accel,    &object_builder,
    &object_accel_mb, &object_builder_mb,
    &subdiv_accel,  &subdiv_accel_mb,
    &grid_accel,    &grid_builder,
    &grid_accel_mb, &grid_builder_mb
  };
  for (std::string* selector : selectors)
    *selector = "default";

  /* leaf sizes of the object (user geometry) acceleration structures */
  object_accel_min_leaf_size    = 1;
  object_accel_max_leaf_size    = 1;
  object_accel_mb_min_leaf_size = 1;
  object_accel_mb_max_leaf_size = 1;

  /* spatial splits and tessellation cache */
  max_spatial_split_replications = 1.2f;
  useSpatialPreSplits            = false;
  tessellation_cache_size        = 128*1024*1024;

  /* instancing */
  instancing_open_min       = 0;
  instancing_block_size     = 0;
  instancing_open_factor    = 8.0f;
  instancing_open_max_depth = 32;
  instancing_open_max       = 50000000;

  /* general flags; -1 marks quality/scene flags as "not overridden" */
  float_exceptions = false;
  quality_flags    = -1;
  scene_flags      = -1;
  verbose          = 0;
  benchmark        = 0;

  /* threading */
  numThreads     = 0;
  numUserThreads = 0;
#if TASKING_INTERNAL
  set_affinity = true;
#else
  set_affinity = false;
#endif
  start_threads = false;

  /* huge pages are requested by default on Linux only */
  enable_selockmemoryprivilege = false;
#if defined(__LINUX__)
  hugepages = true;
#else
  hugepages = false;
#endif
  hugepages_success = true;

  /* allocator tuning */
  alloc_main_block_size     = 0;
  alloc_num_main_slots      = 0;
  alloc_thread_block_size   = 0;
  alloc_single_thread_alloc = -1;

  /* no callbacks installed yet */
  error_function          = nullptr;
  error_function_userptr  = nullptr;
  memory_monitor_function = nullptr;
  memory_monitor_userptr  = nullptr;
}
/*! nothing to release explicitly; the members clean up after themselves */
State::~State() = default;
/*! Returns true when every feature bit of `isa` is contained in
 *  enabled_cpu_features. */
bool State::hasISA(const int isa)
{
  const int enabled_subset = enabled_cpu_features & isa;
  return enabled_subset == isa;
}
bool State::checkISASupport() {
#if defined(__ARM_NEON)
/*
* NEON CPU type is a mixture of NEON and SSE2
*/
bool hasSSE2 = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_SSE2;
/* this will be true when explicitly initialize Device with `isa=neon` config */
bool hasNEON = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_NEON;
return hasSSE2 || hasNEON;
#else
return (getCPUFeatures() & enabled_cpu_features) == enabled_cpu_features;
#endif
}
/*! Debug-time sanity checks. Everything in here is an assert, so the
 * function has no effect when asserts are compiled out. */
void State::verify()
{
/* verify that calculations stay in range */
assert(rcp(min_rcp_input)*FLT_LARGE+FLT_LARGE < 0.01f*FLT_MAX);
/* here we verify that CPP files compiled for a specific ISA only
* call that same or lower ISA version of non-inlined class member
* functions */
#if defined(DEBUG)
#if defined(EMBREE_TARGET_SSE2)
/* the SSE2 check is skipped on ARM NEON builds */
#if !defined(__ARM_NEON)
assert(sse2::getISA() <= SSE2);
#endif
#endif
#if defined(EMBREE_TARGET_SSE42)
assert(sse42::getISA() <= SSE42);
#endif
#if defined(EMBREE_TARGET_AVX)
assert(avx::getISA() <= AVX);
#endif
#if defined(EMBREE_TARGET_AVX2)
assert(avx2::getISA() <= AVX2);
#endif
#if defined (EMBREE_TARGET_AVX512)
assert(avx512::getISA() <= AVX512);
#endif
#endif
}
/* symbol tokens handed to the TokenStream by parseFile() and parseString():
 * '=' follows an option name, ',' optionally separates options and '|'
 * combines scene flags (see State::parse) */
const char* symbols[3] = { "=", ",", "|" };
bool State::parseFile(const FileName& fileName)
{
Ref<Stream<int> > file;
// -- GODOT start --
// try {
file = new FileStream(fileName);
// }
// catch (std::runtime_error& e) {
// (void) e;
// return false;
// }
// -- GODOT end --
std::vector<std::string> syms;
for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++)
syms.push_back(symbols[i]);
Ref<TokenStream> cin = new TokenStream(new LineCommentFilter(file,"#"),
TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.",
TokenStream::separators,syms);
parse(cin);
return true;
}
void State::parseString(const char* cfg)
{
if (cfg == nullptr) return;
std::vector<std::string> syms;
for (size_t i=0; i<sizeof(symbols)/sizeof(void*); i++)
syms.push_back(symbols[i]);
Ref<TokenStream> cin = new TokenStream(new StrStream(cfg),
TokenStream::alpha+TokenStream::ALPHA+TokenStream::numbers+"_.",
TokenStream::separators,syms);
parse(cin);
}
/*! Maps an ISA name to its CPU feature set.
 *
 *  The comparison is exact; State::parse() lower-cases the name before
 *  calling. Names that are not listed below yield SSE2.
 */
int string_to_cpufeatures(const std::string& isa)
{
  struct NamedISA { const char* name; int features; };

  /* recognized spellings; "sse41"/"sse4.1" and "sse42"/"sse4.2" are aliases */
  const NamedISA known_isas[] = {
    { "sse",    SSE    },
    { "sse2",   SSE2   },
    { "sse3",   SSE3   },
    { "ssse3",  SSSE3  },
    { "sse41",  SSE41  },
    { "sse4.1", SSE41  },
    { "sse42",  SSE42  },
    { "sse4.2", SSE42  },
    { "avx",    AVX    },
    { "avxi",   AVXI   },
    { "avx2",   AVX2   },
    { "avx512", AVX512 },
  };

  for (const NamedISA& candidate : known_isas)
    if (isa == candidate.name)
      return candidate.features;

  /* unknown name */
  return SSE2;
}
/*! Parses settings of the form `name = value` from the token stream and
 * stores them in the matching members.
 *
 * Each loop iteration reads one token and compares it against the known
 * option names. Because of the short-circuit `&&`, trySymbol("=") is only
 * evaluated once the name matched (presumably it consumes the symbol when
 * present - confirm against TokenStream). A token that matches no option
 * is dropped without any diagnostic. After every token an optional ','
 * is accepted as separator.
 *
 * NOTE(review): grid_builder and grid_builder_mb are printed by print() but
 * have no option here, so they always keep their constructor value. */
void State::parse(Ref<TokenStream> cin)
{
/* parse until end of stream */
while (cin->peek() != Token::Eof())
{
const Token tok = cin->get();
/* --- threading --- */
if (tok == Token::Id("threads") && cin->trySymbol("="))
numThreads = cin->get().Int();
else if (tok == Token::Id("user_threads")&& cin->trySymbol("="))
numUserThreads = cin->get().Int();
/* "set_affinity" and "affinity" are two spellings of the same option */
else if (tok == Token::Id("set_affinity")&& cin->trySymbol("="))
set_affinity = cin->get().Int();
else if (tok == Token::Id("affinity")&& cin->trySymbol("="))
set_affinity = cin->get().Int();
else if (tok == Token::Id("start_threads")&& cin->trySymbol("="))
start_threads = cin->get().Int();
/* --- ISA selection --- */
/* "isa" replaces the enabled feature set (for builders as well) */
else if (tok == Token::Id("isa") && cin->trySymbol("=")) {
std::string isa_str = toLowerCase(cin->get().Identifier());
enabled_cpu_features = string_to_cpufeatures(isa_str);
enabled_builder_cpu_features = enabled_cpu_features;
}
/* "max_isa" only masks the current feature set, it never adds features */
else if (tok == Token::Id("max_isa") && cin->trySymbol("=")) {
std::string isa_str = toLowerCase(cin->get().Identifier());
enabled_cpu_features &= string_to_cpufeatures(isa_str);
enabled_builder_cpu_features &= enabled_cpu_features;
}
/* "max_builder_isa" masks the builder feature set only */
else if (tok == Token::Id("max_builder_isa") && cin->trySymbol("=")) {
std::string isa_str = toLowerCase(cin->get().Identifier());
enabled_builder_cpu_features &= string_to_cpufeatures(isa_str);
}
/* the frequency level is matched case-sensitively; any other value
 * leaves frequency_level unchanged */
else if (tok == Token::Id("frequency_level") && cin->trySymbol("=")) {
std::string freq = cin->get().Identifier();
if (freq == "simd128") frequency_level = FREQUENCY_SIMD128;
else if (freq == "simd256") frequency_level = FREQUENCY_SIMD256;
else if (freq == "simd512") frequency_level = FREQUENCY_SIMD512;
}
/* --- memory / floating point --- */
else if (tok == Token::Id("enable_selockmemoryprivilege") && cin->trySymbol("=")) {
enable_selockmemoryprivilege = cin->get().Int();
}
else if (tok == Token::Id("hugepages") && cin->trySymbol("=")) {
hugepages = cin->get().Int();
}
else if (tok == Token::Id("float_exceptions") && cin->trySymbol("="))
float_exceptions = cin->get().Int();
/* --- triangles: the un-prefixed names ("accel", "builder", ...) are
 * aliases for the tri_* options --- */
else if ((tok == Token::Id("tri_accel") || tok == Token::Id("accel")) && cin->trySymbol("="))
tri_accel = cin->get().Identifier();
else if ((tok == Token::Id("tri_builder") || tok == Token::Id("builder")) && cin->trySymbol("="))
tri_builder = cin->get().Identifier();
else if ((tok == Token::Id("tri_traverser") || tok == Token::Id("traverser")) && cin->trySymbol("="))
tri_traverser = cin->get().Identifier();
else if ((tok == Token::Id("tri_accel_mb") || tok == Token::Id("accel_mb")) && cin->trySymbol("="))
tri_accel_mb = cin->get().Identifier();
else if ((tok == Token::Id("tri_builder_mb") || tok == Token::Id("builder_mb")) && cin->trySymbol("="))
tri_builder_mb = cin->get().Identifier();
else if ((tok == Token::Id("tri_traverser_mb") || tok == Token::Id("traverser_mb")) && cin->trySymbol("="))
tri_traverser_mb = cin->get().Identifier();
/* --- quads --- */
else if ((tok == Token::Id("quad_accel")) && cin->trySymbol("="))
quad_accel = cin->get().Identifier();
else if ((tok == Token::Id("quad_builder")) && cin->trySymbol("="))
quad_builder = cin->get().Identifier();
else if ((tok == Token::Id("quad_traverser")) && cin->trySymbol("="))
quad_traverser = cin->get().Identifier();
else if ((tok == Token::Id("quad_accel_mb")) && cin->trySymbol("="))
quad_accel_mb = cin->get().Identifier();
else if ((tok == Token::Id("quad_builder_mb")) && cin->trySymbol("="))
quad_builder_mb = cin->get().Identifier();
else if ((tok == Token::Id("quad_traverser_mb")) && cin->trySymbol("="))
quad_traverser_mb = cin->get().Identifier();
/* --- line segments --- */
else if ((tok == Token::Id("line_accel")) && cin->trySymbol("="))
line_accel = cin->get().Identifier();
else if ((tok == Token::Id("line_builder")) && cin->trySymbol("="))
line_builder = cin->get().Identifier();
else if ((tok == Token::Id("line_traverser")) && cin->trySymbol("="))
line_traverser = cin->get().Identifier();
else if ((tok == Token::Id("line_accel_mb")) && cin->trySymbol("="))
line_accel_mb = cin->get().Identifier();
else if ((tok == Token::Id("line_builder_mb")) && cin->trySymbol("="))
line_builder_mb = cin->get().Identifier();
else if ((tok == Token::Id("line_traverser_mb")) && cin->trySymbol("="))
line_traverser_mb = cin->get().Identifier();
/* --- hair --- */
else if (tok == Token::Id("hair_accel") && cin->trySymbol("="))
hair_accel = cin->get().Identifier();
else if (tok == Token::Id("hair_builder") && cin->trySymbol("="))
hair_builder = cin->get().Identifier();
else if (tok == Token::Id("hair_traverser") && cin->trySymbol("="))
hair_traverser = cin->get().Identifier();
else if (tok == Token::Id("hair_accel_mb") && cin->trySymbol("="))
hair_accel_mb = cin->get().Identifier();
else if (tok == Token::Id("hair_builder_mb") && cin->trySymbol("="))
hair_builder_mb = cin->get().Identifier();
else if (tok == Token::Id("hair_traverser_mb") && cin->trySymbol("="))
hair_traverser_mb = cin->get().Identifier();
/* --- objects (user geometries) --- */
else if (tok == Token::Id("object_accel") && cin->trySymbol("="))
object_accel = cin->get().Identifier();
else if (tok == Token::Id("object_builder") && cin->trySymbol("="))
object_builder = cin->get().Identifier();
else if (tok == Token::Id("object_accel_min_leaf_size") && cin->trySymbol("="))
object_accel_min_leaf_size = cin->get().Int();
else if (tok == Token::Id("object_accel_max_leaf_size") && cin->trySymbol("="))
object_accel_max_leaf_size = cin->get().Int();
else if (tok == Token::Id("object_accel_mb") && cin->trySymbol("="))
object_accel_mb = cin->get().Identifier();
else if (tok == Token::Id("object_builder_mb") && cin->trySymbol("="))
object_builder_mb = cin->get().Identifier();
else if (tok == Token::Id("object_accel_mb_min_leaf_size") && cin->trySymbol("="))
object_accel_mb_min_leaf_size = cin->get().Int();
else if (tok == Token::Id("object_accel_mb_max_leaf_size") && cin->trySymbol("="))
object_accel_mb_max_leaf_size = cin->get().Int();
/* --- instancing: block size and open factor are mutually exclusive,
 * setting one resets the other to zero --- */
else if (tok == Token::Id("instancing_open_min") && cin->trySymbol("="))
instancing_open_min = cin->get().Int();
else if (tok == Token::Id("instancing_block_size") && cin->trySymbol("=")) {
instancing_block_size = cin->get().Int();
instancing_open_factor = 0.0f;
}
else if (tok == Token::Id("instancing_open_max_depth") && cin->trySymbol("="))
instancing_open_max_depth = cin->get().Int();
else if (tok == Token::Id("instancing_open_factor") && cin->trySymbol("=")) {
instancing_block_size = 0;
instancing_open_factor = cin->get().Float();
}
else if (tok == Token::Id("instancing_open_max") && cin->trySymbol("="))
instancing_open_max = cin->get().Int();
/* --- subdivision surfaces and grids --- */
else if (tok == Token::Id("subdiv_accel") && cin->trySymbol("="))
subdiv_accel = cin->get().Identifier();
else if (tok == Token::Id("subdiv_accel_mb") && cin->trySymbol("="))
subdiv_accel_mb = cin->get().Identifier();
else if (tok == Token::Id("grid_accel") && cin->trySymbol("="))
grid_accel = cin->get().Identifier();
else if (tok == Token::Id("grid_accel_mb") && cin->trySymbol("="))
grid_accel_mb = cin->get().Identifier();
/* --- diagnostics --- */
else if (tok == Token::Id("verbose") && cin->trySymbol("="))
verbose = cin->get().Int();
else if (tok == Token::Id("benchmark") && cin->trySymbol("="))
benchmark = cin->get().Int();
/* quality accepts low/medium/high; any other value keeps quality_flags as is */
else if (tok == Token::Id("quality")) {
if (cin->trySymbol("=")) {
Token flag = cin->get();
if (flag == Token::Id("low")) quality_flags = RTC_BUILD_QUALITY_LOW;
else if (flag == Token::Id("medium")) quality_flags = RTC_BUILD_QUALITY_MEDIUM;
else if (flag == Token::Id("high")) quality_flags = RTC_BUILD_QUALITY_HIGH;
}
}
/* scene_flags is reset to 0 as soon as the name is seen (even without
 * '='), then the '|' separated flag names are OR-ed in; unknown flag
 * names are ignored */
else if (tok == Token::Id("scene_flags")) {
scene_flags = 0;
if (cin->trySymbol("=")) {
do {
Token flag = cin->get();
if (flag == Token::Id("dynamic") ) scene_flags |= RTC_SCENE_FLAG_DYNAMIC;
else if (flag == Token::Id("compact")) scene_flags |= RTC_SCENE_FLAG_COMPACT;
else if (flag == Token::Id("robust")) scene_flags |= RTC_SCENE_FLAG_ROBUST;
} while (cin->trySymbol("|"));
}
}
/* --- spatial splits and tessellation cache --- */
else if (tok == Token::Id("max_spatial_split_replications") && cin->trySymbol("="))
max_spatial_split_replications = cin->get().Float();
else if (tok == Token::Id("presplits") && cin->trySymbol("="))
useSpatialPreSplits = cin->get().Int() != 0 ? true : false;
/* both cache size spellings take the value multiplied by 1024*1024 */
else if (tok == Token::Id("tessellation_cache_size") && cin->trySymbol("="))
tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f);
else if (tok == Token::Id("cache_size") && cin->trySymbol("="))
tessellation_cache_size = size_t(cin->get().Float()*1024.0f*1024.0f);
/* --- allocator tuning --- */
else if (tok == Token::Id("alloc_main_block_size") && cin->trySymbol("="))
alloc_main_block_size = cin->get().Int();
else if (tok == Token::Id("alloc_num_main_slots") && cin->trySymbol("="))
alloc_num_main_slots = cin->get().Int();
else if (tok == Token::Id("alloc_thread_block_size") && cin->trySymbol("="))
alloc_thread_block_size = cin->get().Int();
else if (tok == Token::Id("alloc_single_thread_alloc") && cin->trySymbol("="))
alloc_single_thread_alloc = cin->get().Int();
cin->trySymbol(","); // optional , separator
}
}
/*! Returns true when the configured verbosity is at least N. */
bool State::verbosity(size_t N)
{
  return verbose >= N;
}
void State::print()
{
std::cout << "general:" << std::endl;
std::cout << " build threads = " << numThreads << std::endl;
std::cout << " build user threads = " << numUserThreads << std::endl;
std::cout << " start_threads = " << start_threads << std::endl;
std::cout << " affinity = " << set_affinity << std::endl;
std::cout << " frequency_level = ";
switch (frequency_level) {
case FREQUENCY_SIMD128: std::cout << "simd128" << std::endl; break;
case FREQUENCY_SIMD256: std::cout << "simd256" << std::endl; break;
case FREQUENCY_SIMD512: std::cout << "simd512" << std::endl; break;
default: std::cout << "error" << std::endl; break;
}
std::cout << " hugepages = ";
if (!hugepages) std::cout << "disabled" << std::endl;
else if (hugepages_success) std::cout << "enabled" << std::endl;
else std::cout << "failed" << std::endl;
std::cout << " verbosity = " << verbose << std::endl;
std::cout << " cache_size = " << float(tessellation_cache_size)*1E-6 << " MB" << std::endl;
std::cout << " max_spatial_split_replications = " << max_spatial_split_replications << std::endl;
std::cout << "triangles:" << std::endl;
std::cout << " accel = " << tri_accel << std::endl;
std::cout << " builder = " << tri_builder << std::endl;
std::cout << " traverser = " << tri_traverser << std::endl;
std::cout << "motion blur triangles:" << std::endl;
std::cout << " accel = " << tri_accel_mb << std::endl;
std::cout << " builder = " << tri_builder_mb << std::endl;
std::cout << " traverser = " << tri_traverser_mb << std::endl;
std::cout << "quads:" << std::endl;
std::cout << " accel = " << quad_accel << std::endl;
std::cout << " builder = " << quad_builder << std::endl;
std::cout << " traverser = " << quad_traverser << std::endl;
std::cout << "motion blur quads:" << std::endl;
std::cout << " accel = " << quad_accel_mb << std::endl;
std::cout << " builder = " << quad_builder_mb << std::endl;
std::cout << " traverser = " << quad_traverser_mb << std::endl;
std::cout << "line segments:" << std::endl;
std::cout << " accel = " << line_accel << std::endl;
std::cout << " builder = " << line_builder << std::endl;
std::cout << " traverser = " << line_traverser << std::endl;
std::cout << "motion blur line segments:" << std::endl;
std::cout << " accel = " << line_accel_mb << std::endl;
std::cout << " builder = " << line_builder_mb << std::endl;
std::cout << " traverser = " << line_traverser_mb << std::endl;
std::cout << "hair:" << std::endl;
std::cout << " accel = " << hair_accel << std::endl;
std::cout << " builder = " << hair_builder << std::endl;
std::cout << " traverser = " << hair_traverser << std::endl;
std::cout << "motion blur hair:" << std::endl;
std::cout << " accel = " << hair_accel_mb << std::endl;
std::cout << " builder = " << hair_builder_mb << std::endl;
std::cout << " traverser = " << hair_traverser_mb << std::endl;
std::cout << "subdivision surfaces:" << std::endl;
std::cout << " accel = " << subdiv_accel << std::endl;
std::cout << "grids:" << std::endl;
std::cout << " accel = " << grid_accel << std::endl;
std::cout << " builder = " << grid_builder << std::endl;
std::cout << "motion blur grids:" << std::endl;
std::cout << " accel = " << grid_accel_mb << std::endl;
std::cout << " builder = " << grid_builder_mb << std::endl;
std::cout << "object_accel:" << std::endl;
std::cout << " min_leaf_size = " << object_accel_min_leaf_size << std::endl;
std::cout << " max_leaf_size = " << object_accel_max_leaf_size << std::endl;
std::cout << "object_accel_mb:" << std::endl;
std::cout << " min_leaf_size = " << object_accel_mb_min_leaf_size << std::endl;
std::cout << " max_leaf_size = " << object_accel_mb_max_leaf_size << std::endl;
}
}

View file

@ -0,0 +1,196 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "default.h"
namespace embree
{
/* mutex to make printing to cout thread safe; callers that print are
 * expected to lock it themselves */
extern MutexSys g_printMutex;
/*! Holds all configuration settings. The constructor fills in the
 * defaults, parseFile()/parseString()/parse() override them from
 * `name = value` options and print() dumps them. Also carries the error
 * and memory monitor callbacks together with their user pointers. */
struct State : public RefCount
{
public:
/*! state construction */
State ();
/*! state destruction */
~State();
/*! verifies that state is correct */
void verify();
/*! parses state from a configuration file */
bool parseFile(const FileName& fileName);
/*! parses the state from a string */
void parseString(const char* cfg);
/*! parses the state from a stream */
void parse(Ref<TokenStream> cin);
/*! prints the state */
void print();
/*! checks if verbosity level is at least N */
bool verbosity(size_t N);
/*! checks if some particular ISA is enabled */
bool hasISA(const int isa);
/*! check whether selected ISA is supported by the HW */
bool checkISASupport();
public:
std::string tri_accel; //!< acceleration structure to use for triangles
std::string tri_builder; //!< builder to use for triangles
std::string tri_traverser; //!< traverser to use for triangles
public:
std::string tri_accel_mb; //!< acceleration structure to use for motion blur triangles
std::string tri_builder_mb; //!< builder to use for motion blur triangles
std::string tri_traverser_mb; //!< traverser to use for motion blur triangles
public:
std::string quad_accel; //!< acceleration structure to use for quads
std::string quad_builder; //!< builder to use for quads
std::string quad_traverser; //!< traverser to use for quads
public:
std::string quad_accel_mb; //!< acceleration structure to use for motion blur quads
std::string quad_builder_mb; //!< builder to use for motion blur quads
std::string quad_traverser_mb; //!< traverser to use for motion blur quads
public:
std::string line_accel; //!< acceleration structure to use for line segments
std::string line_builder; //!< builder to use for line segments
std::string line_traverser; //!< traverser to use for line segments
public:
std::string line_accel_mb; //!< acceleration structure to use for motion blur line segments
std::string line_builder_mb; //!< builder to use for motion blur line segments
std::string line_traverser_mb; //!< traverser to use for motion blur line segments
public:
std::string hair_accel; //!< hair acceleration structure to use
std::string hair_builder; //!< builder to use for hair
std::string hair_traverser; //!< traverser to use for hair
public:
std::string hair_accel_mb; //!< acceleration structure to use for motion blur hair
std::string hair_builder_mb; //!< builder to use for motion blur hair
std::string hair_traverser_mb; //!< traverser to use for motion blur hair
public:
std::string object_accel; //!< acceleration structure for user geometries
std::string object_builder; //!< builder for user geometries
int object_accel_min_leaf_size; //!< minimum leaf size for object acceleration structure
int object_accel_max_leaf_size; //!< maximum leaf size for object acceleration structure
public:
std::string object_accel_mb; //!< acceleration structure for motion blur user geometries
std::string object_builder_mb; //!< builder for motion blur user geometries
int object_accel_mb_min_leaf_size; //!< minimum leaf size for mblur object acceleration structure
int object_accel_mb_max_leaf_size; //!< maximum leaf size for mblur object acceleration structure
public:
std::string subdiv_accel; //!< acceleration structure to use for subdivision surfaces
std::string subdiv_accel_mb; //!< acceleration structure to use for motion blur subdivision surfaces
public:
std::string grid_accel; //!< acceleration structure to use for grids
std::string grid_builder; //!< builder for grids
std::string grid_accel_mb; //!< acceleration structure to use for motion blur grids
std::string grid_builder_mb; //!< builder for motion blur grids
public:
float max_spatial_split_replications; //!< maximally replications*N many primitives in accel for spatial splits
bool useSpatialPreSplits; //!< use spatial pre-splits instead of the full spatial split builder
size_t tessellation_cache_size; //!< size of the shared tessellation cache in bytes
public:
size_t instancing_open_min; //!< instancing opens tree to minimally that number of subtrees
size_t instancing_block_size; //!< instancing opens tree up to average block size of primitives
float instancing_open_factor; //!< instancing opens tree up to x times the number of instances
size_t instancing_open_max_depth; //!< maximum open depth for geometries
size_t instancing_open_max; //!< instancing opens tree to maximally that number of subtrees
public:
bool float_exceptions; //!< enable floating point exceptions
int quality_flags; //!< RTC_BUILD_QUALITY_* value from the "quality" option, -1 when the option was not given
int scene_flags; //!< OR of RTC_SCENE_FLAG_* values from the "scene_flags" option, -1 when the option was not given
size_t verbose; //!< verbosity of output
size_t benchmark; //!< value of the "benchmark" option (0 by default)
public:
size_t numThreads; //!< number of threads to use in builders
size_t numUserThreads; //!< number of user provided threads to use in builders
bool set_affinity; //!< sets affinity for worker threads
bool start_threads; //!< true when threads should be started at device creation time
int enabled_cpu_features; //!< CPU ISA features to use
int enabled_builder_cpu_features; //!< CPU ISA features to use for builders only
enum FREQUENCY_LEVEL {
FREQUENCY_SIMD128,
FREQUENCY_SIMD256,
FREQUENCY_SIMD512
} frequency_level; //!< frequency level the app wants to run on (default is SIMD256)
bool enable_selockmemoryprivilege; //!< configures the SeLockMemoryPrivilege under Windows to enable huge pages
bool hugepages; //!< true if huge pages should get used
bool hugepages_success; //!< status for enabling huge pages
public:
size_t alloc_main_block_size; //!< main allocation block size (shared between threads)
int alloc_num_main_slots; //!< number of such shared blocks to be used to allocate
size_t alloc_thread_block_size; //!< size of thread local allocator block size
int alloc_single_thread_alloc; //!< in single mode nodes and leaves use same thread local allocator (-1 by default)
public:
/*! checks if we can use AVX (requires the AVX ISA and a frequency level other than SIMD128) */
bool canUseAVX() {
return hasISA(AVX) && frequency_level != FREQUENCY_SIMD128;
}
/*! checks if we can use AVX2 (requires the AVX2 ISA and a frequency level other than SIMD128) */
bool canUseAVX2() {
return hasISA(AVX2) && frequency_level != FREQUENCY_SIMD128;
}
/*! Keeps one RTCError slot per thread. error() returns the slot of the
 * calling thread and creates it on first use; the destructor frees all
 * slots that were handed out. */
struct ErrorHandler
{
public:
ErrorHandler();
~ErrorHandler();
/*! returns the error slot of the calling thread */
RTCError* error();
public:
tls_t thread_error; //!< thread local storage key holding the slot of each thread
std::vector<RTCError*> thread_errors; //!< all slots created so far (owned)
MutexSys errors_mutex; //!< protects thread_errors
};
ErrorHandler errorHandler; //!< error slots of this state object
static ErrorHandler g_errorHandler; //!< error slots shared by all state objects
public:
/*! installs the error callback and the user pointer stored alongside it */
void setErrorFunction(RTCErrorFunction fptr, void* uptr)
{
error_function = fptr;
error_function_userptr = uptr;
}
RTCErrorFunction error_function; //!< error callback, nullptr when none is installed
void* error_function_userptr; //!< user pointer belonging to error_function
public:
/*! installs the memory monitor callback and the user pointer stored alongside it */
void setMemoryMonitorFunction(RTCMemoryMonitorFunction fptr, void* uptr)
{
memory_monitor_function = fptr;
memory_monitor_userptr = uptr;
}
RTCMemoryMonitorFunction memory_monitor_function; //!< memory monitor callback, nullptr when none is installed
void* memory_monitor_userptr; //!< user pointer belonging to memory_monitor_function
};
}

View file

@ -0,0 +1,76 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "default.h"
namespace embree
{
/*! Interface through which allocators report memory usage changes.
 * aligned_monitored_allocator calls memoryMonitor(+bytes,false) before it
 * allocates and memoryMonitor(-bytes,true) after it has freed memory.
 * NOTE(review): the interface has no virtual destructor, so objects must
 * not be deleted through a MemoryMonitorInterface pointer. */
struct MemoryMonitorInterface {
virtual void memoryMonitor(ssize_t bytes, bool post) = 0;
};
/*! Allocator that performs aligned monitored allocations.
 *
 *  Every non-empty allocation and deallocation is reported to the
 *  MemoryMonitorInterface passed at construction: the size is reported as a
 *  positive byte count before allocating and as a negative byte count after
 *  freeing. Requests of at least 14 * PAGE_SIZE_2M bytes go through
 *  os_malloc/os_free, smaller ones through alignedMalloc/alignedFree with
 *  the `alignment` template parameter.
 */
template<typename T, size_t alignment = 64>
  struct aligned_monitored_allocator
{
  typedef T value_type;
  typedef T* pointer;
  typedef const T* const_pointer;
  typedef T& reference;
  typedef const T& const_reference;
  typedef std::size_t size_type;
  typedef std::ptrdiff_t difference_type;

  /*! \param device receiver of the memory usage notifications; has to be
   *                non-null as soon as a non-empty block is (de)allocated */
  __forceinline aligned_monitored_allocator(MemoryMonitorInterface* device)
    : device(device), hugepages(false) {}

  /*! Allocates storage for n objects of type T.
   *  The monitor is notified first, so it sees the request before any
   *  memory is obtained. */
  __forceinline pointer allocate( size_type n )
  {
    const size_type bytes = n*sizeof(value_type);

    if (n) {
      assert(device);
      device->memoryMonitor(ssize_t(bytes),false);
    }

    if (usesOSAllocation(bytes))
    {
      pointer p = (pointer) os_malloc(bytes,hugepages);
      assert(p);
      return p;
    }
    return (pointer) alignedMalloc(bytes,alignment);
  }

  /*! Frees storage obtained from allocate(n). n has to be the value that
   *  was passed to allocate, as it selects the matching free function.
   *  A null pointer is only accepted together with n == 0. */
  __forceinline void deallocate( pointer p, size_type n )
  {
    const size_type bytes = n*sizeof(value_type);

    if (p)
    {
      if (usesOSAllocation(bytes))
        os_free(p,bytes,hugepages);
      else
        alignedFree(p);
    }
    else assert(n == 0);

    if (n) {
      assert(device);
      /* negate after converting to the signed type; the previous
       * -ssize_t(n)*sizeof(T) was evaluated in unsigned arithmetic and only
       * produced the negative delta through wrap-around */
      device->memoryMonitor(-ssize_t(bytes),true);
    }
  }

  /*! copy-constructs a T at p */
  __forceinline void construct( pointer p, const_reference val ) {
    new (p) T(val);
  }

  /*! runs the destructor of the object at p without freeing its storage */
  __forceinline void destroy( pointer p ) {
    p->~T();
  }

private:

  /*! single source of truth for the size threshold that allocate() and
   *  deallocate() have to agree on */
  static __forceinline bool usesOSAllocation( size_type bytes ) {
    return bytes >= 14 * PAGE_SIZE_2M;
  }

private:
  MemoryMonitorInterface* device; //!< receiver of the memory usage notifications
  bool hugepages;                 //!< forwarded to os_malloc/os_free; always false here
};
/*! monitored vector: a vector_t whose storage comes from
 * aligned_monitored_allocator. The allocator is instantiated with the
 * natural alignment of T (std::alignment_of<T>) rather than with its
 * default alignment of 64. */
template<typename T>
using mvector = vector_t<T,aligned_monitored_allocator<T,std::alignment_of<T>::value> >;
}