Skip to content

Add rs_allocate_closure free function. #1944

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
7 changes: 7 additions & 0 deletions src/include/OSL/rs_free_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,13 @@ OSL_RSOP OSL_HOSTDEVICE bool
rs_trace_get(OSL::OpaqueExecContextPtr oec, OSL::ustringhash name,
OSL::TypeDesc type, void* val, bool derivatives);

/// Allocates memory for a closure color. May return null if no memory could
/// be allocated. It is the renderers responsibility to clean up these
/// allocations after a shader is run and the closures have been processed.
OSL_RSOP OSL_HOSTDEVICE void*
rs_allocate_closure(OSL::OpaqueExecContextPtr oec, size_t size,
size_t alignment);

/// Report errors, warnings, printf, and fprintf.
/// Fmtlib style format specifier is used (vs. printf style)
/// Arguments are represented as EncodedTypes (encodedtypes.h) and
Expand Down
3 changes: 2 additions & 1 deletion src/liboslexec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ if (USE_LLVM_BITCODE)
EMBED_LLVM_BITCODE_IN_CPP ( "${llvm_ops_srcs}" "_host" "osl_llvm_compiled_ops" lib_src "" "${include_dirs}")

set (rs_dependent_ops_srcs
opmatrix.cpp opfmt.cpp optexture.cpp pointcloud.cpp
opmatrix.cpp opfmt.cpp optexture.cpp pointcloud.cpp opclosure.cpp
)
# Achieve the effect of absorbing osl_llvm_compiled_ops by adding its
# sources to rs_dependent_ops_srcs which avoids having to do it at runtime.
Expand All @@ -217,6 +217,7 @@ if (USE_LLVM_BITCODE)
${CMAKE_SOURCE_DIR}/src/liboslexec/opmatrix.cpp
${CMAKE_SOURCE_DIR}/src/liboslexec/optexture.cpp
${CMAKE_SOURCE_DIR}/src/liboslexec/pointcloud.cpp
${CMAKE_SOURCE_DIR}/src/liboslexec/opclosure.cpp
${CMAKE_SOURCE_DIR}/src/liboslnoise/gabornoise.cpp
${CMAKE_SOURCE_DIR}/src/liboslnoise/simplexnoise.cpp
)
Expand Down
19 changes: 2 additions & 17 deletions src/liboslexec/builtindecl.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,29 +109,14 @@
DECL(osl_##name##_dvdvv, "xXXX")



#ifndef __CUDA_ARCH__
DECL(osl_add_closure_closure, "CXCC")
DECL(osl_mul_closure_float, "CXCf")
DECL(osl_mul_closure_color, "CXCc")
DECL(osl_allocate_closure_component, "CXii")
DECL(osl_allocate_weighted_closure_component, "CXiiX")
DECL(osl_closure_to_string, "sXC")
DECL(osl_closure_to_ustringhash, "hXC")
#else
// TODO: Figure out why trying to match the signatures between host and device
// definitions fails with 'LLVM had to make a cast' assertion failure.
//
// In the meantime, use a signature that matches the definitions in rend_lib.cu,
// where void* is used instead of ClosureColor* and ShaderGlobals*.
DECL(osl_add_closure_closure, "XXXX")
DECL(osl_mul_closure_float, "XXXf")
DECL(osl_mul_closure_color, "XXXc")
DECL(osl_mul_closure_color, "XXXX")
DECL(osl_allocate_closure_component, "XXii")
DECL(osl_allocate_weighted_closure_component, "XXiiX")
DECL(osl_closure_to_string, "sXX")
DECL(osl_closure_to_ustringhash, "hXX")
#endif

DECL(osl_format, "hh*")
DECL(osl_gen_ustringhash_pod, "hs")
DECL(osl_gen_ustring, "sh")
Expand Down
22 changes: 9 additions & 13 deletions src/liboslexec/llvm_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3850,19 +3850,15 @@ LLVMGEN(llvm_gen_closure)
id_int, size_int);
llvm::Value* comp_void_ptr = return_ptr;

// For the weighted closures, we need a surrounding "if" so that it's safe
// for osl_allocate_weighted_closure_component to return NULL (unless we
// know for sure that it's constant weighted and that the weight is
// not zero).
llvm::BasicBlock* next_block = NULL;
if (weighted && !(weight->is_constant() && !rop.is_zero(*weight))) {
llvm::BasicBlock* notnull_block = rop.ll.new_basic_block(
"non_null_closure");
next_block = rop.ll.new_basic_block("");
llvm::Value* cond = rop.ll.op_ne(return_ptr, rop.ll.void_ptr_null());
rop.ll.op_branch(cond, notnull_block, next_block);
// new insert point is nonnull_block
}
// We need a surrounding "if" so that it's safe for closure allocation to
// return NULL, either because it has zero weight, or renderer services ran
// out of memory in the closure pool.
llvm::BasicBlock* notnull_block = rop.ll.new_basic_block(
"non_null_closure");
llvm::BasicBlock* next_block = rop.ll.new_basic_block("");
llvm::Value* cond = rop.ll.op_ne(return_ptr, rop.ll.void_ptr_null());
rop.ll.op_branch(cond, notnull_block, next_block);
// new insert point is nonnull_block

llvm::Value* comp_ptr
= rop.ll.ptr_cast(comp_void_ptr, rop.llvm_type_closure_component_ptr());
Expand Down
88 changes: 69 additions & 19 deletions src/liboslexec/opclosure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,72 +7,120 @@

#include "oslexec_pvt.h"
#include <OSL/genclosure.h>
#include <OSL/rs_free_function.h>


OSL_NAMESPACE_BEGIN
namespace pvt {



OSL_SHADEOP const ClosureColor*
osl_add_closure_closure(ShaderGlobals* sg, const ClosureColor* a,
const ClosureColor* b)
OSL_SHADEOP OSL_HOSTDEVICE const void*
osl_add_closure_closure(OpaqueExecContextPtr oec, const void* a_,
const void* b_)
{
const ClosureColor* a = (const ClosureColor*)a_;
const ClosureColor* b = (const ClosureColor*)b_;
if (a == NULL)
return b;
if (b == NULL)
return a;
return sg->context->closure_add_allot(a, b);
ClosureAdd* add = (ClosureAdd*)rs_allocate_closure(oec, sizeof(ClosureAdd),
alignof(ClosureAdd));
if (add) {
add->id = ClosureColor::ADD;
add->closureA = a;
add->closureB = b;
}
return add;
}


OSL_SHADEOP const ClosureColor*
osl_mul_closure_color(ShaderGlobals* sg, ClosureColor* a, const Color3* w)
OSL_SHADEOP OSL_HOSTDEVICE const void*
osl_mul_closure_color(OpaqueExecContextPtr oec, const void* a_, const void* w_)
{
const ClosureColor* a = (const ClosureColor*)a_;
const Color3* w = (const Color3*)w_;
if (a == NULL)
return NULL;
if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f)
return NULL;
if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f)
return a;
return sg->context->closure_mul_allot(*w, a);
ClosureMul* mul = (ClosureMul*)rs_allocate_closure(oec, sizeof(ClosureMul),
alignof(ClosureMul));
if (mul) {
mul->id = ClosureColor::MUL;
mul->weight = *w;
mul->closure = a;
}
return mul;
}


OSL_SHADEOP const ClosureColor*
osl_mul_closure_float(ShaderGlobals* sg, ClosureColor* a, float w)
OSL_SHADEOP OSL_HOSTDEVICE const void*
osl_mul_closure_float(OpaqueExecContextPtr oec, const void* a_, float w)
{
const ClosureColor* a = (const ClosureColor*)a_;
if (a == NULL)
return NULL;
if (w == 0.0f)
return NULL;
if (w == 1.0f)
return a;
return sg->context->closure_mul_allot(w, a);
ClosureMul* mul = (ClosureMul*)rs_allocate_closure(oec, sizeof(ClosureMul),
alignof(ClosureMul));
if (mul) {
mul->id = ClosureColor::MUL;
mul->weight = Color3(w);
mul->closure = a;
}
return mul;
}


OSL_SHADEOP ClosureComponent*
osl_allocate_closure_component(ShaderGlobals* sg, int id, int size)
OSL_SHADEOP OSL_HOSTDEVICE void*
osl_allocate_closure_component(OpaqueExecContextPtr oec, int id, int size)
{
return sg->context->closure_component_allot(id, size, Color3(1.0f));
// Allocate the component and the mul back to back
const size_t needed = sizeof(ClosureComponent) + size;
ClosureComponent* comp
= (ClosureComponent*)rs_allocate_closure(oec, needed,
alignof(ClosureComponent));
if (comp) {
comp->id = id;
comp->w = Color3(1.0f);
}
return comp;
}



OSL_SHADEOP ClosureColor*
osl_allocate_weighted_closure_component(ShaderGlobals* sg, int id, int size,
const Color3* w)
OSL_SHADEOP OSL_HOSTDEVICE void*
osl_allocate_weighted_closure_component(OpaqueExecContextPtr oec, int id,
int size, const void* w_)
{
const Color3* w = (const Color3*)w_;
if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f)
return NULL;
return sg->context->closure_component_allot(id, size, *w);
// Allocate the component and the mul back to back
const size_t needed = sizeof(ClosureComponent) + size;
ClosureComponent* comp
= (ClosureComponent*)rs_allocate_closure(oec, needed,
alignof(ClosureComponent));
if (comp) {
comp->id = id;
comp->w = *w;
}
return comp;
}

// Deprecated, remove when conversion from ustring to ustringhash is finished
OSL_SHADEOP const char*
osl_closure_to_string(ShaderGlobals* sg, ClosureColor* c)
osl_closure_to_string(OpaqueExecContextPtr oec, const void* c_)
{
ShaderGlobals* sg = (ShaderGlobals*)oec;
const ClosureColor* c = (const ClosureColor*)c_;
// Special case for printing closures
std::ostringstream stream;
stream.imbue(std::locale::classic()); // force C locale
Expand All @@ -82,8 +130,10 @@ osl_closure_to_string(ShaderGlobals* sg, ClosureColor* c)
}

OSL_SHADEOP ustringhash_pod
osl_closure_to_ustringhash(ShaderGlobals* sg, ClosureColor* c)
osl_closure_to_ustringhash(OpaqueExecContextPtr oec, const void* c_)
{
ShaderGlobals* sg = (ShaderGlobals*)oec;
const ClosureColor* c = (const ClosureColor*)c_;
// Special case for printing closures
std::ostringstream stream;
stream.imbue(std::locale::classic()); // force C locale
Expand Down
42 changes: 2 additions & 40 deletions src/liboslexec/oslexec_pvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -2242,49 +2242,11 @@ class OSLEXECPUBLIC ShadingContext {
}
#endif

ClosureComponent* closure_component_allot(int id, size_t prim_size,
const Color3& w)
void* allocate_closure(size_t size, size_t alignment)
{
// Allocate the component and the mul back to back
size_t needed = sizeof(ClosureComponent) + prim_size;
ClosureComponent* comp = (ClosureComponent*)m_closure_pool.alloc(
needed, alignof(ClosureComponent));
comp->id = id;
comp->w = w;
return comp;
return m_closure_pool.alloc(size, alignment);
}

ClosureMul* closure_mul_allot(const Color3& w, const ClosureColor* c)
{
ClosureMul* mul = (ClosureMul*)m_closure_pool.alloc(sizeof(ClosureMul),
alignof(ClosureMul));
mul->id = ClosureColor::MUL;
mul->weight = w;
mul->closure = c;
return mul;
}

ClosureMul* closure_mul_allot(float w, const ClosureColor* c)
{
ClosureMul* mul = (ClosureMul*)m_closure_pool.alloc(sizeof(ClosureMul),
alignof(ClosureMul));
mul->id = ClosureColor::MUL;
mul->weight.setValue(w, w, w);
mul->closure = c;
return mul;
}

ClosureAdd* closure_add_allot(const ClosureColor* a, const ClosureColor* b)
{
ClosureAdd* add = (ClosureAdd*)m_closure_pool.alloc(sizeof(ClosureAdd),
alignof(ClosureAdd));
add->id = ClosureColor::ADD;
add->closureA = a;
add->closureB = b;
return add;
}


/// Find the named symbol in the (already-executed!) stack of shaders of
/// the given use. If a layer is given, search just that layer. If no
/// layer is specified, priority is given to later laters over earlier
Expand Down
13 changes: 13 additions & 0 deletions src/liboslexec/rs_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <OSL/journal.h>

#include "oslexec_pvt.h"

// Fallback is to reroute calls back through the virtual function
// based RendererServices from ShaderGlobals.
Expand Down Expand Up @@ -316,6 +317,18 @@ rs_trace_get(OSL::OpaqueExecContextPtr exec_ctx, OSL::ustringhash name,
#endif
}

OSL_RSOP OSL_HOSTDEVICE void*
rs_allocate_closure(OSL::OpaqueExecContextPtr exec_ctx, size_t size,
size_t alignment)
{
#ifndef __CUDA_ARCH__
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As this is rs_fallback.cpp, it should never be compiled by CUDA, can we remove the #ifndef CUDA_ARCH

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything in rs_fallback is done this way. It is compiled by Cuda, is it not?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its is not, rs_fallback.cpp only exists in
set (lib_src
which is only compiled for the host, it turns around and just calls the virtual functions of renderer services (which are only on the host) and has no way to be customized for other targets.
More of a rs_legacy_adapter.cpp

auto sg = get_sg(exec_ctx);
return sg->context->allocate_closure(size, alignment);
#else
return nullptr;
#endif
}

OSL_RSOP OSL_HOSTDEVICE void
rs_errorfmt(OSL::OpaqueExecContextPtr exec_ctx,
OSL::ustringhash fmt_specification, int32_t count,
Expand Down
11 changes: 7 additions & 4 deletions src/testrender/cuda/optix_raytracer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,19 @@ __device__ __constant__ RenderParams render_params;


static inline __device__ void
execute_shader(ShaderGlobalsType& sg, const int shader_id, char* closure_pool)
execute_shader(ShaderGlobalsType& sg, const int shader_id,
StackClosurePool& closure_pool)
{
if (shader_id < 0) {
// TODO: should probably never get here ...
return;
}

// Pack the "closure pool" into one of the ShaderGlobals pointers
*(int*)&closure_pool[0] = 0;
sg.renderstate = &closure_pool[0];
closure_pool.reset();
RenderState renderState;
// TODO: renderState.context = ...
renderState.closure_pool = &closure_pool;
sg.renderstate = &renderState;

// Pack the pointers to the options structs in a faux "context",
// which is a rough stand-in for the host ShadingContext.
Expand Down
Loading
Loading