From 46e9b763fd5da96e8b24df4a63bee965c0edfb16 Mon Sep 17 00:00:00 2001 From: Tim Grant Date: Wed, 6 Nov 2024 11:43:46 -0700 Subject: [PATCH] Wrap the cudaMalloc and cudaMemcpy calls. Signed-off-by: Tim Grant --- src/testrender/optixraytracer.cpp | 217 ++++++++++-------------------- src/testrender/optixraytracer.h | 4 +- src/testshade/optixgridrender.cpp | 78 ++++------- src/testshade/optixgridrender.h | 4 +- 4 files changed, 100 insertions(+), 203 deletions(-) diff --git a/src/testrender/optixraytracer.cpp b/src/testrender/optixraytracer.cpp index a17e62937..ab5aff9d1 100644 --- a/src/testrender/optixraytracer.cpp +++ b/src/testrender/optixraytracer.cpp @@ -83,6 +83,12 @@ OSL_NAMESPACE_ENTER } \ } + +#define DEVICE_ALLOC(size) reinterpret_cast(device_alloc(size)) +#define COPY_TO_DEVICE(dst_device, src_host, size) \ + copy_to_device(reinterpret_cast(dst_device), src_host, size) + + static void context_log_cb(unsigned int level, const char* tag, const char* message, void* /*cbdata */) @@ -116,10 +122,10 @@ OptixRaytracer::~OptixRaytracer() { if (m_optix_ctx) OPTIX_CHECK(optixDeviceContextDestroy(m_optix_ctx)); - for (void* ptr : m_ptrs_to_free) + for (CUdeviceptr ptr : m_ptrs_to_free) cudaFree(reinterpret_cast(ptr)); - for (void* ptr : m_arrays_to_free) - cudaFreeArray(reinterpret_cast(ptr)); + for (cudaArray_t arr : m_arrays_to_free) + cudaFreeArray(arr); } @@ -133,6 +139,7 @@ OptixRaytracer::device_alloc(size_t size) errhandler().errorfmt("cudaMalloc({}) failed with error: {}\n", size, cudaGetErrorString(res)); } + m_ptrs_to_free.push_back(reinterpret_cast(ptr)); return ptr; } @@ -237,18 +244,13 @@ OptixRaytracer::synch_attributes() const size_t podDataSize = cpuDataSize - sizeof(ustringhash) * numStrings; - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_color_system), - podDataSize + sizeof(ustringhash_pod) * numStrings)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_color_system), colorSys, - podDataSize, cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_color_system)); + d_color_system = DEVICE_ALLOC(podDataSize + + sizeof(ustringhash_pod) * numStrings); + COPY_TO_DEVICE(d_color_system, colorSys, podDataSize); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_osl_printf_buffer), - OSL_PRINTF_BUFFER_SIZE)); + d_osl_printf_buffer = DEVICE_ALLOC(OSL_PRINTF_BUFFER_SIZE); CUDA_CHECK(cudaMemset(reinterpret_cast(d_osl_printf_buffer), 0, OSL_PRINTF_BUFFER_SIZE)); - m_ptrs_to_free.push_back(reinterpret_cast(d_osl_printf_buffer)); // then copy the device string to the end, first strings starting at dataPtr - (numStrings) // FIXME -- Should probably handle alignment better. @@ -260,8 +262,7 @@ OptixRaytracer::synch_attributes() for (const ustringhash* end = cpuStringHash + numStrings; cpuStringHash < end; ++cpuStringHash) { ustringhash_pod devStr = cpuStringHash->hash(); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(gpuStrings), &devStr, - sizeof(devStr), cudaMemcpyHostToDevice)); + COPY_TO_DEVICE(gpuStrings, &devStr, sizeof(devStr)); gpuStrings += sizeof(ustringhash_pod); } } @@ -556,13 +557,10 @@ OptixRaytracer::create_shaders() } // Upload per-material interactive buffer table - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_interactive_params), - sizeof(void*) * material_interactive_params.size())); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_interactive_params), - material_interactive_params.data(), - sizeof(void*) * material_interactive_params.size(), - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_interactive_params)); + d_interactive_params = DEVICE_ALLOC(sizeof(void*) + * material_interactive_params.size()); + COPY_TO_DEVICE(d_interactive_params, material_interactive_params.data(), + sizeof(void*) * material_interactive_params.size()); } @@ -650,12 +648,8 @@ OptixRaytracer::create_sbt() GenericRecord raygen_record; CUdeviceptr d_raygen_record; OPTIX_CHECK(optixSbtRecordPackHeader(m_raygen_group, &raygen_record)); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_raygen_record), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_raygen_record), - &raygen_record, sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_raygen_record)); + d_raygen_record = DEVICE_ALLOC(sizeof(GenericRecord)); + COPY_TO_DEVICE(d_raygen_record, &raygen_record, sizeof(GenericRecord)); m_optix_sbt.raygenRecord = d_raygen_record; } @@ -665,12 +659,8 @@ OptixRaytracer::create_sbt() GenericRecord miss_record; CUdeviceptr d_miss_record; OPTIX_CHECK(optixSbtRecordPackHeader(m_miss_group, &miss_record)); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_miss_record), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_miss_record), - &miss_record, sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_miss_record)); + d_miss_record = DEVICE_ALLOC(sizeof(GenericRecord)); + COPY_TO_DEVICE(d_miss_record, &miss_record, sizeof(GenericRecord)); m_optix_sbt.missRecordBase = d_miss_record; m_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); @@ -685,13 +675,9 @@ OptixRaytracer::create_sbt() OPTIX_CHECK( optixSbtRecordPackHeader(m_closesthit_group, &hitgroup_records[0])); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_hitgroup_records), - nhitgroups * sizeof(GenericRecord))); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_hitgroup_records), - &hitgroup_records[0], - nhitgroups * sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_hitgroup_records)); + d_hitgroup_records = DEVICE_ALLOC(nhitgroups * sizeof(GenericRecord)); + COPY_TO_DEVICE(d_hitgroup_records, &hitgroup_records[0], + nhitgroups * sizeof(GenericRecord)); m_optix_sbt.hitgroupRecordBase = d_hitgroup_records; m_optix_sbt.hitgroupRecordStrideInBytes = sizeof(GenericRecord); @@ -709,13 +695,9 @@ OptixRaytracer::create_sbt() &callable_records[idx])); } - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_callable_records), - (nshaders) * sizeof(GenericRecord))); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_callable_records), - callable_records.data(), - (nshaders) * sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_callable_records)); + d_callable_records = DEVICE_ALLOC((nshaders) * sizeof(GenericRecord)); + COPY_TO_DEVICE(d_callable_records, callable_records.data(), + (nshaders) * sizeof(GenericRecord)); m_optix_sbt.callablesRecordBase = d_callable_records; m_optix_sbt.callablesRecordStrideInBytes = sizeof(GenericRecord); @@ -733,14 +715,9 @@ OptixRaytracer::create_sbt() CUdeviceptr d_setglobals_raygen_record; OPTIX_CHECK( optixSbtRecordPackHeader(m_setglobals_raygen_group, &record)); - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_setglobals_raygen_record), - sizeof(GenericRecord))); - CUDA_CHECK( - cudaMemcpy(reinterpret_cast(d_setglobals_raygen_record), - &record, sizeof(GenericRecord), cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back( - reinterpret_cast(d_setglobals_raygen_record)); + d_setglobals_raygen_record = DEVICE_ALLOC(sizeof(GenericRecord)); + COPY_TO_DEVICE(d_setglobals_raygen_record, &record, + sizeof(GenericRecord)); m_setglobals_optix_sbt.raygenRecord = d_setglobals_raygen_record; } @@ -750,14 +727,9 @@ OptixRaytracer::create_sbt() GenericRecord record; CUdeviceptr d_setglobals_miss_record; OPTIX_CHECK(optixSbtRecordPackHeader(m_setglobals_miss_group, &record)); - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_setglobals_miss_record), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_setglobals_miss_record), - &record, sizeof(GenericRecord), - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back( - reinterpret_cast(d_setglobals_miss_record)); + d_setglobals_miss_record = DEVICE_ALLOC(sizeof(GenericRecord)); + COPY_TO_DEVICE(d_setglobals_miss_record, &record, + sizeof(GenericRecord)); m_setglobals_optix_sbt.missRecordBase = d_setglobals_miss_record; m_setglobals_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord); @@ -797,20 +769,12 @@ OptixRaytracer::build_accel() accel_options.operation = OPTIX_BUILD_OPERATION_BUILD; const size_t vertices_size = sizeof(Vec3) * scene.verts.size(); - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_vertices), vertices_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_vertices), - scene.verts.data(), vertices_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_vertices)); + d_vertices = DEVICE_ALLOC(vertices_size); + COPY_TO_DEVICE(d_vertices, scene.verts.data(), vertices_size); const size_t indices_size = scene.triangles.size() * sizeof(int32_t) * 3; - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_vert_indices), indices_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_vert_indices), - scene.triangles.data(), indices_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_vert_indices)); + d_vert_indices = DEVICE_ALLOC(indices_size); + COPY_TO_DEVICE(d_vert_indices, scene.triangles.data(), indices_size); const uint32_t triangle_input_flags[1] = { OPTIX_GEOMETRY_FLAG_NONE }; OptixBuildInput triangle_input = {}; @@ -836,9 +800,7 @@ OptixRaytracer::build_accel() CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_temp_buffer), gas_buffer_sizes.tempSizeInBytes)); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_accel_output_buffer), - gas_buffer_sizes.outputSizeInBytes)); - m_ptrs_to_free.push_back(reinterpret_cast(d_accel_output_buffer)); + d_accel_output_buffer = DEVICE_ALLOC(gas_buffer_sizes.outputSizeInBytes); OPTIX_CHECK(optixAccelBuild( m_optix_ctx, 0, &accel_options, &triangle_input, 1, d_temp_buffer, @@ -855,16 +817,9 @@ OptixRaytracer::prepare_background() { if (getBackgroundShaderID() >= 0) { const int bg_res = std::max(32, getBackgroundResolution()); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_bg_values), - 3 * sizeof(float) * bg_res * bg_res)); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_bg_rows), - sizeof(float) * bg_res)); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_bg_cols), - sizeof(float) * bg_res * bg_res)); - - m_ptrs_to_free.push_back(reinterpret_cast(d_bg_values)); - m_ptrs_to_free.push_back(reinterpret_cast(d_bg_rows)); - m_ptrs_to_free.push_back(reinterpret_cast(d_bg_cols)); + d_bg_values = DEVICE_ALLOC(3 * sizeof(float) * bg_res * bg_res); + d_bg_rows = DEVICE_ALLOC(sizeof(float) * bg_res); + d_bg_cols = DEVICE_ALLOC(sizeof(float) * bg_res * bg_res); } } @@ -875,46 +830,29 @@ OptixRaytracer::upload_mesh_data() { // Upload the extra geometry data to the device const size_t uvs_size = sizeof(Vec2) * scene.uvs.size(); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_uvs), uvs_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_uvs), scene.uvs.data(), - uvs_size, cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_uvs)); + d_uvs = DEVICE_ALLOC(uvs_size); + COPY_TO_DEVICE(d_uvs, scene.uvs.data(), uvs_size); const size_t uv_indices_size = scene.uv_triangles.size() * sizeof(int32_t) * 3; - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_uv_indices), uv_indices_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_uv_indices), - scene.uv_triangles.data(), uv_indices_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_uv_indices)); + d_uv_indices = DEVICE_ALLOC(uv_indices_size); + COPY_TO_DEVICE(d_uv_indices, scene.uv_triangles.data(), uv_indices_size); const size_t normals_size = sizeof(Vec3) * scene.normals.size(); if (normals_size > 0) { - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_normals), normals_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_normals), - scene.normals.data(), normals_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_normals)); + d_normals = DEVICE_ALLOC(normals_size); + COPY_TO_DEVICE(d_normals, scene.normals.data(), normals_size); } const size_t normal_indices_size = scene.n_triangles.size() * sizeof(int32_t) * 3; - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_normal_indices), - normal_indices_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_normal_indices), - scene.n_triangles.data(), normal_indices_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_normal_indices)); + d_normal_indices = DEVICE_ALLOC(normal_indices_size); + COPY_TO_DEVICE(d_normal_indices, scene.n_triangles.data(), + normal_indices_size); const size_t shader_ids_size = scene.shaderids.size() * sizeof(int); - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_shader_ids), shader_ids_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_shader_ids), - scene.shaderids.data(), shader_ids_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_shader_ids)); + d_shader_ids = DEVICE_ALLOC(shader_ids_size); + COPY_TO_DEVICE(d_shader_ids, scene.shaderids.data(), shader_ids_size); // TODO: These could be packed, but for now just use ints instead of bools std::vector shader_is_light; @@ -922,22 +860,16 @@ OptixRaytracer::upload_mesh_data() shader_is_light.push_back(is_light); const size_t shader_is_light_size = shader_is_light.size() * sizeof(int32_t); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_shader_is_light), - shader_is_light_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_shader_is_light), - shader_is_light.data(), shader_is_light_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_shader_is_light)); + d_shader_is_light = DEVICE_ALLOC(shader_is_light_size); + COPY_TO_DEVICE(d_shader_is_light, shader_is_light.data(), + shader_is_light_size); const size_t lightprims_size = OptixRaytracer::lightprims().size() * sizeof(uint32_t); - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_lightprims), lightprims_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_lightprims), - OptixRaytracer::lightprims().data(), lightprims_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_lightprims)); + d_lightprims = DEVICE_ALLOC(lightprims_size); + COPY_TO_DEVICE(d_lightprims, OptixRaytracer::lightprims().data(), + lightprims_size); // Copy the mesh ID for each triangle to the device std::vector mesh_ids; @@ -948,11 +880,8 @@ OptixRaytracer::upload_mesh_data() mesh_ids.push_back(meshid); } const size_t mesh_ids_size = mesh_ids.size() * sizeof(int32_t); - CUDA_CHECK( - cudaMalloc(reinterpret_cast(&d_mesh_ids), mesh_ids_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_mesh_ids), mesh_ids.data(), - mesh_ids_size, cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_mesh_ids)); + d_mesh_ids = DEVICE_ALLOC(mesh_ids_size); + COPY_TO_DEVICE(d_mesh_ids, mesh_ids.data(), mesh_ids_size); // Copy the mesh surface areas to the device std::vector mesh_surfacearea; @@ -971,12 +900,9 @@ OptixRaytracer::upload_mesh_data() const size_t mesh_surfacearea_size = mesh_surfacearea.size() * sizeof(float); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_surfacearea), - mesh_surfacearea_size)); - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_surfacearea), - mesh_surfacearea.data(), mesh_surfacearea_size, - cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_surfacearea)); + d_surfacearea = DEVICE_ALLOC(mesh_surfacearea_size); + COPY_TO_DEVICE(d_surfacearea, mesh_surfacearea.data(), + mesh_surfacearea_size); } @@ -1061,7 +987,7 @@ OptixRaytracer::get_texture_handle(ustring filename, CUDA_CHECK(cudaMemcpy2DToArray(pixelArray, 0, 0, level_pixels[0].data(), pitch, pitch, img_height, cudaMemcpyHostToDevice)); - m_arrays_to_free.push_back(reinterpret_cast(pixelArray)); + m_arrays_to_free.push_back(pixelArray); cudaResourceDesc res_desc = {}; res_desc.resType = cudaResourceTypeMipmappedArray; @@ -1123,12 +1049,8 @@ OptixRaytracer::warmup() void OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED) { - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_output_buffer), - xres * yres * 4 * sizeof(float))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_launch_params), - sizeof(RenderParams))); - m_ptrs_to_free.push_back(reinterpret_cast(d_output_buffer)); - m_ptrs_to_free.push_back(reinterpret_cast(d_launch_params)); + d_output_buffer = DEVICE_ALLOC(xres * yres * 4 * sizeof(float)); + d_launch_params = DEVICE_ALLOC(sizeof(RenderParams)); m_xres = xres; m_yres = yres; @@ -1186,8 +1108,7 @@ OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED) params.bg_rows = d_bg_rows; params.bg_cols = d_bg_cols; - CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_launch_params), ¶ms, - sizeof(RenderParams), cudaMemcpyHostToDevice)); + COPY_TO_DEVICE(d_launch_params, ¶ms, sizeof(RenderParams)); // Set up global variables OPTIX_CHECK(optixLaunch(m_optix_pipeline, m_cuda_stream, d_launch_params, diff --git a/src/testrender/optixraytracer.h b/src/testrender/optixraytracer.h index 1763420fd..b15a3147d 100644 --- a/src/testrender/optixraytracer.h +++ b/src/testrender/optixraytracer.h @@ -142,8 +142,8 @@ class OptixRaytracer final : public SimpleRaytracer { std::unordered_map m_samplers; // CUdeviceptrs that need to be freed after we are done - std::vector m_ptrs_to_free; - std::vector m_arrays_to_free; + std::vector m_ptrs_to_free; + std::vector m_arrays_to_free; }; diff --git a/src/testshade/optixgridrender.cpp b/src/testshade/optixgridrender.cpp index db9a8ac36..877a5691f 100644 --- a/src/testshade/optixgridrender.cpp +++ b/src/testshade/optixgridrender.cpp @@ -86,6 +86,11 @@ OSL_NAMESPACE_ENTER } +#define DEVICE_ALLOC(size) reinterpret_cast(device_alloc(size)) +#define COPY_TO_DEVICE(dst_device, src_host, size) \ + copy_to_device(reinterpret_cast(dst_device), src_host, size) + + static void context_log_cb(unsigned int level, const char* tag, const char* message, void* /*cbdata */) @@ -180,12 +185,12 @@ OptixGridRenderer::load_ptx_file(string_view filename) OptixGridRenderer::~OptixGridRenderer() { - for (void* p : m_ptrs_to_free) - cudaFree(p); - for (void* p : m_arrays_to_free) - cudaFreeArray(reinterpret_cast(p)); if (m_optix_ctx) OPTIX_CHECK(optixDeviceContextDestroy(m_optix_ctx)); + for (CUdeviceptr ptr : m_ptrs_to_free) + cudaFree(reinterpret_cast(ptr)); + for (cudaArray_t arr : m_arrays_to_free) + cudaFreeArray(arr); } @@ -252,30 +257,24 @@ OptixGridRenderer::synch_attributes() const size_t podDataSize = cpuDataSize - sizeof(ustringhash) * numStrings; - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_color_system), - podDataSize + sizeof(uint64_t) * numStrings)); + d_color_system = DEVICE_ALLOC(podDataSize + + sizeof(uint64_t) * numStrings); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_color_system), colorSys, podDataSize, cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_osl_printf_buffer), - OSL_PRINTF_BUFFER_SIZE)); + d_osl_printf_buffer = DEVICE_ALLOC(OSL_PRINTF_BUFFER_SIZE); CUDA_CHECK(cudaMemset(reinterpret_cast(d_osl_printf_buffer), 0, OSL_PRINTF_BUFFER_SIZE)); // Transforms - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_object2common), - sizeof(OSL::Matrix44))); + d_object2common = DEVICE_ALLOC(sizeof(OSL::Matrix44)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_object2common), &m_object2common, sizeof(OSL::Matrix44), cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_shader2common), - sizeof(OSL::Matrix44))); + d_shader2common = DEVICE_ALLOC(sizeof(OSL::Matrix44)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_shader2common), &m_shader2common, sizeof(OSL::Matrix44), cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_color_system)); - m_ptrs_to_free.push_back(reinterpret_cast(d_osl_printf_buffer)); - // then copy the device string to the end, first strings starting at dataPtr - (numStrings) // FIXME -- Should probably handle alignment better. const ustringhash* cpuStringHash @@ -730,26 +729,12 @@ OptixGridRenderer::make_optix_materials() setglobals_raygenRecord.data = nullptr; setglobals_missRecord.data = nullptr; - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_raygenRecord), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_missRecord), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_hitgroupRecord), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_callablesRecord), - callables * sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_setglobals_raygenRecord), - sizeof(GenericRecord))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_setglobals_missRecord), - sizeof(GenericRecord))); - - m_ptrs_to_free.push_back(reinterpret_cast(d_raygenRecord)); - m_ptrs_to_free.push_back(reinterpret_cast(d_missRecord)); - m_ptrs_to_free.push_back(reinterpret_cast(d_hitgroupRecord)); - m_ptrs_to_free.push_back(reinterpret_cast(d_callablesRecord)); - m_ptrs_to_free.push_back( - reinterpret_cast(d_setglobals_raygenRecord)); - m_ptrs_to_free.push_back(reinterpret_cast(d_setglobals_missRecord)); + d_raygenRecord = DEVICE_ALLOC(sizeof(GenericRecord)); + d_missRecord = DEVICE_ALLOC(sizeof(GenericRecord)); + d_hitgroupRecord = DEVICE_ALLOC(sizeof(GenericRecord)); + d_callablesRecord = DEVICE_ALLOC(callables * sizeof(GenericRecord)); + d_setglobals_raygenRecord = DEVICE_ALLOC(sizeof(GenericRecord)); + d_setglobals_missRecord = DEVICE_ALLOC(sizeof(GenericRecord)); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_raygenRecord), &raygenRecord, sizeof(GenericRecord), @@ -882,7 +867,7 @@ OptixGridRenderer::get_texture_handle(ustring filename, CUDA_CHECK(cudaMemcpy2DToArray(pixelArray, 0, 0, level_pixels[0].data(), pitch, pitch, img_height, cudaMemcpyHostToDevice)); - m_arrays_to_free.push_back(reinterpret_cast(pixelArray)); + m_arrays_to_free.push_back(pixelArray); cudaResourceDesc res_desc = {}; res_desc.resType = cudaResourceTypeMipmappedArray; @@ -941,13 +926,8 @@ OptixGridRenderer::warmup() void OptixGridRenderer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED) { - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_output_buffer), - xres * yres * 4 * sizeof(float))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_launch_params), - sizeof(RenderParams))); - - m_ptrs_to_free.push_back(reinterpret_cast(d_output_buffer)); - m_ptrs_to_free.push_back(reinterpret_cast(d_launch_params)); + d_output_buffer = DEVICE_ALLOC(xres * yres * 4 * sizeof(float)); + d_launch_params = DEVICE_ALLOC(sizeof(RenderParams)); m_xres = xres; m_yres = yres; @@ -1153,19 +1133,15 @@ OptixGridRenderer::register_named_transforms() } // Push the names and transforms to the device - size_t sz = sizeof(uint64_t) * xform_name_buffer.size(); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_xform_name_buffer), sz)); + size_t sz = sizeof(uint64_t) * xform_name_buffer.size(); + d_xform_name_buffer = DEVICE_ALLOC(sz); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_xform_name_buffer), xform_name_buffer.data(), sz, cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_xform_name_buffer)); - - sz = sizeof(OSL::Matrix44) * xform_buffer.size(); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_xform_buffer), sz)); + sz = sizeof(OSL::Matrix44) * xform_buffer.size(); + d_xform_buffer = DEVICE_ALLOC(sz); CUDA_CHECK(cudaMemcpy(reinterpret_cast(d_xform_buffer), xform_buffer.data(), sz, cudaMemcpyHostToDevice)); - m_ptrs_to_free.push_back(reinterpret_cast(d_xform_buffer)); - m_num_named_xforms = xform_name_buffer.size(); } diff --git a/src/testshade/optixgridrender.h b/src/testshade/optixgridrender.h index f689217d2..9cb2719fd 100644 --- a/src/testshade/optixgridrender.h +++ b/src/testshade/optixgridrender.h @@ -104,8 +104,8 @@ class OptixGridRenderer final : public SimpleRenderer { OSL::Matrix44 m_object2common; // "object" space to "common" space matrix // CUdeviceptrs that need to be freed after we are done - std::vector m_ptrs_to_free; - std::vector m_arrays_to_free; + std::vector m_ptrs_to_free; + std::vector m_arrays_to_free; };