Okay here we go:
My resources are registered on the game thread via the following dx/cuda calls, after begin play has been called - code isn’t perfectly clean but you should get the idea:
Header:
/**
* Viewport width and height
*/
int32 Width;
int32 Height;
/**
* Right eye color texture used for post processing. Cuda copies the optix results into this texture.
*/
TWeakObjectPtr<UTexture2D> OutputTexture;
/**
* Right eye depth texture used for post processing. Cuda copies the optix results into this texture.
*/
TWeakObjectPtr<UTexture2D> DepthTexture;
/**
* Left eye color texture used for post processing. Cuda copies the optix results into this texture.
*/
TWeakObjectPtr<UTexture2D> OutputTexture2;
/**
* Left eye depth texture used for post processing. Cuda copies the optix results into this texture.
*/
TWeakObjectPtr<UTexture2D> DepthTexture2;
/**
* Ortho pass texture used for post processing. Cuda copies the optix results into this texture.
* The ortho pass is used for single eye orthogonal rendering.
*/
TWeakObjectPtr<UTexture2D> OutputTextureOrtho;
TWeakObjectPtr<UTexture2D> DepthTextureOrtho;
/**
* Cuda graphic resources associated with the corresponding unreal textures.
*/
cudaGraphicsResource* CudaResourceDepthLeft;
cudaGraphicsResource* CudaResourceDepthRight;
cudaGraphicsResource* CudaResourceColorLeft;
cudaGraphicsResource* CudaResourceColorRight;
cudaGraphicsResource* CudaResourceIntersections;
cudaGraphicsResource* CudaResourceColorOrtho;
cudaGraphicsResource* CudaResourceDepthOrtho;
/**
* Cuda memory allocated and used to copy the optix results into the direct x textures.
*/
float* CudaLinearMemoryDepth;
float4* CudaLinearMemoryColor;
void* CudaLinearMemoryIntersections;
/**
* Cuda resource array used for easy mapping. Duplicate to above direct pointers.
*/
cudaGraphicsResource *Resources[7];
Unreal texture init:
void FOptiXContextManager::InitRendering()
{
TRACE_CPUPROFILER_EVENT_SCOPE("FOptiXContextManager::InitRendering")
UE_LOG(OptiXContextManagerLog, Display, TEXT("Initializing Rendering in ContextManager"));
// Are we using an HMD?
if (GEngine->XRSystem.IsValid() && GEngine->XRSystem->GetHMDDevice() != nullptr)
{
UE_LOG(OptiXContextManagerLog, Display, TEXT("Got HMD in ContextManager"));
bWithHMD = GEngine->XRSystem->GetHMDDevice()->IsHMDEnabled();
}
else
{
UE_LOG(OptiXContextManagerLog, Display, TEXT("Running without HMD in ContextManager"));
bWithHMD = false;
}
// Viewport size:
FViewport* CurrentViewport = GEngine->GameViewport->Viewport;
Width = CurrentViewport->GetSizeXY().X / 2.0;
Height = CurrentViewport->GetSizeXY().Y;
UE_LOG(OptiXContextManagerLog, Display, TEXT("Got viewport sizes: %i, %i"), Width, Height);
UE_LOG(OptiXContextManagerLog, Warning, TEXT("Full Res: %i %i"), Width * 2, Height);
// Apparently those can be 0 in a packaged build?
// Catch that case:
if (Width == 0 || Height == 0)
{
UGameUserSettings* GameSettings = GEngine->GetGameUserSettings();
Width = GameSettings->GetScreenResolution().X;
Height = GameSettings->GetScreenResolution().Y;
UE_LOG(OptiXContextManagerLog, Display, TEXT("Fallback to viewport size in settings: %i, %i"), Width, Height);
}
// Create the textures:
OutputTexture = UTexture2D::CreateTransient(Width, Height, PF_A32B32G32R32F);
OutputTexture->AddToRoot();
//// Allocate the texture HRI
OutputTexture->UpdateResource();
DepthTexture = UTexture2D::CreateTransient(Width, Height, PF_R32_FLOAT);
DepthTexture->AddToRoot();
//// Allocate the texture HRI
DepthTexture->UpdateResource();
if (bWithHMD)
{
OutputTexture2 = UTexture2D::CreateTransient(Width, Height, PF_A32B32G32R32F);
OutputTexture2->AddToRoot();
//// Allocate the texture HRI
OutputTexture2->UpdateResource();
DepthTexture2 = UTexture2D::CreateTransient(Width, Height, PF_R32_FLOAT);
DepthTexture2->AddToRoot();
//// Allocate the texture HRI
DepthTexture2->UpdateResource();
OutputTextureOrtho = UTexture2D::CreateTransient(Width, Height, PF_A32B32G32R32F);
OutputTextureOrtho->AddToRoot();
//// Allocate the texture HRI
OutputTextureOrtho->UpdateResource();
DepthTextureOrtho = UTexture2D::CreateTransient(Width, Height, PF_R32_FLOAT);
DepthTextureOrtho->AddToRoot();
//// Allocate the texture HRI
DepthTextureOrtho->UpdateResource();
}
UE_LOG(OptiXContextManagerLog, Display, TEXT("Created the Textures"));
// Laser Texture
LaserIntersectionTexture = UTexture2D::CreateTransient(LaserBufferWidth, LaserBufferHeight, PF_A32B32G32R32F); // TODO Hardcoded values
LaserIntersectionTexture->AddToRoot();
//// Allocate the texture HRI
LaserIntersectionTexture->UpdateResource();
// Set up the material
// Load the materials
RegularMaterial = LoadObject<UMaterial>(GetTransientPackage(), TEXT("Material'/OptiX/PPMaterials/TextureMaterial.TextureMaterial'"));
VRMaterial = LoadObject<UMaterial>(GetTransientPackage(), TEXT("Material'/OptiX/PPMaterials/TextureMaterialVR.TextureMaterialVR'"));
LaserMaterial = LoadObject<UMaterial>(GetTransientPackage(), TEXT("Material'/OptiX/Laser/LaserMaterial.LaserMaterial'"));
LaserMaterialDynamic = UMaterialInstanceDynamic::Create(LaserMaterial.Get(), GetTransientPackage(), "DynamicLaserMaterial");
LaserMaterialDynamic->SetTextureParameterValue("IntersectionTexture", LaserIntersectionTexture.Get());
LaserMaterialDynamic->SetScalarParameterValue("Lines", 50);
LaserMaterialDynamic->SetScalarParameterValue("Segments", 20);
if(RegularMaterial == nullptr || VRMaterial == nullptr)
{
UE_LOG(OptiXContextManagerLog, Error, TEXT("Couldn't load dummy Material!"));
}
if (bWithHMD)
{
DynamicMaterial = UMaterialInstanceDynamic::Create(VRMaterial.Get(), GetTransientPackage(), "DynamicVRMaterial");
DynamicMaterial->SetTextureParameterValue("TextureRight", OutputTexture.Get());
DynamicMaterial->SetTextureParameterValue("DepthRight", DepthTexture.Get());
DynamicMaterial->SetTextureParameterValue("TextureLeft", OutputTexture2.Get());
DynamicMaterial->SetTextureParameterValue("DepthLeft", DepthTexture2.Get());
DynamicMaterialOrtho = UMaterialInstanceDynamic::Create(RegularMaterial.Get(), GetTransientPackage(), "DynamicNonVRMaterial");
DynamicMaterialOrtho->SetTextureParameterValue("Texture", OutputTextureOrtho.Get());
DynamicMaterialOrtho->SetTextureParameterValue("Depth", DepthTextureOrtho.Get());
}
else
{
DynamicMaterial = UMaterialInstanceDynamic::Create(RegularMaterial.Get(), GetTransientPackage(), "DynamicNonVRMaterial");
DynamicMaterial->SetTextureParameterValue("Texture", OutputTexture.Get());
DynamicMaterial->SetTextureParameterValue("Depth", DepthTexture.Get());
}
Cuda Init:
void FOptiXContextManager::InitCUDADX()
{
// Setup DX:
check(IsInGameThread());
// Depth
{
FD3D11TextureBase* D3D11TextureDepthLeft = GetD3D11TextureFromRHITexture(DepthTexture2->Resource->TextureRHI);
cudaGraphicsD3D11RegisterResource(&CudaResourceDepthLeft, D3D11TextureDepthLeft->GetResource(), cudaGraphicsRegisterFlagsNone);
PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
}
{
FD3D11TextureBase* D3D11TextureDepthRight = GetD3D11TextureFromRHITexture(DepthTexture->Resource->TextureRHI);
cudaGraphicsD3D11RegisterResource(&CudaResourceDepthRight, D3D11TextureDepthRight->GetResource(), cudaGraphicsRegisterFlagsNone);
PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
}
// Color
{
FD3D11TextureBase* D3D11TextureColorLeft = GetD3D11TextureFromRHITexture(OutputTexture2->Resource->TextureRHI);
cudaGraphicsD3D11RegisterResource(&CudaResourceColorLeft, D3D11TextureColorLeft->GetResource(), cudaGraphicsRegisterFlagsNone);
PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
}
{
FD3D11TextureBase* D3D11TextureColorRight = GetD3D11TextureFromRHITexture(OutputTexture->Resource->TextureRHI);
cudaGraphicsD3D11RegisterResource(&CudaResourceColorRight, D3D11TextureColorRight->GetResource(), cudaGraphicsRegisterFlagsNone);
PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
}
// Intersection
{
FD3D11TextureBase* D3D11TextureIntersections = GetD3D11TextureFromRHITexture(LaserIntersectionTexture->Resource->TextureRHI);
cudaGraphicsD3D11RegisterResource(&CudaResourceIntersections, D3D11TextureIntersections->GetResource(), cudaGraphicsRegisterFlagsNone);
PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
}
// Ortho
{
FD3D11TextureBase* D3D11TextureOrthoDepth = GetD3D11TextureFromRHITexture(DepthTextureOrtho->Resource->TextureRHI);
cudaGraphicsD3D11RegisterResource(&CudaResourceDepthOrtho, D3D11TextureOrthoDepth->GetResource(), cudaGraphicsRegisterFlagsNone);
PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
}
{
FD3D11TextureBase* D3D11TextureOrthoOutput = GetD3D11TextureFromRHITexture(OutputTextureOrtho->Resource->TextureRHI);
cudaGraphicsD3D11RegisterResource(&CudaResourceColorOrtho, D3D11TextureOrthoOutput->GetResource(), cudaGraphicsRegisterFlagsNone);
PrintLastCudaError("cudaGraphicsD3D11RegisterResource");
}
cudaMalloc((void**)&CudaLinearMemoryDepth, Width * Height * sizeof(float) * 2);
PrintLastCudaError("cudaMalloc");
cudaMalloc((void**)&CudaLinearMemoryColor, Width * Height * 4 * sizeof(float) * 2);
PrintLastCudaError("cudaMalloc");
cudaMalloc(&CudaLinearMemoryIntersections, LaserBufferWidth * LaserBufferHeight * 4 * sizeof(float));
PrintLastCudaError("cudaMalloc");
NativeContext"result_depth"]->getBuffer()->setDevicePointer(0, CudaLinearMemoryDepth);
NativeContext"result_color"]->getBuffer()->setDevicePointer(0, CudaLinearMemoryColor);
NativeContext"result_laser"]->getBuffer()->setDevicePointer(0, CudaLinearMemoryIntersections);
FString DeviceName = FString(NativeContext->getDeviceName(0).c_str());
UE_LOG(OptiXContextManagerLog, Display, TEXT("Device Count: %i"), NativeContext->getDeviceCount());
UE_LOG(OptiXContextManagerLog, Display, TEXT("Device Name 0: %s"), *DeviceName);
Resources[0] = CudaResourceDepthLeft;
Resources[1] = CudaResourceColorLeft;
Resources[2] = CudaResourceDepthRight;
Resources[3] = CudaResourceColorRight;
Resources[4] = CudaResourceIntersections;
Resources[5] = CudaResourceColorOrtho;
Resources[6] = CudaResourceDepthOrtho;
And the actual interesting part, the texture update. This is called on the render thread in my case to deal with VR late update shenanigans. If you don’t care about those,
you can execute this on the game thread as well, but it requires completely different syntax.
void FOptiXContextManager::LaunchStandardTrace(FRHICommandListImmediate& RHICmdList, FSceneViewFamily& InViewFamily)
{
TRACE_CPUPROFILER_EVENT_SCOPE("FOptiXContextManager::PreRenderViewFamily_RenderThread")
if (bEndPlayReceived)
{
return;
}
if (InViewFamily.Views.Num() < 2)
return;
// Get the views for the respective eyes:
EStereoscopicPass LeftEye = EStereoscopicPass::eSSP_LEFT_EYE;
EStereoscopicPass RightEye = EStereoscopicPass::eSSP_RIGHT_EYE;
const FSceneView& LeftEyeView = InViewFamily.GetStereoEyeView(LeftEye);
const FSceneView& RightEyeView = InViewFamily.GetStereoEyeView(RightEye);
// Set the required matrices
NativeContext"invViewProjectionLeft"]->setMatrix4x4fv(true, &LeftEyeView.ViewMatrices.GetInvViewProjectionMatrix().M[0][0]);
NativeContext"viewProjectionLeft"]->setMatrix4x4fv(true, &LeftEyeView.ViewMatrices.GetViewProjectionMatrix().M[0][0]);
NativeContext"invViewProjectionRight"]->setMatrix4x4fv(true, &RightEyeView.ViewMatrices.GetInvViewProjectionMatrix().M[0][0]);
NativeContext"viewProjectionRight"]->setMatrix4x4fv(true, &RightEyeView.ViewMatrices.GetViewProjectionMatrix().M[0][0]);
//NativeContext->validate();
{
TRACE_CPUPROFILER_EVENT_SCOPE("FOptiXContextManager::Trace")
//bIsTracing.AtomicSet(true);
// Execute the actual trace
NativeContext->launch(0, Width, Height);
//bIsTracing.AtomicSet(false);
//return;
}
{
// Check cuda resources for NULL. Shouldn't be needed as they *should* never be NULL.
if (Resources[0] == NULL || Resources[1] == NULL || Resources[2] == NULL || Resources[3] == NULL)
{
UE_LOG(OptiXContextManagerLog, Error, TEXT("CUDA Resources are NULL"));
return;
}
// Map the four graphics resources corresponding to color, depth for both eyes.
cudaGraphicsMapResources(4, Resources, 0);
PrintLastCudaError("cudaGraphicsMapResources");
// Map the left eye color resource to a cudaArray
cudaArray *CuArrayColorLeft;
cudaGraphicsSubResourceGetMappedArray(&CuArrayColorLeft, CudaResourceColorLeft, 0, 0);
PrintLastCudaError("cudaGraphicsSubResourceGetMappedArray");
// Copy the result of the optix 2D color buffer into the mapped array.
// As both passes are written into the same buffer, this copies only the first half corresponding to the left eye.
cudaMemcpy2DToArray(
CuArrayColorLeft, // dst array
0, 0, // offset
CudaLinearMemoryColor, Width * 4 * sizeof(float), // src
Width * 4 * sizeof(float), Height, // extent
cudaMemcpyDeviceToDevice); // kind
PrintLastCudaError("cudaMemcpy2DToArray");
// Copy Color Right
cudaArray *CuArrayColorRight;
cudaGraphicsSubResourceGetMappedArray(&CuArrayColorRight, CudaResourceColorRight, 0, 0);
PrintLastCudaError("cudaGraphicsSubResourceGetMappedArray");
// Copy the result of the optix 2D color buffer into the mapped array.
// As this copies the into the right eye, the buffer pointer needs to be offset by (Height * Width)
// to copy the second half.
cudaMemcpy2DToArray(
CuArrayColorRight, // dst array
0, 0, // offset
CudaLinearMemoryColor + (Height * Width), Width * 4 * sizeof(float), // src
Width * 4 * sizeof(float), Height, // extent
cudaMemcpyDeviceToDevice); // kind
PrintLastCudaError("cudaMemcpy2DToArray");
// Copy Depth Left
cudaArray *CuArrayDepthLeft;
cudaGraphicsSubResourceGetMappedArray(&CuArrayDepthLeft, CudaResourceDepthLeft, 0, 0);
PrintLastCudaError("cudaGraphicsSubResourceGetMappedArray");
cudaMemcpy2DToArray(
CuArrayDepthLeft, // dst array
0, 0, // offset
CudaLinearMemoryDepth, Width * sizeof(float), // src
Width * sizeof(float), Height, // extent
cudaMemcpyDeviceToDevice); // kind
PrintLastCudaError("cudaMemcpy2DToArray");
// Copy Depth Right
cudaArray *CuArrayDepthRight;
cudaGraphicsSubResourceGetMappedArray(&CuArrayDepthRight, CudaResourceDepthRight, 0, 0);
PrintLastCudaError("cudaGraphicsSubResourceGetMappedArray");
cudaMemcpy2DToArray(
CuArrayDepthRight, // dst array
0, 0, // offset
CudaLinearMemoryDepth + (Height * Width), Width * sizeof(float), // src
Width * sizeof(float), Height, // extent
cudaMemcpyDeviceToDevice); // kind
PrintLastCudaError("cudaMemcpy2DToArray");
cudaGraphicsUnmapResources(4, Resources, 0);
PrintLastCudaError("cudaGraphicsUnmapResources");
//UpdateCubemapBuffer(RHICmdList);
}