Reading output buffer - Compute Shaders

Like many people trying to get into compute shaders with Unreal, I started with Temaran's excellent demo: GitHub - Temaran/UE4ShaderPluginDemo: A tutorial project that shows how to implement HLSL Pixel and Compute shaders in UE4

However, for my project I’m more interested in having a data array as an output rather than a render target such as a texture. Furthermore, I also require a texture as input. So I’ve made some attempts at altering the demo project to fit these goals, but I’m stuck on finding the right way to read the data back out of a structured buffer. Ultimately, somewhere in my game thread, I would like to receive the output of the compute shader in some TArray structure. Here are my adjustments to the original demo code so far:

ComputeShaderExample.cpp:

...

	// Parameter block bound to the compute shader for each dispatch.
	// NOTE(review): the first macro argument is normally the HLSL-side type
	// (e.g. Texture2D<float4>, RWStructuredBuffer<float4>) rather than the C++
	// FVector4 -- confirm against the .usf declaration.
	BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
		// Texture read by the shader; assigned from the inputTexture argument of RunComputeShader_RenderThread.
		SHADER_PARAMETER_TEXTURE(Texture2D<FVector4>, InputTexture)
                ...
		// UAV the shader writes its results to; assigned from the outputBuffer_UAV argument of RunComputeShader_RenderThread.
		SHADER_PARAMETER_UAV(RWStructuredBuffer<FVector4>, OutputBuffer)
	END_SHADER_PARAMETER_STRUCT()

...

// Binds the input texture and output UAV to the example compute shader and
// dispatches it with enough thread groups to cover the cached render target size.
//
// RHICmdList        Immediate command list the dispatch is recorded on.
// DrawParameters    Supplies the render target size used to derive the group count.
// inputTexture      Texture bound to the shader's InputTexture parameter.
// outputBuffer_UAV  UAV bound to the shader's OutputBuffer parameter.
void FComputeShaderExample::RunComputeShader_RenderThread(FRHICommandListImmediate& RHICmdList, const FShaderUsageExampleParameters& DrawParameters, FTextureRHIRef inputTexture, FUnorderedAccessViewRHIRef outputBuffer_UAV)
{
	QUICK_SCOPE_CYCLE_COUNTER(STAT_ShaderPlugin_ComputeShader); // Used to gather CPU profiling data for the UE4 session frontend
	SCOPED_DRAW_EVENT(RHICmdList, ShaderPlugin_Compute); // Used to profile GPU activity and add metadata to be consumed by for example RenderDoc

	UnbindRenderTargets(RHICmdList);

	// Fill in the shader parameter block.
	FComputeShaderExampleCS::FParameters PassParameters;
	PassParameters.InputTexture = inputTexture;
        ...
	PassParameters.OutputBuffer = outputBuffer_UAV;

	// One thread per texel: round the group count up so partially filled groups still run.
	const FIntPoint TargetSize = DrawParameters.GetRenderTargetSize();
	const int32 GroupCountX = FMath::DivideAndRoundUp(TargetSize.X, NUM_THREADS_PER_GROUP_DIMENSION);
	const int32 GroupCountY = FMath::DivideAndRoundUp(TargetSize.Y, NUM_THREADS_PER_GROUP_DIMENSION);
	const FIntVector GroupCount(GroupCountX, GroupCountY, 1);

	TShaderMapRef<FComputeShaderExampleCS> ComputeShader(GetGlobalShaderMap(GMaxRHIFeatureLevel));
	FComputeShaderUtils::Dispatch(RHICmdList, *ComputeShader, PassParameters, GroupCount);
}

ShaderDeclarationDemoModule.cpp:

void FShaderDeclarationDemoModule::Draw_RenderThread(const FShaderUsageExampleParameters& DrawParameters)
{
	check(IsInRenderingThread());

	if (!DrawParameters.RenderTarget)
	{
		return;
	}

	FRHICommandListImmediate& RHICmdList = GRHICommandList.GetImmediateCommandList();

	QUICK_SCOPE_CYCLE_COUNTER(STAT_ShaderPlugin_Render); // Used to gather CPU profiling data for the UE4 session frontend
	SCOPED_DRAW_EVENT(RHICmdList, ShaderPlugin_Render); // Used to profile GPU activity and add metadata to be consumed by for example RenderDoc

	OutputBuffer_array_.SetNum(32);
	OutputBuffer_resource_.ResourceArray = &OutputBuffer_array_;
	OutputBuffer_buffer_ = RHICreateStructuredBuffer(sizeof(float), sizeof(FVector4) * 32, BUF_ShaderResource | BUF_UnorderedAccess, OutputBuffer_resource_);
	OutputBuffer_UAV_ = RHICreateUnorderedAccessView(OutputBuffer_buffer_, /* bool bUseUAVCounter */ false, /* bool bAppendBuffer */ false);


	FComputeShaderExample::RunComputeShader_RenderThread(RHICmdList, DrawParameters, DrawParameters.RenderTarget->GetRenderTargetResource()->TextureRHI, OutputBuffer_UAV_);
}

ShaderDeclarationDemoModule.h:

struct FShaderUsageExampleParameters
{
	UTextureRenderTarget2D* RenderTarget;
        ...
	TArray<FVector4> OutputBuffer;

	// Returns the render target size cached by the constructor, so callers
	// do not need to dereference RenderTarget to obtain it.
	FIntPoint GetRenderTargetSize() const
	{
		return CachedRenderTargetSize;
	}

	FShaderUsageExampleParameters()	{ }
	// Builds the parameter set from a render target and an output array.
	// The render target's dimensions are cached at construction; a null
	// render target yields a zero size.
	FShaderUsageExampleParameters(UTextureRenderTarget2D* InRenderTarget, 
		TArray<FVector4> OutputBuffer)
		: RenderTarget(InRenderTarget),
		  OutputBuffer(OutputBuffer),
		  ...
	{
		// Test the pointer that is dereferenced below (the original tested an undeclared InRenderTargetDepth).
		CachedRenderTargetSize = InRenderTarget ? FIntPoint(InRenderTarget->SizeX, InRenderTarget->SizeY) : FIntPoint::ZeroValue;
	}

...

private:
	// CPU-side array handed to the RHI as the buffer's resource array in Draw_RenderThread.
	TResourceArray<FVector4> OutputBuffer_array_;
	// Creation info passed to RHICreateStructuredBuffer; its ResourceArray points at OutputBuffer_array_.
	FRHIResourceCreateInfo OutputBuffer_resource_;
	// Structured buffer the compute shader writes into.
	FStructuredBufferRHIRef OutputBuffer_buffer_;
	// Unordered access view over OutputBuffer_buffer_, bound to the shader's OutputBuffer parameter.
	FUnorderedAccessViewRHIRef OutputBuffer_UAV_;

ShaderUsageDemoCharacter.cpp

// Constructor: creates the scene capture component as a default subobject
// (other setup is elided in this excerpt).
AShaderUsageDemoCharacter::AShaderUsageDemoCharacter()
{
	...
	// Capture component whose output is later routed into render_target_2D_.
	capture_2D_ = CreateDefaultSubobject<USceneCaptureComponent2D>(TEXT("CaptureComp"));
}

void AShaderUsageDemoCharacter::PostInitializeComponents()
{
	Super::PostInitializeComponents();
	render_target_2D_ = NewObject<UTextureRenderTarget2D>();
	capture_2D_->SetRelativeRotation(FRotator(0, 0, 0));
	capture_2D_->SetRelativeLocation(FVector(0, 0, 0));
	capture_2D_->AttachTo(this->RootComponent);
	capture_2D_->CaptureSource = ESceneCaptureSource::SCS_FinalColorLDR;
	render_target_2D_->InitCustomFormat(1024, 1024, PF_B8G8R8A8, true);
	capture_2D_->TextureTarget = render_target_2D_;
	capture_2D_->bAlwaysPersistRenderingState = true;
	capture_2D_->bCaptureEveryFrame = false;
	capture_2D_->bCaptureOnMovement = false;
	capture_2D_->bUseCustomProjectionMatrix = false;
}
...
void AShaderUsageDemoCharacter::Tick(float DeltaSeconds)
	...
	capture_2D_->CaptureScene();
	FShaderUsageExampleParameters DrawParameters(render_target_2D_,  output_buffer_);
	{
		...
	}
	FShaderDeclarationDemoModule::Get().UpdateParameters(DrawParameters);

	// HERE I WOULD LIKE TO READ OUT THE DATA BUFFER OF THE SHADER

ShaderUsageDemoCharacter.h

.
public:
	// Scene capture component created in the constructor; renders the scene into render_target_2D_.
	UPROPERTY(BlueprintReadWrite, EditAnywhere)
	class USceneCaptureComponent2D* capture_2D_;

	// Render target created in PostInitializeComponents and handed to the shader module as input.
	UPROPERTY(BlueprintReadWrite, EditAnywhere)
	class UTextureRenderTarget2D* render_target_2D_;

	// Array passed to FShaderUsageExampleParameters in Tick; not a UPROPERTY.
	TArray<FVector4> output_buffer_;

Hey! It’s been a while, I know, but did you manage to find a solution?

I am currently hard-stuck trying to read entire textures quickly in UE5, and since reading pixel-by-pixel values on the CPU is extremely slow, I thought compute shaders could be the way.

My idea is to input the texture and fit the values into an array (like an array of float4) with the same length as the number of pixels of the texture.

I wanted a shader that works with varying size textures, but I would have to change the number of threads dynamically depending on texture size, so I am not sure I can do that.

Then: can I return the array containing those pixel values in some way? How do I set it up?

Thanks to anyone who responds :smiley: