Read back data from AppendStructuredBuffer in UE5

I can’t figure out how to read back the data that a compute shader writes to an AppendStructuredBuffer in UE 5.2.

I have this simple shader which fills an AppendStructuredBuffer with 1s:

// Append-only output buffer; every dispatched thread pushes one element into it.
AppendStructuredBuffer<int> OutputAppendBuffer;

// Compute entry point. The thread-group dimensions come from the
// THREADGROUPSIZE_X/Y/Z preprocessor defines.
[numthreads(THREADGROUPSIZE_X, THREADGROUPSIZE_Y, THREADGROUPSIZE_Z)]
void Main(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	// The thread id is not used: each thread appends the constant 1.
	OutputAppendBuffer.Append(1);
}

Here is the declaration in C++:

/**
 * Global compute shader declaration for the append-buffer test.
 *
 * Exposes a single UAV parameter (OutputAppendBuffer) and forwards the
 * THREADGROUPSIZE_X/Y/Z values to the shader as compile-time defines.
 */
class COMPUTESHADERDECLARATIONS_API FAppendBufferTestCSDeclaration : public FGlobalShader
{
public:
	DECLARE_GLOBAL_SHADER(FAppendBufferTestCSDeclaration);
	SHADER_USE_PARAMETER_STRUCT(FAppendBufferTestCSDeclaration, FGlobalShader);

	// Parameter struct bound to the shader: one RDG buffer UAV.
	BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
		SHADER_PARAMETER_RDG_BUFFER_UAV(AppendStructuredBuffer<int32>, OutputAppendBuffer)
	END_SHADER_PARAMETER_STRUCT()

public:
	/** Returns true only for platforms that support feature level SM5. */
	static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
	{
		const bool bSupportsSM5 = IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5);
		return bSupportsSM5;
	}

	/**
	 * Applies the base-class environment, then defines the thread-group
	 * dimensions consumed by the shader's [numthreads(...)] attribute.
	 */
	static void ModifyCompilationEnvironment(
		const FGlobalShaderPermutationParameters& Parameters,
		FShaderCompilerEnvironment& OutEnvironment)
	{
		FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);

		OutEnvironment.SetDefine(TEXT("THREADGROUPSIZE_X"), THREADGROUPSIZE_X);
		OutEnvironment.SetDefine(TEXT("THREADGROUPSIZE_Y"), THREADGROUPSIZE_Y);
		OutEnvironment.SetDefine(TEXT("THREADGROUPSIZE_Z"), THREADGROUPSIZE_Z);
	}
};

I then dispatch the shader and try to read back the data like so:

check(IsInRenderingThread())

FRDGBuilder GraphBuilder(RHICmdList);

// Get shader reference
TShaderMapRef<FAppendBufferTestCSDeclaration> ComputeShader(GetGlobalShaderMap(GMaxRHIFeatureLevel));

if (ComputeShader.IsValid())
{
	FAppendBufferTestCSDeclaration::FParameters* PassParameters = GraphBuilder.AllocParameters<FAppendBufferTestCSDeclaration::FParameters>();

	// Hard coded group count for testing purposes.
	FIntVector GroupCount = {1, 1, 1};
	uint32 NumDispatchedThreads = GroupCount.X * GroupCount.Y * GroupCount.Z * THREADGROUPSIZE_X * THREADGROUPSIZE_Y * THREADGROUPSIZE_Z;
	
	FRDGBufferRef OutputAppendBufferRef = GraphBuilder.CreateBuffer(
		FRDGBufferDesc::CreateBufferDesc(sizeof(int32), NumDispatchedThreads),
		TEXT("OutputAppendBuffer"),
		ERDGBufferFlags::None
	);
	PassParameters->OutputAppendBuffer = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(OutputAppendBufferRef));
	
	GraphBuilder.AddPass(
		RDG_EVENT_NAME("ExecuteAppendBufferTestCS"),
		PassParameters,
		ERDGPassFlags::AsyncCompute,
		[&PassParameters, ComputeShader, GroupCount](FRHIComputeCommandList& RHICmdList)
		{
			FComputeShaderUtils::Dispatch(RHICmdList, ComputeShader, *PassParameters, GroupCount);		
		}
	);

	TRefCountPtr<FRDGPooledBuffer> PooledAppendBuffer;
	GraphBuilder.QueueBufferExtraction(OutputAppendBufferRef, &PooledAppendBuffer, ERHIAccess::CPURead);

	GraphBuilder.Execute();

	// Read data
	FRHIBuffer* AppendBuffer = PooledAppendBuffer->GetRHI();
	int32* AppendBufferData = (int32*)RHILockBuffer(AppendBuffer, 0, sizeof(int32) * NumDispatchedThreads, RLM_ReadOnly);
	// Probably unnecessary copy of data
	int32* AppendBufferDataCopy = new int32[sizeof(int32) * NumDispatchedThreads];
	FMemory::Memcpy(AppendBufferDataCopy, AppendBufferData, sizeof(int32) * NumDispatchedThreads);
	for (uint32 i = 0; i < NumDispatchedThreads; i++)
	{
		UE_LOG(LogTemp, Display, TEXT("Output data : %d"), AppendBufferDataCopy[i])
	}
	delete[] AppendBufferDataCopy;
	RHIUnlockBuffer(AppendBuffer);
}
else
{
	UE_LOG(LogTemp, Warning, TEXT("Shader reference of 'FAppendDataTestCSDeclaration' is invalid."))
}

I dispatch one thread group of 8 threads, and this is the output:

LogTemp: Display: Output data : 1
LogTemp: Display: Output data : 15
LogTemp: Display: Output data : 27
LogTemp: Display: Output data : 40
LogTemp: Display: Output data : 43
LogTemp: Display: Output data : 44
LogTemp: Display: Output data : 45
LogTemp: Display: Output data : 46

It doesn’t matter what I fill the buffer with; only the first element is ever correct. The other elements always contain the same values as shown above. If I use a RWStructuredBuffer instead, everything works as expected.

Any help is very much appreciated!

Hi there Kent! I am facing the exact same issue. Did you ever figure out how to use these
AppendStructuredBuffers?