Background neural networks for Learning Agents neural network data assets

I don’t have an easy-to-share example that isn’t mixed up with proprietary code, but here is a quick gist of what to do:

// Header
#pragma once

#include "Containers/Array.h"
#include "LearningAgentsActions.h"
#include "LearningAgentsObservations.h"
#include "LearningAgentsPolicy.h"
#include "LearningArray.h"

#include "Templates/SharedPointer.h"
#include "NNE.h"
#include "NNERuntimeCPU.h"
#include "NNEModelData.h"


/**
 * Learning Agents policy that evaluates an NNE model (e.g. an imported ONNX file) on the CPU
 * and writes the model output into the interactor's action feature.
 *
 * Usage: assign ModelData, call LoadModel() once, then call RunNNE() to evaluate the model.
 *
 * NOTE(review): as a UCLASS this header also needs its matching "<HeaderName>.generated.h"
 * as the last #include; it is not shown in this snippet - confirm when pasting into a project.
 */
UCLASS(BlueprintType, Blueprintable)
class FNLEARNINGAGENTSRUNTIME_API ULearningAgentsONNXPolicy : public ULearningAgentsPolicy
{
	GENERATED_BODY()

public:
	/** Creates the NNE model instance from ModelData and prepares the output binding. */
	UFUNCTION(BlueprintCallable, Category = "LearningAgents")
	void LoadModel();

	/** Runs the loaded model on the current observations and copies the result into the action feature. */
	UFUNCTION(BlueprintCallable, Category = "LearningAgents")
	void RunNNE();

public:

	/** The NNE model asset to run. Must be set before calling LoadModel(). */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "LearningAgents")
	UNNEModelData* ModelData;

	/** Model instance created by LoadModel(); invalid until loading succeeds. */
	TSharedPtr<UE::NNE::IModelInstanceCPU> ModelInstance;

private:
	/** True only after LoadModel() has completed successfully. */
	bool bModelLoaded = false;

	// Tensor bindings handed to RunSync. These are arrays (one entry per model input/output)
	// because the implementation calls Empty()/Add() on them and passes ModelOutputs to RunSync.
	TArray<UE::NNE::FTensorBindingCPU> ModelInputs;
	TArray<UE::NNE::FTensorBindingCPU> ModelOutputs;

	// Backing storage for the model output. Rank 1 because LoadModel() sizes it with a single
	// dimension (the action vector length) and RunNNE() copies it into one agent's action row.
	TLearningArray<1, float> OutputActions;
};

// Cpp

#include "LearningAgentsManager.h"
#include "Kismet/GameplayStatics.h"
#include "LearningLog.h"
#include "Math/NumericLimits.h"
#include "Math/UnrealMathUtility.h"
#include "VisualLogger/VisualLogger.h"
#include "LearningFeatureObject.h"
#include "LearningArray.h"

void ULearningAgentsONNXPolicy::LoadModel()
{
	bModelLoaded = false;
	ModelInputs.Empty();
	ModelOutputs.Empty();

	TWeakInterfacePtr<INNERuntimeCPU> Runtime = UE::NNE::GetRuntime<INNERuntimeCPU>(FString("NNERuntimeORTCpu"));
	TSharedPtr<UE::NNE::IModelCPU> ModelCPU = Runtime->CreateModelCPU(ModelData);
	if (ModelCPU.IsValid())
	{
		ModelInstance = ModelCPU->CreateModelInstanceCPU();
	}
	else
	{
		UE_LOG(LogLearning, Error, TEXT("Model seems to be invalid. Check log for NNE error."));
		return;
	}

	// Setup Model Inputs

	TArray<UE::NNE::FTensorShape> ModelShapes;
	ModelShapes.Add(UE::NNE::FTensorShape::Make({(uint32)1 /** Batch size is 1 at inference time, or set to agent count */, (uint32)Interactor->GetObservationFeature().DimNum()})); // Always add the observation input
	// Add other inputs if you have multiple
	ModelInstance->SetInputTensorShapes(ModelShapes);

	// Setup Model Outputs
	OutputActions.SetNumUninitialized({ Interactor->GetActionFeature().DimNum() });
	UE::Learning::Array::Zero(OutputActions);
	ModelOutputs.Add({(void*)OutputActions.GetData(), Interactor->GetActionFeature().DimNum() * sizeof(float)});

	bModelLoaded = true;
	UE_LOG(LogLearning, Display, TEXT("NNE Model Loaded %s"), *ModelData->GetName());
}

void ULearningAgentsONNXPolicy::RunNNE()
{
	if (!ModelInstance || !bModelLoaded)
	{
		UE_LOG(LogLearning, Error, TEXT("NNE Model Instance Invalid - did you forget to call Load Model? or something bad happened during loading!"));
		return;
	}

	ModelInstance->RunSync(
		{ {(void*)Interactor->GetObservationFeature().FeatureBuffer()[0], Interactor->GetObservationFeature().DimNum() * sizeof(float)}},
		ModelOutputs);

	// TODO add a loop over agent ids or something
	int32 AgentId = 0;
	
	// Copy the output to LearningAgents
	UE::Learning::FFeatureObject& ActionFeature = Interactor->GetActionFeature();
	UE::Learning::Array::Copy(ActionFeature.InstanceData->View(ActionFeature.FeatureHandle)[AgentId], OutputActions);

	Interactor->GetActionEncodingAgentIteration()[AgentId]++;
}

This code likely doesn’t compile and probably has some issues, but will hopefully get you started. I was able to do this with the 5.3 version of learning agents and I’m not sure how feasible it is with 5.4 or 5.5.

If we have some time, I will see if we can squeeze this in as a feature without having to force users to do this in user code, but ideally ONNX is not needed in the future.

What operations are you trying to do that are not supported in LA currently? Perhaps we can implement them that way.

Additionally, I am using imitation learning, have worked through the tutorial, and have the cars successfully driving around the track. Is the model taking the frames along with the agent’s actions (such as throttle, speed, steering angle, etc.) as inputs that are concatenated together later in the model, or is the process different?

Not sure I fully understand but the model only operates on observations and by default has a “memory” (unless you manually disabled it in the settings). The models don’t typically take in actions currently.

Thanks!