VectorMatrixMultiply

I am curious about how VectorMatrixMultiply works in Unreal Engine 5.

/**
 * Computes Result = Matrix1 * Matrix2 for 4x4 float matrices using vector registers.
 *
 * Each matrix is reinterpreted as an array of four VectorRegister4Float values,
 * one register per matrix row (this assumes the matrix storage is four
 * consecutive 4-float rows that can be accessed as registers -- confirm against
 * the FMatrix44f definition).
 *
 * Row i of the result is the sum over k of (component k of Matrix1 row i,
 * replicated across all lanes) multiplied by row k of Matrix2.
 *
 * @param Result   Destination matrix. May point at the same matrix as either input.
 * @param Matrix1  Left-hand operand.
 * @param Matrix2  Right-hand operand.
 */
void VectorMatrixMultiply(FMatrix44f* Result, const FMatrix44f* Matrix1, const FMatrix44f* Matrix2)
{
	const VectorRegister4Float* LhsRows = reinterpret_cast<const VectorRegister4Float*>(Matrix1);
	const VectorRegister4Float* RhsRows = reinterpret_cast<const VectorRegister4Float*>(Matrix2);
	VectorRegister4Float* DstRows = reinterpret_cast<VectorRegister4Float*>(Result);

	// All four rows are accumulated into a local buffer first, so the inputs
	// are never overwritten while they are still being read.
	VectorRegister4Float Computed[4];

	for (int RowIndex = 0; RowIndex < 4; ++RowIndex)
	{
		// Row of result: Matrix1[RowIndex] * Matrix2.
		// The element indices passed to VectorReplicate are kept as literals.
		VectorRegister4Float Accum = VectorMultiply(VectorReplicate(LhsRows[RowIndex], 0), RhsRows[0]);
		Accum = VectorMultiplyAdd(VectorReplicate(LhsRows[RowIndex], 1), RhsRows[1], Accum);
		Accum = VectorMultiplyAdd(VectorReplicate(LhsRows[RowIndex], 2), RhsRows[2], Accum);
		Accum = VectorMultiplyAdd(VectorReplicate(LhsRows[RowIndex], 3), RhsRows[3], Accum);
		Computed[RowIndex] = Accum;
	}

	// Write back only after every row is done, in case source and destination are the same.
	for (int RowIndex = 0; RowIndex < 4; ++RowIndex)
	{
		DstRows[RowIndex] = Computed[RowIndex];
	}
}

This code does not produce the correct multiplication result, whether the two matrices are passed as-is or the second matrix is transposed first.

How do I use this function correctly?