Port Shadertoy JPEG compression GLSL shader to Unreal Engine

Hi everyone,

I recently found a JPEG compression shader on Shadertoy:

I was wondering if it is possible to translate the shader code from GLSL to HLSL and use it in a Custom node in a post-process material.

My approach would be to translate the individual code snippets from the different buffers into HLSL and create Custom nodes from them. The variable ‘fragColor’ would probably correspond to ‘SceneTexture:PostProcessInput0’. What I don’t understand is how the different buffers work together.

Here is one of my failed attempts. Unfortunately, I keep getting compilation errors that I can’t resolve:

#define PI    3.14159265359
#define SQRT2 0.70710678118
#define NB_LEVELS 64.0
#define NB_FREQ    8

// Buffer B
float2 k = frac(fragCoord.xy / 8.0) - float2(0.5, 0.5);
float2 K = fragCoord.xy - float2(0.5, 0.5) - k;
float3 val = float3(0.0, 0.0, 0.0);

for (int x = 0; x < 8; ++x)
{
    for (int y = 0; y < 8; ++y)
    {
        float2 uv = (K + float2(x, y) + float2(0.5, 0.5)) / iResolution.xy;
        float2 freq = float2(x, y) + float2(0.5, 0.5);

        val += tex2D(iChannel0, uv).rgb *
               cos(PI * k.x * freq.x / 8.0) * cos(PI * k.y * freq.y / 8.0) *
               ((k.x < 0.5) ? SQRT2 : 1.0) * ((k.y < 0.5) ? SQRT2 : 1.0);
    }
}

fragColor = float4(val / 4.0, 0.0);

// Buffer C
if (fragColor.x < 0.5)
{
    fragColor = round(fragColor / 8.0 * NB_LEVELS) / NB_LEVELS * 8.0;
}

// Buffer D
val = float3(0.0, 0.0, 0.0);
for (int u = 0; u < NB_FREQ; ++u)
{
    for (int v = 0; v < NB_FREQ; ++v)
    {
        float2 uv = (K + float2(u, v) + float2(0.5, 0.5)) / iResolution.xy;
        float2 freq = float2(u, v) + float2(0.5, 0.5);

        val += fragColor.rgb *
               cos(PI * (k.x + 0.5) * (freq.x / 8.0)) * cos(PI * (k.y + 0.5) * (freq.y / 8.0)) *
               ((u == 0) ? SQRT2 : 1.0) * ((v == 0) ? SQRT2 : 1.0);
    }
}

fragColor = float4(val / 4.0, 1.0);

// Result
return fragColor;
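
I assume the Shadertoy built-ins like ‘fragCoord’, ‘iResolution’ and ‘iChannel0’ have to be replaced with something Unreal understands. This is just my guess from skimming the docs (the SceneTexture index 14 and the functions below are assumptions on my part, so they may well be part of the problem):

// Guessed mapping of the Shadertoy globals inside a post-process Custom node.
// Assumes a SceneTexture:PostProcessInput0 node is present in the material so that
// SceneTextureLookup() is available, and that 14 is the PostProcessInput0 index.
float2 uv          = GetDefaultSceneTextureUV(Parameters, 14); // screen UV in [0, 1]
float2 iResolution = View.ViewSizeAndInvSize.xy;               // Shadertoy's iResolution.xy
float2 fragCoord   = uv * iResolution;                         // Shadertoy's fragCoord.xy
float4 fragColor   = SceneTextureLookup(uv, 14, false);        // Shadertoy's texture(iChannel0, uv)
return fragColor;                                              // the Custom node's return value takes the role of fragColor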

I have very little knowledge of shader programming, so I was hoping that someone could help steer me in the right direction.

Thank you in advance!

interested as well

It might be a little late, but I gave this another try and came up with a working solution.

I studied how real JPEG compression actually works and “vibe coded” this HLSL code.
The main difference between this version and the Shadertoy version is that I managed to do all the steps in a single Custom node instead of using multiple buffers.
I also convert from RGB to YCbCr before the quantization step and from YCbCr back to RGB afterwards, and I use separate quantization tables for the luma (Y) and chroma (Cb/Cr) channels.

I also had some issues with visible (unwanted) artifacts, which is why I clamp the sample UVs at the start and do some bilinear interpolation inside the block at the end.
Also, maybe because of the RGB → YCbCr → RGB conversion, I ran into some color-space issues, which is why I apply some gamma correction at the very end.

Here is the HLSL code:

#define PI 3.14159265359
#define SQRT2 0.70710678118

// Inputs from Custom node:
// UV (float2)
// Resolution (float2) - full scene resolution
// STEPS (float) - DCT quantization steps
// Quality (float) - JPEG quality [0..100]

// --- Compute 8x8 block coordinates ---
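// fc   = pixel position of this fragment
// grid = pixel coordinates of the top-left corner of the 8x8 block this pixel belongs to
// uv8  = position of this pixel inside its block, in [0, 8)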
float2 fc = UV * Resolution;
float2 grid = floor(fc / 8.0) * 8.0;
float2 uv8 = frac(fc / 8.0) * 8.0;

// --- JPEG quantization matrices ---
static const float QY[8][8] = {
    {16,11,10,16,24,40,51,61},
    {12,12,14,19,26,58,60,55},
    {14,13,16,24,40,57,69,56},
    {14,17,22,29,51,87,80,62},
    {18,22,37,56,68,109,103,77},
    {24,35,55,64,81,104,113,92},
    {49,64,78,87,103,121,120,101},
    {72,92,95,98,112,100,103,99}
};

static const float QC[8][8] = {
    {17,18,24,47,99,99,99,99},
    {18,21,26,66,99,99,99,99},
    {24,26,56,99,99,99,99,99},
    {47,66,99,99,99,99,99,99},
    {99,99,99,99,99,99,99,99},
    {99,99,99,99,99,99,99,99},
    {99,99,99,99,99,99,99,99},
    {99,99,99,99,99,99,99,99}
};

float3 block[8][8];
for (int y = 0; y < 8; y++)
{
    for (int x = 0; x < 8; x++)
    {
        float2 sampleUV = (grid + float2(x, y)) / Resolution;

        // Clamp UVs inside [0, 1 - texel]
        sampleUV = clamp(sampleUV, 0.0, (Resolution - 1.0) / Resolution);

        float3 rgb = Texture2DSample(InputTex, InputTexSampler, sampleUV).rgb;

        // --- RGB → YCbCr (linear, no offsets) ---
        float Y  = 0.299 * rgb.r + 0.587 * rgb.g + 0.114 * rgb.b;
        float Cb = 0.564 * (rgb.b - Y);
        float Cr = 0.713 * (rgb.r - Y);
        block[y][x] = float3(Y, Cb, Cr);
    }
}
// --- Forward DCT along X ---
float3 dctX[8][8];
for (int y = 0; y < 8; y++)
{
    for (int u = 0; u < 8; u++)
    {
        float alpha = (u == 0) ? 0.35355339 : 0.5; // DCT normalization: sqrt(1/8) for the DC term, sqrt(2/8) otherwise
        float3 sum = float3(0,0,0);
        for (int x = 0; x < 8; x++)
        {
            float coeff = cos((PI * (2.0 * x + 1.0) * u) / 16.0);
            sum += block[y][x] * coeff;
        }
        dctX[y][u] = sum * alpha;
    }
}

// --- Forward DCT along Y ---
float3 dctXY[8][8];
for (int u = 0; u < 8; u++)
{
    for (int v = 0; v < 8; v++)
    {
        float alpha = (v == 0) ? 0.35355339 : 0.5;
        float3 sum = float3(0,0,0);
        for (int y = 0; y < 8; y++)
        {
            float coeff = cos((PI * (2.0 * y + 1.0) * v) / 16.0);
            sum += dctX[y][u] * coeff;
        }
        dctXY[v][u] = sum * alpha;
    }
}

// --- Quantization ---
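// Quality maps linearly to a global scale on the tables: Quality 0 -> 2.0 (coarse), Quality 100 -> 0.1 (fine).
// The effective step size per coefficient is q / STEPS, so larger STEPS means finer quantization.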
float qScale = lerp(2.0, 0.1, Quality / 100.0);
for (int v = 0; v < 8; v++)
{
    for (int u = 0; u < 8; u++)
    {
        float3 q;
        q.x = QY[v][u] * qScale;
        q.y = QC[v][u] * qScale;
        q.z = QC[v][u] * qScale;
        dctXY[v][u] = round(dctXY[v][u] / q * STEPS) * q / STEPS;
    }
}

// --- Inverse DCT along Y ---
float3 idctY[8][8];
for (int u = 0; u < 8; u++)
{
    for (int y = 0; y < 8; y++)
    {
        float3 sum = float3(0,0,0);
        for (int v = 0; v < 8; v++)
        {
            float alpha = (v == 0) ? 0.35355339 : 0.5;
            float coeff = cos((PI * (2.0 * y + 1.0) * v) / 16.0);
            sum += alpha * dctXY[v][u] * coeff;
        }
        idctY[y][u] = sum;
    }
}

// --- Inverse DCT along X + YCbCr → RGB ---
float3 finalBlock[8][8];
for (int y = 0; y < 8; y++)
{
    for (int x = 0; x < 8; x++)
    {
        float3 sum = float3(0,0,0);
        for (int u = 0; u < 8; u++)
        {
            float alpha = (u == 0) ? 0.35355339 : 0.5;
            float coeff = cos((PI * (2.0 * x + 1.0) * u) / 16.0);
            sum += alpha * idctY[y][u] * coeff;
        }

        // YCbCr → RGB (linear)
        float3 ycbcr = sum * 0.25;
        float Y  = ycbcr.x;
        float Cb = ycbcr.y;
        float Cr = ycbcr.z;
        float3 rgb;
        rgb.r = Y + 1.403 * Cr;
        rgb.g = Y - 0.714 * Cr - 0.344 * Cb;
        rgb.b = Y + 1.773 * Cb;
        finalBlock[y][x] = rgb;
    }
}

// --- Bilinear interpolation inside the 8x8 block ---
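// Treat the 8x8 entries as texel centers (hence the -0.5) and blend between the four
// nearest ones, clamped at the block border, to avoid hard stair-stepping inside a block.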
float2 uvBlock = uv8 - 0.5;
uvBlock = clamp(uvBlock, 0.0, 7.0);
int ix0 = (int)floor(uvBlock.x);
int iy0 = (int)floor(uvBlock.y);
int ix1 = min(ix0 + 1, 7);
int iy1 = min(iy0 + 1, 7);
float fx = uvBlock.x - ix0;
float fy = uvBlock.y - iy0;

float3 col = lerp(
    lerp(finalBlock[iy0][ix0], finalBlock[iy0][ix1], fx),
    lerp(finalBlock[iy1][ix0], finalBlock[iy1][ix1], fx),
    fy
);

return pow(saturate(col), 1.0 / 2.5);


Here is the setup of the PostProcessing material:

Notice that the “JPEG Compression” node is a Custom node containing the HLSL code above.
The output type of this node is “CMOT Float 3”.
“InputTex” is a TextureObject input with a RenderTarget assigned to it.
Since my goal was to mimic highly compressed digital camera footage, I render a SceneCaptureComponent2D to a RenderTarget at a lower resolution (720x576 pixels), which then gets compressed and displayed.

This is how I render the image to the RenderTarget from my FirstPersonCharacter blueprint:
The SceneCaptureComponent2D is a child of the camera:

In the “SceneCapture” tab in the details of the SceneCaptureComponent2D I assigned the RenderTarget texture:

The RenderTarget itself has the following properties:

Now you can assign the JPEG compression post-process material to the actual camera.

This solution might be far from ideal, but it works.
It would be great if someone has some ideas to make this more robust, correct and performant.

This is what the result might look like with 128 steps and a quality of 50:

Steps: 256, Quality: 80

I would really appreciate it if someone has ideas on how to optimize the HLSL code. :slight_smile: