From ab715a8876cf1ac48591352a88d02c07e0fdd541 Mon Sep 17 00:00:00 2001
From: Philip Rebohle <philip.rebohle@tu-dortmund.de>
Date: Fri, 3 May 2024 04:24:15 +0200
Subject: [PATCH] [d3d11] Implement better filtering when blitting video
 content

Unlike sampler-based linear filtering, filtering manually in the shader
guarantees that we never read outside the source region, and it also lets us
perform color space conversion prior to filtering.
---
 src/d3d11/d3d11_video.cpp                    | 63 +++++-----------
 src/d3d11/d3d11_video.h                      |  4 +-
 src/d3d11/shaders/d3d11_video_blit_frag.frag | 77 +++++++++++++++-----
 3 files changed, 76 insertions(+), 68 deletions(-)
diff --git a/src/d3d11/d3d11_video.cpp b/src/d3d11/d3d11_video.cpp
index 652fea5c..1543528a 100644
--- a/src/d3d11/d3d11_video.cpp
+++ b/src/d3d11/d3d11_video.cpp
@@ -1263,28 +1263,27 @@ namespace dxvk {
       }
 
       VkExtent3D viewExtent = cViews[0]->mipLevelExtent(0);
-      VkViewport srcViewport;
+
+      VkRect2D srcRect;
+      srcRect.offset = { 0, 0 };
+      srcRect.extent = { viewExtent.width, viewExtent.height };
 
       if (cStreamState.srcRectEnabled) {
-        srcViewport.x      = float(cStreamState.srcRect.left);
-        srcViewport.y      = float(cStreamState.srcRect.top);
-        srcViewport.width  = float(cStreamState.srcRect.right) - srcViewport.x;
-        srcViewport.height = float(cStreamState.srcRect.bottom) - srcViewport.y;
-      } else {
-        srcViewport.x      = 0.0f;
-        srcViewport.y      = 0.0f;
-        srcViewport.width  = float(viewExtent.width);
-        srcViewport.height = float(viewExtent.height);
+        srcRect.offset.x      = cStreamState.srcRect.left;
+        srcRect.offset.y      = cStreamState.srcRect.top;
+        srcRect.extent.width  = cStreamState.srcRect.right - srcRect.offset.x;
+        srcRect.extent.height = cStreamState.srcRect.bottom - srcRect.offset.y;
       }
 
       UboData uboData = { };
       uboData.colorMatrix[0][0] = 1.0f;
       uboData.colorMatrix[1][1] = 1.0f;
       uboData.colorMatrix[2][2] = 1.0f;
-      uboData.coordMatrix[0][0] = srcViewport.width / float(viewExtent.width);
-      uboData.coordMatrix[1][1] = srcViewport.height / float(viewExtent.height);
-      uboData.coordMatrix[2][0] = srcViewport.x / float(viewExtent.width);
-      uboData.coordMatrix[2][1] = srcViewport.y / float(viewExtent.height);
+      uboData.coordMatrix[0][0] = float(srcRect.extent.width) / float(viewExtent.width);
+      uboData.coordMatrix[1][1] = float(srcRect.extent.height) / float(viewExtent.height);
+      uboData.coordMatrix[2][0] = float(srcRect.offset.x) / float(viewExtent.width);
+      uboData.coordMatrix[2][1] = float(srcRect.offset.y) / float(viewExtent.height);
+      uboData.srcRect = srcRect;
       uboData.yMin = 0.0f;
       uboData.yMax = 1.0f;
       uboData.isPlanar = cViews[1] != nullptr;
@@ -1307,17 +1306,14 @@ namespace dxvk {
       ctx->bindShader<VK_SHADER_STAGE_FRAGMENT_BIT>(Rc<DxvkShader>(m_fs));
 
       ctx->bindUniformBuffer(VK_SHADER_STAGE_FRAGMENT_BIT, 0, DxvkBufferSlice(m_ubo));
-      ctx->bindResourceSampler(VK_SHADER_STAGE_FRAGMENT_BIT, 1, Rc<DxvkSampler>(m_sampler));
 
       for (uint32_t i = 0; i < cViews.size(); i++)
-        ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 2 + i, Rc<DxvkImageView>(cViews[i]));
+        ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, Rc<DxvkImageView>(cViews[i]));
 
       ctx->draw(3, 1, 0, 0);
 
-      ctx->bindResourceSampler(VK_SHADER_STAGE_FRAGMENT_BIT, 1, nullptr);
-
       for (uint32_t i = 0; i < cViews.size(); i++)
-        ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 2 + i, nullptr);
+        ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, nullptr);
     });
   }
 
@@ -1332,38 +1328,14 @@ namespace dxvk {
   }
 
 
-  void D3D11VideoContext::CreateSampler() {
-    DxvkSamplerCreateInfo samplerInfo;
-    samplerInfo.magFilter       = VK_FILTER_NEAREST;
-    samplerInfo.minFilter       = VK_FILTER_LINEAR;
-    samplerInfo.mipmapMode      = VK_SAMPLER_MIPMAP_MODE_NEAREST;
-    samplerInfo.mipmapLodBias   = 0.0f;
-    samplerInfo.mipmapLodMin    = 0.0f;
-    samplerInfo.mipmapLodMax    = 0.0f;
-    samplerInfo.useAnisotropy   = VK_FALSE;
-    samplerInfo.maxAnisotropy   = 1.0f;
-    samplerInfo.addressModeU    = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
-    samplerInfo.addressModeV    = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
-    samplerInfo.addressModeW    = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
-    samplerInfo.compareToDepth  = VK_FALSE;
-    samplerInfo.compareOp       = VK_COMPARE_OP_ALWAYS;
-    samplerInfo.reductionMode   = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE;
-    samplerInfo.borderColor     = VkClearColorValue();
-    samplerInfo.usePixelCoord   = VK_FALSE;
-    samplerInfo.nonSeamless     = VK_FALSE;
-    m_sampler = m_device->createSampler(samplerInfo);
-  }
-
-
   void D3D11VideoContext::CreateShaders() {
     SpirvCodeBuffer vsCode(d3d11_video_blit_vert);
     SpirvCodeBuffer fsCode(d3d11_video_blit_frag);
 
-    const std::array<DxvkBindingInfo, 4> fsBindings = {{
+    const std::array<DxvkBindingInfo, 3> fsBindings = {{
       { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, VK_IMAGE_VIEW_TYPE_MAX_ENUM, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_UNIFORM_READ_BIT, VK_TRUE },
-      { VK_DESCRIPTOR_TYPE_SAMPLER,        1, VK_IMAGE_VIEW_TYPE_MAX_ENUM, VK_SHADER_STAGE_FRAGMENT_BIT, 0 },
+      { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,  1, VK_IMAGE_VIEW_TYPE_2D,       VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_SHADER_READ_BIT },
       { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,  2, VK_IMAGE_VIEW_TYPE_2D,       VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_SHADER_READ_BIT },
-      { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,  3, VK_IMAGE_VIEW_TYPE_2D,       VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_SHADER_READ_BIT },
     }};
 
     DxvkShaderCreateInfo vsInfo;
@@ -1385,7 +1357,6 @@ namespace dxvk {
     if (std::exchange(m_resourcesCreated, true))
       return;
 
-    CreateSampler();
     CreateUniformBuffer();
     CreateShaders();
   }
diff --git a/src/d3d11/d3d11_video.h b/src/d3d11/d3d11_video.h
index 30288a58..cac29f91 100644
--- a/src/d3d11/d3d11_video.h
+++ b/src/d3d11/d3d11_video.h
@@ -584,6 +584,7 @@ namespace dxvk {
     struct alignas(16) UboData {
       float colorMatrix[3][4];
       float coordMatrix[3][2];
+      VkRect2D srcRect;
       float yMin, yMax;
       VkBool32 isPlanar;
     };
@@ -593,7 +594,6 @@ namespace dxvk {
     Rc<DxvkDevice>          m_device;
     Rc<DxvkShader>          m_vs;
     Rc<DxvkShader>          m_fs;
-    Rc<DxvkSampler>         m_sampler;
     Rc<DxvkBuffer>          m_ubo;
 
     VkExtent2D m_dstExtent = { 0u, 0u };
@@ -613,8 +613,6 @@ namespace dxvk {
 
     void CreateUniformBuffer();
 
-    void CreateSampler();
-
     void CreateShaders();
 
     void CreateResources();
diff --git a/src/d3d11/shaders/d3d11_video_blit_frag.frag b/src/d3d11/shaders/d3d11_video_blit_frag.frag
index ff54aeda..96b9b50f 100644
--- a/src/d3d11/shaders/d3d11_video_blit_frag.frag
+++ b/src/d3d11/shaders/d3d11_video_blit_frag.frag
@@ -1,5 +1,7 @@
 #version 450
 
+#extension GL_EXT_samplerless_texture_functions : require
+
 // Can't use matrix types here since even a two-row
 // matrix will be padded to 16 bytes per column for
 // absolutely no reason
@@ -11,6 +13,8 @@ uniform ubo_t {
   vec2 coord_matrix_c1;
   vec2 coord_matrix_c2;
   vec2 coord_matrix_c3;
+  uvec2 src_offset;
+  uvec2 src_extent;
   float y_min;
   float y_max;
   bool is_planar;
@@ -19,9 +23,8 @@ uniform ubo_t {
 layout(location = 0) in vec2 i_texcoord;
 layout(location = 0) out vec4 o_color;
 
-layout(set = 0, binding = 1) uniform sampler s_sampler;
-layout(set = 0, binding = 2) uniform texture2D s_inputY;
-layout(set = 0, binding = 3) uniform texture2D s_inputCbCr;
+layout(set = 0, binding = 1) uniform texture2D s_inputY;
+layout(set = 0, binding = 2) uniform texture2D s_inputCbCr;
 
 void main() {
   // Transform input texture coordinates to
@@ -31,25 +34,61 @@ void main() {
     coord_matrix_c2,
     coord_matrix_c3);
 
-  vec2 coord = coord_matrix * vec3(i_texcoord, 1.0f);
-
-  // Fetch source image color
-  vec4 color = vec4(0.0f, 0.0f, 0.0f, 1.0f);
-
-  if (is_planar) {
-    color.g  = texture(sampler2D(s_inputY,    s_sampler), coord).r;
-    color.rb = texture(sampler2D(s_inputCbCr, s_sampler), coord).gr;
-    color.g  = clamp((color.g - y_min) / (y_max - y_min), 0.0f, 1.0f);
-  } else {
-    color = texture(sampler2D(s_inputY, s_sampler), coord);
-  }
-
-  // Color space transformation
+  // Load color space transform
   mat3x4 color_matrix = mat3x4(
     color_matrix_r1,
     color_matrix_r2,
     color_matrix_r3);
 
-  o_color.rgb = vec4(color.rgb, 1.0f) * color_matrix;
-  o_color.a = color.a;
+  // Compute actual pixel coordinates to sample. We filter
+  // manually in order to avoid bleeding from pixels outside
+  // the source rectangle.
+  vec2 abs_size_y = vec2(textureSize(s_inputY, 0));
+  vec2 abs_size_c = vec2(textureSize(s_inputCbCr, 0));
+
+  vec2 coord = coord_matrix * vec3(i_texcoord, 1.0f);
+  coord -= 0.5f / abs_size_y;
+
+  vec2 size_factor = abs_size_c / abs_size_y;
+
+  vec2 src_lo = vec2(src_offset);
+  vec2 src_hi = vec2(src_offset + src_extent - 1u);
+
+  vec2 abs_coord = coord * abs_size_y;
+  vec2 fract_coord = fract(clamp(abs_coord, src_lo, src_hi));
+
+  vec4 accum = vec4(0.0f, 0.0f, 0.0f, 0.0f);
+
+  for (int i = 0; i < 4; i++) {
+    ivec2 offset = ivec2(i & 1, i >> 1);
+
+    // Compute exact pixel coordinates for the current
+    // iteration and clamp it to the source rectangle.
+    vec2 fetch_coord = clamp(abs_coord + vec2(offset), src_lo, src_hi);
+
+    // Fetch actual pixel color in source color space
+    vec4 color;
+
+    if (is_planar) {
+      color.g  = texelFetch(s_inputY, ivec2(fetch_coord), 0).r;
+      color.rb = texelFetch(s_inputCbCr, ivec2(fetch_coord * size_factor), 0).gr;
+      color.g  = clamp((color.g - y_min) / (y_max - y_min), 0.0f, 1.0f);
+      color.a = 1.0f;
+    } else {
+      color = texelFetch(s_inputY, ivec2(fetch_coord), 0);
+    }
+
+    // Transform color space before accumulation
+    color.rgb = vec4(color.rgb, 1.0f) * color_matrix;
+
+    // Filter and accumulate final pixel color
+    vec2 factor = fract_coord;
+
+    if (offset.x == 0) factor.x = 1.0f - factor.x;
+    if (offset.y == 0) factor.y = 1.0f - factor.y;
+
+    accum += factor.x * factor.y * color;
+  }
+
+  o_color = accum;
 }