shithub: dav1d

Download patch

ref: cbe05cf439807d872fc35b2c5fbed5eb3d2f51a9
parent: 7bbebdb4639ee20fa3213396b223ebdc1db91210
author: Niklas Haas <git@haasn.xyz>
date: Sun May 17 22:34:24 EDT 2020

dav1dplay: support on-GPU film grain synthesis

Annoying minor differences between the layouts of Dav1dFilmGrainData
and struct pl_av1_grain_data mean we can't just memcpy the entire
thing. Oh well.

Note: technically, PL_API_VER 33 added this API, but PL_API_VER 63 is
the minimum version of libplacebo that doesn't have glaring bugs when
generating chroma grain, so we require that as a minimum instead.

(I tested this version on some 4:2:2 and 4:2:0, 8-bit and 10-bit grain
samples I had lying around and made sure the output was identical up to
differences in rounding / dithering.)

--- a/examples/dav1dplay.c
+++ b/examples/dav1dplay.c
@@ -97,6 +97,7 @@
             " --tilethreads $num:   number of tile threads (default: 1)\n"
             " --highquality:        enable high quality rendering\n"
             " --zerocopy/-z:        enable zero copy upload path\n"
+            " --gpugrain/-g:        enable GPU grain synthesis\n"
             " --version/-v:         print version and exit\n"
             " --renderer/-r:        select renderer backend (default: auto)\n");
     exit(1);
@@ -121,7 +122,7 @@
     Dav1dSettings *lib_settings = &rd_ctx->lib_settings;
 
     // Short options
-    static const char short_opts[] = "i:vuzr:";
+    static const char short_opts[] = "i:vuzgr:";
 
     enum {
         ARG_FRAME_THREADS = 256,
@@ -138,6 +139,7 @@
         { "tilethreads",    1, NULL, ARG_TILE_THREADS },
         { "highquality",    0, NULL, ARG_HIGH_QUALITY },
         { "zerocopy",       0, NULL, 'z' },
+        { "gpugrain",       0, NULL, 'g' },
         { "renderer",       0, NULL, 'r'},
         { NULL,             0, NULL, 0 },
     };
@@ -159,6 +161,9 @@
             case 'z':
                 settings->zerocopy = true;
                 break;
+            case 'g':
+                settings->gpugrain = true;
+                break;
             case 'r':
                 settings->renderer_name = optarg;
                 break;
@@ -522,7 +527,15 @@
                 .release_picture_callback = renderer_info->release_pic,
             };
         } else {
-            fprintf(stderr, "--zerocopy unsupported by compiled renderer\n");
+            fprintf(stderr, "--zerocopy unsupported by selected renderer\n");
+        }
+    }
+
+    if (rd_ctx->settings.gpugrain) {
+        if (renderer_info->supports_gpu_grain) {
+            rd_ctx->lib_settings.apply_grain = 0;
+        } else {
+            fprintf(stderr, "--gpugrain unsupported by selected renderer\n");
         }
     }
 
--- a/examples/dp_renderer.h
+++ b/examples/dp_renderer.h
@@ -60,6 +60,7 @@
     int highquality;
     int untimed;
     int zerocopy;
+    int gpugrain;
 } Dav1dPlaySettings;
 
 #define WINDOW_WIDTH  910
@@ -89,6 +90,8 @@
     // Callback for alloc/release pictures (optional)
     int (*alloc_pic)(Dav1dPicture *pic, void *cookie);
     void (*release_pic)(Dav1dPicture *pic, void *cookie);
+    // Whether or not this renderer can apply on-GPU film grain synthesis
+    int supports_gpu_grain;
 } Dav1dPlayRenderInfo;
 
 extern const Dav1dPlayRenderInfo rdr_placebo_vk;
--- a/examples/dp_renderer_placebo.c
+++ b/examples/dp_renderer_placebo.c
@@ -484,6 +484,40 @@
     case DAV1D_CHR_COLOCATED:   chroma_loc = PL_CHROMA_TOP_LEFT; break;
     }
 
+#if PL_API_VER >= 63
+    if (settings->gpugrain && dav1d_pic->frame_hdr->film_grain.present) {
+        Dav1dFilmGrainData *src = &dav1d_pic->frame_hdr->film_grain.data;
+        struct pl_av1_grain_data *dst = &image->av1_grain;
+        *dst = (struct pl_av1_grain_data) {
+            .grain_seed     = src->seed,
+            .num_points_y   = src->num_y_points,
+            .chroma_scaling_from_luma = src->chroma_scaling_from_luma,
+            .num_points_uv  = { src->num_uv_points[0], src->num_uv_points[1] },
+            .scaling_shift  = src->scaling_shift,
+            .ar_coeff_lag   = src->ar_coeff_lag,
+            .ar_coeff_shift = src->ar_coeff_shift,
+            .grain_scale_shift = src->grain_scale_shift,
+            .uv_mult        = { src->uv_mult[0], src->uv_mult[1] },
+            .uv_mult_luma   = { src->uv_luma_mult[0], src->uv_luma_mult[1] },
+            .uv_offset      = { src->uv_offset[0], src->uv_offset[1] },
+            .overlap        = src->overlap_flag,
+        };
+
+        assert(sizeof(dst->points_y) == sizeof(src->y_points));
+        assert(sizeof(dst->points_uv) == sizeof(src->uv_points));
+        assert(sizeof(dst->ar_coeffs_y) == sizeof(src->ar_coeffs_y));
+        memcpy(dst->points_y, src->y_points, sizeof(src->y_points));
+        memcpy(dst->points_uv, src->uv_points, sizeof(src->uv_points));
+        memcpy(dst->ar_coeffs_y, src->ar_coeffs_y, sizeof(src->ar_coeffs_y));
+
+        // this one has different row sizes for alignment
+        for (int c = 0; c < 2; c++) {
+            for (int i = 0; i < 25; i++)
+                dst->ar_coeffs_uv[c][i] = src->ar_coeffs_uv[c][i];
+        }
+    }
+#endif
+
     // Upload the actual planes
     struct pl_plane_data data[3] = {
         {
@@ -646,6 +680,10 @@
     .update_frame = placebo_upload_image,
     .alloc_pic = placebo_alloc_pic,
     .release_pic = placebo_release_pic,
+
+# if PL_API_VER >= 63
+    .supports_gpu_grain = 1,
+# endif
 };
 #else
 const Dav1dPlayRenderInfo rdr_placebo_vk = { NULL };
@@ -660,6 +698,10 @@
     .update_frame = placebo_upload_image,
     .alloc_pic = placebo_alloc_pic,
     .release_pic = placebo_release_pic,
+
+# if PL_API_VER >= 63
+    .supports_gpu_grain = 1,
+# endif
 };
 #else
 const Dav1dPlayRenderInfo rdr_placebo_gl = { NULL };