• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/film_grain.h"
16 
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstring>
23 #include <new>
24 
25 #include "src/dsp/common.h"
26 #include "src/dsp/constants.h"
27 #include "src/dsp/dsp.h"
28 #include "src/dsp/film_grain_common.h"
29 #include "src/utils/array_2d.h"
30 #include "src/utils/blocking_counter.h"
31 #include "src/utils/common.h"
32 #include "src/utils/compiler_attributes.h"
33 #include "src/utils/constants.h"
34 #include "src/utils/logging.h"
35 #include "src/utils/threadpool.h"
36 
37 namespace libgav1 {
38 
39 namespace {
40 
41 // The kGaussianSequence array contains random samples from a Gaussian
42 // distribution with zero mean and standard deviation of about 512 clipped to
43 // the range of [-2048, 2047] (representable by a signed integer using 12 bits
44 // of precision) and rounded to the nearest multiple of 4.
45 //
46 // Note: It is important that every element in the kGaussianSequence array be
47 // less than 2040, so that RightShiftWithRounding(kGaussianSequence[i], 4) is
48 // less than 128 for bitdepth=8 (GrainType=int8_t).
49 constexpr int16_t kGaussianSequence[/*2048*/] = {
50     56,    568,   -180,  172,   124,   -84,   172,   -64,   -900,  24,   820,
51     224,   1248,  996,   272,   -8,    -916,  -388,  -732,  -104,  -188, 800,
52     112,   -652,  -320,  -376,  140,   -252,  492,   -168,  44,    -788, 588,
53     -584,  500,   -228,  12,    680,   272,   -476,  972,   -100,  652,  368,
54     432,   -196,  -720,  -192,  1000,  -332,  652,   -136,  -552,  -604, -4,
55     192,   -220,  -136,  1000,  -52,   372,   -96,   -624,  124,   -24,  396,
56     540,   -12,   -104,  640,   464,   244,   -208,  -84,   368,   -528, -740,
57     248,   -968,  -848,  608,   376,   -60,   -292,  -40,   -156,  252,  -292,
58     248,   224,   -280,  400,   -244,  244,   -60,   76,    -80,   212,  532,
59     340,   128,   -36,   824,   -352,  -60,   -264,  -96,   -612,  416,  -704,
60     220,   -204,  640,   -160,  1220,  -408,  900,   336,   20,    -336, -96,
61     -792,  304,   48,    -28,   -1232, -1172, -448,  104,   -292,  -520, 244,
62     60,    -948,  0,     -708,  268,   108,   356,   -548,  488,   -344, -136,
63     488,   -196,  -224,  656,   -236,  -1128, 60,    4,     140,   276,  -676,
64     -376,  168,   -108,  464,   8,     564,   64,    240,   308,   -300, -400,
65     -456,  -136,  56,    120,   -408,  -116,  436,   504,   -232,  328,  844,
66     -164,  -84,   784,   -168,  232,   -224,  348,   -376,  128,   568,  96,
67     -1244, -288,  276,   848,   832,   -360,  656,   464,   -384,  -332, -356,
68     728,   -388,  160,   -192,  468,   296,   224,   140,   -776,  -100, 280,
69     4,     196,   44,    -36,   -648,  932,   16,    1428,  28,    528,  808,
70     772,   20,    268,   88,    -332,  -284,  124,   -384,  -448,  208,  -228,
71     -1044, -328,  660,   380,   -148,  -300,  588,   240,   540,   28,   136,
72     -88,   -436,  256,   296,   -1000, 1400,  0,     -48,   1056,  -136, 264,
73     -528,  -1108, 632,   -484,  -592,  -344,  796,   124,   -668,  -768, 388,
74     1296,  -232,  -188,  -200,  -288,  -4,    308,   100,   -168,  256,  -500,
75     204,   -508,  648,   -136,  372,   -272,  -120,  -1004, -552,  -548, -384,
76     548,   -296,  428,   -108,  -8,    -912,  -324,  -224,  -88,   -112, -220,
77     -100,  996,   -796,  548,   360,   -216,  180,   428,   -200,  -212, 148,
78     96,    148,   284,   216,   -412,  -320,  120,   -300,  -384,  -604, -572,
79     -332,  -8,    -180,  -176,  696,   116,   -88,   628,   76,    44,   -516,
80     240,   -208,  -40,   100,   -592,  344,   -308,  -452,  -228,  20,   916,
81     -1752, -136,  -340,  -804,  140,   40,    512,   340,   248,   184,  -492,
82     896,   -156,  932,   -628,  328,   -688,  -448,  -616,  -752,  -100, 560,
83     -1020, 180,   -800,  -64,   76,    576,   1068,  396,   660,   552,  -108,
84     -28,   320,   -628,  312,   -92,   -92,   -472,  268,   16,    560,  516,
85     -672,  -52,   492,   -100,  260,   384,   284,   292,   304,   -148, 88,
86     -152,  1012,  1064,  -228,  164,   -376,  -684,  592,   -392,  156,  196,
87     -524,  -64,   -884,  160,   -176,  636,   648,   404,   -396,  -436, 864,
88     424,   -728,  988,   -604,  904,   -592,  296,   -224,  536,   -176, -920,
89     436,   -48,   1176,  -884,  416,   -776,  -824,  -884,  524,   -548, -564,
90     -68,   -164,  -96,   692,   364,   -692,  -1012, -68,   260,   -480, 876,
91     -1116, 452,   -332,  -352,  892,   -1088, 1220,  -676,  12,    -292, 244,
92     496,   372,   -32,   280,   200,   112,   -440,  -96,   24,    -644, -184,
93     56,    -432,  224,   -980,  272,   -260,  144,   -436,  420,   356,  364,
94     -528,  76,    172,   -744,  -368,  404,   -752,  -416,  684,   -688, 72,
95     540,   416,   92,    444,   480,   -72,   -1416, 164,   -1172, -68,  24,
96     424,   264,   1040,  128,   -912,  -524,  -356,  64,    876,   -12,  4,
97     -88,   532,   272,   -524,  320,   276,   -508,  940,   24,    -400, -120,
98     756,   60,    236,   -412,  100,   376,   -484,  400,   -100,  -740, -108,
99     -260,  328,   -268,  224,   -200,  -416,  184,   -604,  -564,  -20,  296,
100     60,    892,   -888,  60,    164,   68,    -760,  216,   -296,  904,  -336,
101     -28,   404,   -356,  -568,  -208,  -1480, -512,  296,   328,   -360, -164,
102     -1560, -776,  1156,  -428,  164,   -504,  -112,  120,   -216,  -148, -264,
103     308,   32,    64,    -72,   72,    116,   176,   -64,   -272,  460,  -536,
104     -784,  -280,  348,   108,   -752,  -132,  524,   -540,  -776,  116,  -296,
105     -1196, -288,  -560,  1040,  -472,  116,   -848,  -1116, 116,   636,  696,
106     284,   -176,  1016,  204,   -864,  -648,  -248,  356,   972,   -584, -204,
107     264,   880,   528,   -24,   -184,  116,   448,   -144,  828,   524,  212,
108     -212,  52,    12,    200,   268,   -488,  -404,  -880,  824,   -672, -40,
109     908,   -248,  500,   716,   -576,  492,   -576,  16,    720,   -108, 384,
110     124,   344,   280,   576,   -500,  252,   104,   -308,  196,   -188, -8,
111     1268,  296,   1032,  -1196, 436,   316,   372,   -432,  -200,  -660, 704,
112     -224,  596,   -132,  268,   32,    -452,  884,   104,   -1008, 424,  -1348,
113     -280,  4,     -1168, 368,   476,   696,   300,   -8,    24,    180,  -592,
114     -196,  388,   304,   500,   724,   -160,  244,   -84,   272,   -256, -420,
115     320,   208,   -144,  -156,  156,   364,   452,   28,    540,   316,  220,
116     -644,  -248,  464,   72,    360,   32,    -388,  496,   -680,  -48,  208,
117     -116,  -408,  60,    -604,  -392,  548,   -840,  784,   -460,  656,  -544,
118     -388,  -264,  908,   -800,  -628,  -612,  -568,  572,   -220,  164,  288,
119     -16,   -308,  308,   -112,  -636,  -760,  280,   -668,  432,   364,  240,
120     -196,  604,   340,   384,   196,   592,   -44,   -500,  432,   -580, -132,
121     636,   -76,   392,   4,     -412,  540,   508,   328,   -356,  -36,  16,
122     -220,  -64,   -248,  -60,   24,    -192,  368,   1040,  92,    -24,  -1044,
123     -32,   40,    104,   148,   192,   -136,  -520,  56,    -816,  -224, 732,
124     392,   356,   212,   -80,   -424,  -1008, -324,  588,   -1496, 576,  460,
125     -816,  -848,  56,    -580,  -92,   -1372, -112,  -496,  200,   364,  52,
126     -140,  48,    -48,   -60,   84,    72,    40,    132,   -356,  -268, -104,
127     -284,  -404,  732,   -520,  164,   -304,  -540,  120,   328,   -76,  -460,
128     756,   388,   588,   236,   -436,  -72,   -176,  -404,  -316,  -148, 716,
129     -604,  404,   -72,   -88,   -888,  -68,   944,   88,    -220,  -344, 960,
130     472,   460,   -232,  704,   120,   832,   -228,  692,   -508,  132,  -476,
131     844,   -748,  -364,  -44,   1116,  -1104, -1056, 76,    428,   552,  -692,
132     60,    356,   96,    -384,  -188,  -612,  -576,  736,   508,   892,  352,
133     -1132, 504,   -24,   -352,  324,   332,   -600,  -312,  292,   508,  -144,
134     -8,    484,   48,    284,   -260,  -240,  256,   -100,  -292,  -204, -44,
135     472,   -204,  908,   -188,  -1000, -256,  92,    1164,  -392,  564,  356,
136     652,   -28,   -884,  256,   484,   -192,  760,   -176,  376,   -524, -452,
137     -436,  860,   -736,  212,   124,   504,   -476,  468,   76,    -472, 552,
138     -692,  -944,  -620,  740,   -240,  400,   132,   20,    192,   -196, 264,
139     -668,  -1012, -60,   296,   -316,  -828,  76,    -156,  284,   -768, -448,
140     -832,  148,   248,   652,   616,   1236,  288,   -328,  -400,  -124, 588,
141     220,   520,   -696,  1032,  768,   -740,  -92,   -272,  296,   448,  -464,
142     412,   -200,  392,   440,   -200,  264,   -152,  -260,  320,   1032, 216,
143     320,   -8,    -64,   156,   -1016, 1084,  1172,  536,   484,   -432, 132,
144     372,   -52,   -256,  84,    116,   -352,  48,    116,   304,   -384, 412,
145     924,   -300,  528,   628,   180,   648,   44,    -980,  -220,  1320, 48,
146     332,   748,   524,   -268,  -720,  540,   -276,  564,   -344,  -208, -196,
147     436,   896,   88,    -392,  132,   80,    -964,  -288,  568,   56,   -48,
148     -456,  888,   8,     552,   -156,  -292,  948,   288,   128,   -716, -292,
149     1192,  -152,  876,   352,   -600,  -260,  -812,  -468,  -28,   -120, -32,
150     -44,   1284,  496,   192,   464,   312,   -76,   -516,  -380,  -456, -1012,
151     -48,   308,   -156,  36,    492,   -156,  -808,  188,   1652,  68,   -120,
152     -116,  316,   160,   -140,  352,   808,   -416,  592,   316,   -480, 56,
153     528,   -204,  -568,  372,   -232,  752,   -344,  744,   -4,    324,  -416,
154     -600,  768,   268,   -248,  -88,   -132,  -420,  -432,  80,    -288, 404,
155     -316,  -1216, -588,  520,   -108,  92,    -320,  368,   -480,  -216, -92,
156     1688,  -300,  180,   1020,  -176,  820,   -68,   -228,  -260,  436,  -904,
157     20,    40,    -508,  440,   -736,  312,   332,   204,   760,   -372, 728,
158     96,    -20,   -632,  -520,  -560,  336,   1076,  -64,   -532,  776,  584,
159     192,   396,   -728,  -520,  276,   -188,  80,    -52,   -612,  -252, -48,
160     648,   212,   -688,  228,   -52,   -260,  428,   -412,  -272,  -404, 180,
161     816,   -796,  48,    152,   484,   -88,   -216,  988,   696,   188,  -528,
162     648,   -116,  -180,  316,   476,   12,    -564,  96,    476,   -252, -364,
163     -376,  -392,  556,   -256,  -576,  260,   -352,  120,   -16,   -136, -260,
164     -492,  72,    556,   660,   580,   616,   772,   436,   424,   -32,  -324,
165     -1268, 416,   -324,  -80,   920,   160,   228,   724,   32,    -516, 64,
166     384,   68,    -128,  136,   240,   248,   -204,  -68,   252,   -932, -120,
167     -480,  -628,  -84,   192,   852,   -404,  -288,  -132,  204,   100,  168,
168     -68,   -196,  -868,  460,   1080,  380,   -80,   244,   0,     484,  -888,
169     64,    184,   352,   600,   460,   164,   604,   -196,  320,   -64,  588,
170     -184,  228,   12,    372,   48,    -848,  -344,  224,   208,   -200, 484,
171     128,   -20,   272,   -468,  -840,  384,   256,   -720,  -520,  -464, -580,
172     112,   -120,  644,   -356,  -208,  -608,  -528,  704,   560,   -424, 392,
173     828,   40,    84,    200,   -152,  0,     -144,  584,   280,   -120, 80,
174     -556,  -972,  -196,  -472,  724,   80,    168,   -32,   88,    160,  -688,
175     0,     160,   356,   372,   -776,  740,   -128,  676,   -248,  -480, 4,
176     -364,  96,    544,   232,   -1032, 956,   236,   356,   20,    -40,  300,
177     24,    -676,  -596,  132,   1120,  -104,  532,   -1096, 568,   648,  444,
178     508,   380,   188,   -376,  -604,  1488,  424,   24,    756,   -220, -192,
179     716,   120,   920,   688,   168,   44,    -460,  568,   284,   1144, 1160,
180     600,   424,   888,   656,   -356,  -320,  220,   316,   -176,  -724, -188,
181     -816,  -628,  -348,  -228,  -380,  1012,  -452,  -660,  736,   928,  404,
182     -696,  -72,   -268,  -892,  128,   184,   -344,  -780,  360,   336,  400,
183     344,   428,   548,   -112,  136,   -228,  -216,  -820,  -516,  340,  92,
184     -136,  116,   -300,  376,   -244,  100,   -316,  -520,  -284,  -12,  824,
185     164,   -548,  -180,  -128,  116,   -924,  -828,  268,   -368,  -580, 620,
186     192,   160,   0,     -1676, 1068,  424,   -56,   -360,  468,   -156, 720,
187     288,   -528,  556,   -364,  548,   -148,  504,   316,   152,   -648, -620,
188     -684,  -24,   -376,  -384,  -108,  -920,  -1032, 768,   180,   -264, -508,
189     -1268, -260,  -60,   300,   -240,  988,   724,   -376,  -576,  -212, -736,
190     556,   192,   1092,  -620,  -880,  376,   -56,   -4,    -216,  -32,  836,
191     268,   396,   1332,  864,   -600,  100,   56,    -412,  -92,   356,  180,
192     884,   -468,  -436,  292,   -388,  -804,  -704,  -840,  368,   -348, 140,
193     -724,  1536,  940,   372,   112,   -372,  436,   -480,  1136,  296,  -32,
194     -228,  132,   -48,   -220,  868,   -1016, -60,   -1044, -464,  328,  916,
195     244,   12,    -736,  -296,  360,   468,   -376,  -108,  -92,   788,  368,
196     -56,   544,   400,   -672,  -420,  728,   16,    320,   44,    -284, -380,
197     -796,  488,   132,   204,   -596,  -372,  88,    -152,  -908,  -636, -572,
198     -624,  -116,  -692,  -200,  -56,   276,   -88,   484,   -324,  948,  864,
199     1000,  -456,  -184,  -276,  292,   -296,  156,   676,   320,   160,  908,
200     -84,   -1236, -288,  -116,  260,   -372,  -644,  732,   -756,  -96,  84,
201     344,   -520,  348,   -688,  240,   -84,   216,   -1044, -136,  -676, -396,
202     -1500, 960,   -40,   176,   168,   1516,  420,   -504,  -344,  -364, -360,
203     1216,  -940,  -380,  -212,  252,   -660,  -708,  484,   -444,  -152, 928,
204     -120,  1112,  476,   -260,  560,   -148,  -344,  108,   -196,  228,  -288,
205     504,   560,   -328,  -88,   288,   -1008, 460,   -228,  468,   -836, -196,
206     76,    388,   232,   412,   -1168, -716,  -644,  756,   -172,  -356, -504,
207     116,   432,   528,   48,    476,   -168,  -608,  448,   160,   -532, -272,
208     28,    -676,  -12,   828,   980,   456,   520,   104,   -104,  256,  -344,
209     -4,    -28,   -368,  -52,   -524,  -572,  -556,  -200,  768,   1124, -208,
210     -512,  176,   232,   248,   -148,  -888,  604,   -600,  -304,  804,  -156,
211     -212,  488,   -192,  -804,  -256,  368,   -360,  -916,  -328,  228,  -240,
212     -448,  -472,  856,   -556,  -364,  572,   -12,   -156,  -368,  -340, 432,
213     252,   -752,  -152,  288,   268,   -580,  -848,  -592,  108,   -76,  244,
214     312,   -716,  592,   -80,   436,   360,   4,     -248,  160,   516,  584,
215     732,   44,    -468,  -280,  -292,  -156,  -588,  28,    308,   912,  24,
216     124,   156,   180,   -252,  944,   -924,  -772,  -520,  -428,  -624, 300,
217     -212,  -1144, 32,    -724,  800,   -1128, -212,  -1288, -848,  180,  -416,
218     440,   192,   -576,  -792,  -76,   -1080, 80,    -532,  -352,  -132, 380,
219     -820,  148,   1112,  128,   164,   456,   700,   -924,  144,   -668, -384,
220     648,   -832,  508,   552,   -52,   -100,  -656,  208,   -568,  748,  -88,
221     680,   232,   300,   192,   -408,  -1012, -152,  -252,  -268,  272,  -876,
222     -664,  -648,  -332,  -136,  16,    12,    1152,  -28,   332,   -536, 320,
223     -672,  -460,  -316,  532,   -260,  228,   -40,   1052,  -816,  180,  88,
224     -496,  -556,  -672,  -368,  428,   92,    356,   404,   -408,  252,  196,
225     -176,  -556,  792,   268,   32,    372,   40,    96,    -332,  328,  120,
226     372,   -900,  -40,   472,   -264,  -592,  952,   128,   656,   112,  664,
227     -232,  420,   4,     -344,  -464,  556,   244,   -416,  -32,   252,  0,
228     -412,  188,   -696,  508,   -476,  324,   -1096, 656,   -312,  560,  264,
229     -136,  304,   160,   -64,   -580,  248,   336,   -720,  560,   -348, -288,
230     -276,  -196,  -500,  852,   -544,  -236,  -1128, -992,  -776,  116,  56,
231     52,    860,   884,   212,   -12,   168,   1020,  512,   -552,  924,  -148,
232     716,   188,   164,   -340,  -520,  -184,  880,   -152,  -680,  -208, -1156,
233     -300,  -528,  -472,  364,   100,   -744,  -1056, -32,   540,   280,  144,
234     -676,  -32,   -232,  -280,  -224,  96,    568,   -76,   172,   148,  148,
235     104,   32,    -296,  -32,   788,   -80,   32,    -16,   280,   288,  944,
236     428,   -484};
237 static_assert(sizeof(kGaussianSequence) / sizeof(kGaussianSequence[0]) == 2048,
238               "");
239 
// The number of rows in a contiguous group computed by a single worker thread
// before checking for the next available group.
constexpr int kFrameChunkHeight = 8;
// The blend workers compute the height of the last, partial chunk with
// |height| & (kFrameChunkHeight - 1), which only equals
// |height| % kFrameChunkHeight when kFrameChunkHeight is a power of two.
static_assert(kFrameChunkHeight > 0 &&
                  (kFrameChunkHeight & (kFrameChunkHeight - 1)) == 0,
              "kFrameChunkHeight must be a power of two.");
243 
// Copies |height| rows of |width| Pixel samples from |source_plane| to
// |dest_plane|.
//
// |width| and |height| refer to the plane, not the frame, meaning any
// subsampling should be applied by the caller. |source_stride| and
// |dest_stride| are applied to the uint8_t plane pointers, i.e. they are in
// bytes. Nothing is copied when both pointers refer to the same buffer or when
// the plane is empty.
template <typename Pixel>
inline void CopyImagePlane(const uint8_t* source_plane, ptrdiff_t source_stride,
                           int width, int height, uint8_t* dest_plane,
                           ptrdiff_t dest_stride) {
  // If it's the same buffer there's nothing to do.
  if (source_plane == dest_plane) return;
  // An empty plane has nothing to copy. This also keeps a non-positive |width|
  // from being converted to a huge size_t byte count below.
  if (width <= 0 || height <= 0) return;

  const size_t row_size = static_cast<size_t>(width) * sizeof(Pixel);
  for (int y = 0; y < height; ++y) {
    memcpy(dest_plane, source_plane, row_size);
    source_plane += source_stride;
    dest_plane += dest_stride;
  }
}
260 
261 }  // namespace
262 
263 template <int bitdepth>
FilmGrain(const FilmGrainParams & params,bool is_monochrome,bool color_matrix_is_identity,int subsampling_x,int subsampling_y,int width,int height,ThreadPool * thread_pool)264 FilmGrain<bitdepth>::FilmGrain(const FilmGrainParams& params,
265                                bool is_monochrome,
266                                bool color_matrix_is_identity, int subsampling_x,
267                                int subsampling_y, int width, int height,
268                                ThreadPool* thread_pool)
269     : params_(params),
270       is_monochrome_(is_monochrome),
271       color_matrix_is_identity_(color_matrix_is_identity),
272       subsampling_x_(subsampling_x),
273       subsampling_y_(subsampling_y),
274       width_(width),
275       height_(height),
276       template_uv_width_((subsampling_x != 0) ? kMinChromaWidth
277                                               : kMaxChromaWidth),
278       template_uv_height_((subsampling_y != 0) ? kMinChromaHeight
279                                                : kMaxChromaHeight),
280       thread_pool_(thread_pool) {}
281 
282 template <int bitdepth>
~FilmGrain()283 FilmGrain<bitdepth>::~FilmGrain() {
284   // Clear the earlier poisoning to avoid false reports when the memory range
285   // is reused.
286   ASAN_UNPOISON_MEMORY_REGION(luma_grain_, sizeof(luma_grain_));
287   ASAN_UNPOISON_MEMORY_REGION(scaling_lut_y_, sizeof(scaling_lut_y_));
288 }
289 
290 template <int bitdepth>
Init()291 bool FilmGrain<bitdepth>::Init() {
292   // Section 7.18.3.3. Generate grain process.
293   const dsp::Dsp& dsp = *dsp::GetDspTable(bitdepth);
294   // If params_.num_y_points is 0, luma_grain_ will never be read, so we don't
295   // need to generate it.
296   const bool use_luma = params_.num_y_points > 0;
297   if (use_luma) {
298     GenerateLumaGrain(params_, luma_grain_);
299     // If params_.auto_regression_coeff_lag is 0, the filter is the identity
300     // filter and therefore can be skipped.
301     if (params_.auto_regression_coeff_lag > 0) {
302       dsp.film_grain
303           .luma_auto_regression[params_.auto_regression_coeff_lag - 1](
304               params_, luma_grain_);
305     }
306   } else {
307     // Have AddressSanitizer warn if luma_grain_ is used.
308     ASAN_POISON_MEMORY_REGION(luma_grain_, sizeof(luma_grain_));
309   }
310   if (!is_monochrome_) {
311     GenerateChromaGrains(params_, template_uv_width_, template_uv_height_,
312                          u_grain_, v_grain_);
313     if (params_.auto_regression_coeff_lag > 0 || use_luma) {
314       dsp.film_grain.chroma_auto_regression[static_cast<int>(
315           use_luma)][params_.auto_regression_coeff_lag](
316           params_, luma_grain_, subsampling_x_, subsampling_y_, u_grain_,
317           v_grain_);
318     }
319   }
320 
321   // Section 7.18.3.4. Scaling lookup initialization process.
322 
323   // Initialize scaling_lut_y_. If params_.num_y_points > 0, scaling_lut_y_
324   // is used for the Y plane. If params_.chroma_scaling_from_luma is true,
325   // scaling_lut_u_ and scaling_lut_v_ are the same as scaling_lut_y_ and are
326   // set up as aliases. So we need to initialize scaling_lut_y_ under these
327   // two conditions.
328   //
329   // Note: Although it does not seem to make sense, there are test vectors
330   // with chroma_scaling_from_luma=true and params_.num_y_points=0.
331 #if LIBGAV1_MSAN
332   // Quiet film grain / md5 msan warnings.
333   memset(scaling_lut_y_, 0, sizeof(scaling_lut_y_));
334 #endif
335   if (use_luma || params_.chroma_scaling_from_luma) {
336     dsp.film_grain.initialize_scaling_lut(
337         params_.num_y_points, params_.point_y_value, params_.point_y_scaling,
338         scaling_lut_y_, kScalingLutLength);
339   } else {
340     ASAN_POISON_MEMORY_REGION(scaling_lut_y_, sizeof(scaling_lut_y_));
341   }
342   if (!is_monochrome_) {
343     if (params_.chroma_scaling_from_luma) {
344       scaling_lut_u_ = scaling_lut_y_;
345       scaling_lut_v_ = scaling_lut_y_;
346     } else if (params_.num_u_points > 0 || params_.num_v_points > 0) {
347       const size_t buffer_size =
348           kScalingLutLength * (static_cast<int>(params_.num_u_points > 0) +
349                                static_cast<int>(params_.num_v_points > 0));
350       scaling_lut_chroma_buffer_.reset(new (std::nothrow) int16_t[buffer_size]);
351       if (scaling_lut_chroma_buffer_ == nullptr) return false;
352 
353       int16_t* buffer = scaling_lut_chroma_buffer_.get();
354 #if LIBGAV1_MSAN
355       // Quiet film grain / md5 msan warnings.
356       memset(buffer, 0, buffer_size * 2);
357 #endif
358       if (params_.num_u_points > 0) {
359         scaling_lut_u_ = buffer;
360         dsp.film_grain.initialize_scaling_lut(
361             params_.num_u_points, params_.point_u_value,
362             params_.point_u_scaling, scaling_lut_u_, kScalingLutLength);
363         buffer += kScalingLutLength;
364       }
365       if (params_.num_v_points > 0) {
366         scaling_lut_v_ = buffer;
367         dsp.film_grain.initialize_scaling_lut(
368             params_.num_v_points, params_.point_v_value,
369             params_.point_v_scaling, scaling_lut_v_, kScalingLutLength);
370       }
371     }
372   }
373   return true;
374 }
375 
376 template <int bitdepth>
GenerateLumaGrain(const FilmGrainParams & params,GrainType * luma_grain)377 void FilmGrain<bitdepth>::GenerateLumaGrain(const FilmGrainParams& params,
378                                             GrainType* luma_grain) {
379   // If params.num_y_points is equal to 0, Section 7.18.3.3 specifies we set
380   // the luma_grain array to all zeros. But the Note at the end of Section
381   // 7.18.3.3 says luma_grain "will never be read in this case". So we don't
382   // call GenerateLumaGrain if params.num_y_points is equal to 0.
383   assert(params.num_y_points > 0);
384   const int shift = kBitdepth12 - bitdepth + params.grain_scale_shift;
385   uint16_t seed = params.grain_seed;
386   GrainType* luma_grain_row = luma_grain;
387   for (int y = 0; y < kLumaHeight; ++y) {
388     for (int x = 0; x < kLumaWidth; ++x) {
389       luma_grain_row[x] = RightShiftWithRounding(
390           kGaussianSequence[GetFilmGrainRandomNumber(11, &seed)], shift);
391     }
392     luma_grain_row += kLumaWidth;
393   }
394 }
395 
396 template <int bitdepth>
GenerateChromaGrains(const FilmGrainParams & params,int chroma_width,int chroma_height,GrainType * u_grain,GrainType * v_grain)397 void FilmGrain<bitdepth>::GenerateChromaGrains(const FilmGrainParams& params,
398                                                int chroma_width,
399                                                int chroma_height,
400                                                GrainType* u_grain,
401                                                GrainType* v_grain) {
402   const int shift = kBitdepth12 - bitdepth + params.grain_scale_shift;
403   if (params.num_u_points == 0 && !params.chroma_scaling_from_luma) {
404     memset(u_grain, 0, chroma_height * chroma_width * sizeof(*u_grain));
405   } else {
406     uint16_t seed = params.grain_seed ^ 0xb524;
407     GrainType* u_grain_row = u_grain;
408     assert(chroma_width > 0);
409     assert(chroma_height > 0);
410     int y = 0;
411     do {
412       int x = 0;
413       do {
414         u_grain_row[x] = RightShiftWithRounding(
415             kGaussianSequence[GetFilmGrainRandomNumber(11, &seed)], shift);
416       } while (++x < chroma_width);
417 
418       u_grain_row += chroma_width;
419     } while (++y < chroma_height);
420   }
421   if (params.num_v_points == 0 && !params.chroma_scaling_from_luma) {
422     memset(v_grain, 0, chroma_height * chroma_width * sizeof(*v_grain));
423   } else {
424     GrainType* v_grain_row = v_grain;
425     uint16_t seed = params.grain_seed ^ 0x49d8;
426     int y = 0;
427     do {
428       int x = 0;
429       do {
430         v_grain_row[x] = RightShiftWithRounding(
431             kGaussianSequence[GetFilmGrainRandomNumber(11, &seed)], shift);
432       } while (++x < chroma_width);
433 
434       v_grain_row += chroma_width;
435     } while (++y < chroma_height);
436   }
437 }
438 
439 template <int bitdepth>
AllocateNoiseStripes()440 bool FilmGrain<bitdepth>::AllocateNoiseStripes() {
441   const int half_height = DivideBy2(height_ + 1);
442   assert(half_height > 0);
443   // ceil(half_height / 16.0)
444   const int max_luma_num = DivideBy16(half_height + 15);
445   constexpr int kNoiseStripeHeight = 34;
446   size_t noise_buffer_size = kNoiseStripePadding;
447   if (params_.num_y_points > 0) {
448     noise_buffer_size += max_luma_num * kNoiseStripeHeight * width_;
449   }
450   if (!is_monochrome_) {
451     noise_buffer_size += 2 * max_luma_num *
452                          (kNoiseStripeHeight >> subsampling_y_) *
453                          SubsampledValue(width_, subsampling_x_);
454   }
455   noise_buffer_.reset(new (std::nothrow) GrainType[noise_buffer_size]);
456   if (noise_buffer_ == nullptr) return false;
457   GrainType* noise_buffer = noise_buffer_.get();
458   if (params_.num_y_points > 0) {
459     noise_stripes_[kPlaneY].Reset(max_luma_num, kNoiseStripeHeight * width_,
460                                   noise_buffer);
461     noise_buffer += max_luma_num * kNoiseStripeHeight * width_;
462   }
463   if (!is_monochrome_) {
464     noise_stripes_[kPlaneU].Reset(max_luma_num,
465                                   (kNoiseStripeHeight >> subsampling_y_) *
466                                       SubsampledValue(width_, subsampling_x_),
467                                   noise_buffer);
468     noise_buffer += max_luma_num * (kNoiseStripeHeight >> subsampling_y_) *
469                     SubsampledValue(width_, subsampling_x_);
470     noise_stripes_[kPlaneV].Reset(max_luma_num,
471                                   (kNoiseStripeHeight >> subsampling_y_) *
472                                       SubsampledValue(width_, subsampling_x_),
473                                   noise_buffer);
474   }
475   return true;
476 }
477 
478 template <int bitdepth>
AllocateNoiseImage()479 bool FilmGrain<bitdepth>::AllocateNoiseImage() {
480   // When LIBGAV1_MSAN is enabled, zero initialize to quiet optimized film grain
481   // msan warnings.
482   constexpr bool zero_initialize = LIBGAV1_MSAN == 1;
483   if (params_.num_y_points > 0 &&
484       !noise_image_[kPlaneY].Reset(height_, width_ + kNoiseImagePadding,
485                                    zero_initialize)) {
486     return false;
487   }
488   if (!is_monochrome_) {
489     if (!noise_image_[kPlaneU].Reset(
490             (height_ + subsampling_y_) >> subsampling_y_,
491             ((width_ + subsampling_x_) >> subsampling_x_) + kNoiseImagePadding,
492             zero_initialize)) {
493       return false;
494     }
495     if (!noise_image_[kPlaneV].Reset(
496             (height_ + subsampling_y_) >> subsampling_y_,
497             ((width_ + subsampling_x_) >> subsampling_x_) + kNoiseImagePadding,
498             zero_initialize)) {
499       return false;
500     }
501   }
502   return true;
503 }
504 
505 // Uses |overlap_flag| to skip rows that are covered by the overlap computation.
506 template <int bitdepth>
ConstructNoiseImage(const Array2DView<GrainType> * noise_stripes,int width,int height,int subsampling_x,int subsampling_y,int stripe_start_offset,Array2D<GrainType> * noise_image)507 void FilmGrain<bitdepth>::ConstructNoiseImage(
508     const Array2DView<GrainType>* noise_stripes, int width, int height,
509     int subsampling_x, int subsampling_y, int stripe_start_offset,
510     Array2D<GrainType>* noise_image) {
511   const int plane_width = (width + subsampling_x) >> subsampling_x;
512   const int plane_height = (height + subsampling_y) >> subsampling_y;
513   const int stripe_height = 32 >> subsampling_y;
514   const int stripe_mask = stripe_height - 1;
515   int y = 0;
516   // |luma_num| = y >> (5 - |subsampling_y|). Hence |luma_num| == 0 for all y up
517   // to either 16 or 32.
518   const GrainType* first_noise_stripe = (*noise_stripes)[0];
519   do {
520     memcpy((*noise_image)[y], first_noise_stripe + y * plane_width,
521            plane_width * sizeof(first_noise_stripe[0]));
522   } while (++y < std::min(stripe_height, plane_height));
523   // End special iterations for luma_num == 0.
524 
525   int luma_num = 1;
526   for (; y < (plane_height & ~stripe_mask); ++luma_num, y += stripe_height) {
527     const GrainType* noise_stripe = (*noise_stripes)[luma_num];
528     int i = stripe_start_offset;
529     do {
530       memcpy((*noise_image)[y + i], noise_stripe + i * plane_width,
531              plane_width * sizeof(noise_stripe[0]));
532     } while (++i < stripe_height);
533   }
534 
535   // If there is a partial stripe, copy any rows beyond the overlap rows.
536   const int remaining_height = plane_height - y;
537   if (remaining_height > stripe_start_offset) {
538     assert(luma_num < noise_stripes->rows());
539     const GrainType* noise_stripe = (*noise_stripes)[luma_num];
540     int i = stripe_start_offset;
541     do {
542       memcpy((*noise_image)[y + i], noise_stripe + i * plane_width,
543              plane_width * sizeof(noise_stripe[0]));
544     } while (++i < remaining_height);
545   }
546 }
547 
548 template <int bitdepth>
BlendNoiseChromaWorker(const dsp::Dsp & dsp,const Plane * planes,int num_planes,std::atomic<int> * job_counter,int min_value,int max_chroma,const uint8_t * source_plane_y,ptrdiff_t source_stride_y,const uint8_t * source_plane_u,const uint8_t * source_plane_v,ptrdiff_t source_stride_uv,uint8_t * dest_plane_u,uint8_t * dest_plane_v,ptrdiff_t dest_stride_uv)549 void FilmGrain<bitdepth>::BlendNoiseChromaWorker(
550     const dsp::Dsp& dsp, const Plane* planes, int num_planes,
551     std::atomic<int>* job_counter, int min_value, int max_chroma,
552     const uint8_t* source_plane_y, ptrdiff_t source_stride_y,
553     const uint8_t* source_plane_u, const uint8_t* source_plane_v,
554     ptrdiff_t source_stride_uv, uint8_t* dest_plane_u, uint8_t* dest_plane_v,
555     ptrdiff_t dest_stride_uv) {
556   assert(num_planes > 0);
557   const int full_jobs_per_plane = height_ / kFrameChunkHeight;
558   const int remainder_job_height = height_ & (kFrameChunkHeight - 1);
559   const int total_full_jobs = full_jobs_per_plane * num_planes;
560   // If the frame height is not a multiple of kFrameChunkHeight, one job with
561   // a smaller number of rows is necessary at the end of each plane.
562   const int total_jobs =
563       total_full_jobs + ((remainder_job_height == 0) ? 0 : num_planes);
564   int job_index;
565   // Each job corresponds to a slice of kFrameChunkHeight rows in the luma
566   // plane. dsp->blend_noise_chroma handles subsampling.
567   // This loop body handles a slice of one plane or the other, depending on
568   // which are active. That way, threads working on consecutive jobs will keep
569   // the same region of luma source in working memory.
570   while ((job_index = job_counter->fetch_add(1, std::memory_order_relaxed)) <
571          total_jobs) {
572     const Plane plane = planes[job_index % num_planes];
573     const int slice_index = job_index / num_planes;
574     const int start_height = slice_index * kFrameChunkHeight;
575     const int job_height = std::min(height_ - start_height, kFrameChunkHeight);
576 
577     const auto* source_cursor_y = reinterpret_cast<const Pixel*>(
578         source_plane_y + start_height * source_stride_y);
579     const int16_t* scaling_lut_uv;
580     const uint8_t* source_plane_uv;
581     uint8_t* dest_plane_uv;
582 
583     if (plane == kPlaneU) {
584       scaling_lut_uv = scaling_lut_u_;
585       source_plane_uv = source_plane_u;
586       dest_plane_uv = dest_plane_u;
587     } else {
588       assert(plane == kPlaneV);
589       scaling_lut_uv = scaling_lut_v_;
590       source_plane_uv = source_plane_v;
591       dest_plane_uv = dest_plane_v;
592     }
593     const auto* source_cursor_uv = reinterpret_cast<const Pixel*>(
594         source_plane_uv + (start_height >> subsampling_y_) * source_stride_uv);
595     auto* dest_cursor_uv = reinterpret_cast<Pixel*>(
596         dest_plane_uv + (start_height >> subsampling_y_) * dest_stride_uv);
597     dsp.film_grain.blend_noise_chroma[params_.chroma_scaling_from_luma](
598         plane, params_, noise_image_, min_value, max_chroma, width_, job_height,
599         start_height, subsampling_x_, subsampling_y_, scaling_lut_uv,
600         source_cursor_y, source_stride_y, source_cursor_uv, source_stride_uv,
601         dest_cursor_uv, dest_stride_uv);
602   }
603 }
604 
605 template <int bitdepth>
BlendNoiseLumaWorker(const dsp::Dsp & dsp,std::atomic<int> * job_counter,int min_value,int max_luma,const uint8_t * source_plane_y,ptrdiff_t source_stride_y,uint8_t * dest_plane_y,ptrdiff_t dest_stride_y)606 void FilmGrain<bitdepth>::BlendNoiseLumaWorker(
607     const dsp::Dsp& dsp, std::atomic<int>* job_counter, int min_value,
608     int max_luma, const uint8_t* source_plane_y, ptrdiff_t source_stride_y,
609     uint8_t* dest_plane_y, ptrdiff_t dest_stride_y) {
610   const int total_full_jobs = height_ / kFrameChunkHeight;
611   const int remainder_job_height = height_ & (kFrameChunkHeight - 1);
612   const int total_jobs =
613       total_full_jobs + static_cast<int>(remainder_job_height > 0);
614   int job_index;
615   // Each job is some number of rows in a plane.
616   while ((job_index = job_counter->fetch_add(1, std::memory_order_relaxed)) <
617          total_jobs) {
618     const int start_height = job_index * kFrameChunkHeight;
619     const int job_height = std::min(height_ - start_height, kFrameChunkHeight);
620 
621     const auto* source_cursor_y = reinterpret_cast<const Pixel*>(
622         source_plane_y + start_height * source_stride_y);
623     auto* dest_cursor_y =
624         reinterpret_cast<Pixel*>(dest_plane_y + start_height * dest_stride_y);
625     dsp.film_grain.blend_noise_luma(
626         noise_image_, min_value, max_luma, params_.chroma_scaling, width_,
627         job_height, start_height, scaling_lut_y_, source_cursor_y,
628         source_stride_y, dest_cursor_y, dest_stride_y);
629   }
630 }
631 
632 template <int bitdepth>
AddNoise(const uint8_t * source_plane_y,ptrdiff_t source_stride_y,const uint8_t * source_plane_u,const uint8_t * source_plane_v,ptrdiff_t source_stride_uv,uint8_t * dest_plane_y,ptrdiff_t dest_stride_y,uint8_t * dest_plane_u,uint8_t * dest_plane_v,ptrdiff_t dest_stride_uv)633 bool FilmGrain<bitdepth>::AddNoise(
634     const uint8_t* source_plane_y, ptrdiff_t source_stride_y,
635     const uint8_t* source_plane_u, const uint8_t* source_plane_v,
636     ptrdiff_t source_stride_uv, uint8_t* dest_plane_y, ptrdiff_t dest_stride_y,
637     uint8_t* dest_plane_u, uint8_t* dest_plane_v, ptrdiff_t dest_stride_uv) {
638   if (!Init()) {
639     LIBGAV1_DLOG(ERROR, "Init() failed.");
640     return false;
641   }
642   if (!AllocateNoiseStripes()) {
643     LIBGAV1_DLOG(ERROR, "AllocateNoiseStripes() failed.");
644     return false;
645   }
646 
647   const dsp::Dsp& dsp = *dsp::GetDspTable(bitdepth);
648   const bool use_luma = params_.num_y_points > 0;
649 
650   // Construct noise stripes.
651   if (use_luma) {
652     // The luma plane is never subsampled.
653     dsp.film_grain
654         .construct_noise_stripes[static_cast<int>(params_.overlap_flag)](
655             luma_grain_, params_.grain_seed, width_, height_,
656             /*subsampling_x=*/0, /*subsampling_y=*/0, &noise_stripes_[kPlaneY]);
657   }
658   if (!is_monochrome_) {
659     dsp.film_grain
660         .construct_noise_stripes[static_cast<int>(params_.overlap_flag)](
661             u_grain_, params_.grain_seed, width_, height_, subsampling_x_,
662             subsampling_y_, &noise_stripes_[kPlaneU]);
663     dsp.film_grain
664         .construct_noise_stripes[static_cast<int>(params_.overlap_flag)](
665             v_grain_, params_.grain_seed, width_, height_, subsampling_x_,
666             subsampling_y_, &noise_stripes_[kPlaneV]);
667   }
668 
669   if (!AllocateNoiseImage()) {
670     LIBGAV1_DLOG(ERROR, "AllocateNoiseImage() failed.");
671     return false;
672   }
673 
674   // Construct noise image.
675   if (use_luma) {
676     ConstructNoiseImage(
677         &noise_stripes_[kPlaneY], width_, height_, /*subsampling_x=*/0,
678         /*subsampling_y=*/0, static_cast<int>(params_.overlap_flag) << 1,
679         &noise_image_[kPlaneY]);
680     if (params_.overlap_flag) {
681       dsp.film_grain.construct_noise_image_overlap(
682           &noise_stripes_[kPlaneY], width_, height_, /*subsampling_x=*/0,
683           /*subsampling_y=*/0, &noise_image_[kPlaneY]);
684     }
685   }
686   if (!is_monochrome_) {
687     ConstructNoiseImage(&noise_stripes_[kPlaneU], width_, height_,
688                         subsampling_x_, subsampling_y_,
689                         static_cast<int>(params_.overlap_flag)
690                             << (1 - subsampling_y_),
691                         &noise_image_[kPlaneU]);
692     ConstructNoiseImage(&noise_stripes_[kPlaneV], width_, height_,
693                         subsampling_x_, subsampling_y_,
694                         static_cast<int>(params_.overlap_flag)
695                             << (1 - subsampling_y_),
696                         &noise_image_[kPlaneV]);
697     if (params_.overlap_flag) {
698       dsp.film_grain.construct_noise_image_overlap(
699           &noise_stripes_[kPlaneU], width_, height_, subsampling_x_,
700           subsampling_y_, &noise_image_[kPlaneU]);
701       dsp.film_grain.construct_noise_image_overlap(
702           &noise_stripes_[kPlaneV], width_, height_, subsampling_x_,
703           subsampling_y_, &noise_image_[kPlaneV]);
704     }
705   }
706 
707   // Blend noise image.
708   int min_value;
709   int max_luma;
710   int max_chroma;
711   if (params_.clip_to_restricted_range) {
712     min_value = 16 << (bitdepth - kBitdepth8);
713     max_luma = 235 << (bitdepth - kBitdepth8);
714     if (color_matrix_is_identity_) {
715       max_chroma = max_luma;
716     } else {
717       max_chroma = 240 << (bitdepth - kBitdepth8);
718     }
719   } else {
720     min_value = 0;
721     max_luma = (256 << (bitdepth - kBitdepth8)) - 1;
722     max_chroma = max_luma;
723   }
724 
725   // Handle all chroma planes first because luma source may be altered in place.
726   if (!is_monochrome_) {
727     // This is done in a strange way but Vector can't be passed by copy to the
728     // lambda capture that spawns the thread.
729     Plane planes_to_blend[2];
730     int num_planes = 0;
731     if (params_.chroma_scaling_from_luma) {
732       // Both noise planes are computed from the luma scaling lookup table.
733       planes_to_blend[num_planes++] = kPlaneU;
734       planes_to_blend[num_planes++] = kPlaneV;
735     } else {
736       const int height_uv = SubsampledValue(height_, subsampling_y_);
737       const int width_uv = SubsampledValue(width_, subsampling_x_);
738 
739       // Noise is applied according to a lookup table defined by pieceiwse
740       // linear "points." If the lookup table is empty, that corresponds to
741       // outputting zero noise.
742       if (params_.num_u_points == 0) {
743         CopyImagePlane<Pixel>(source_plane_u, source_stride_uv, width_uv,
744                               height_uv, dest_plane_u, dest_stride_uv);
745       } else {
746         planes_to_blend[num_planes++] = kPlaneU;
747       }
748       if (params_.num_v_points == 0) {
749         CopyImagePlane<Pixel>(source_plane_v, source_stride_uv, width_uv,
750                               height_uv, dest_plane_v, dest_stride_uv);
751       } else {
752         planes_to_blend[num_planes++] = kPlaneV;
753       }
754     }
755     if (thread_pool_ != nullptr && num_planes > 0) {
756       const int num_workers = thread_pool_->num_threads();
757       BlockingCounter pending_workers(num_workers);
758       std::atomic<int> job_counter(0);
759       for (int i = 0; i < num_workers; ++i) {
760         thread_pool_->Schedule([this, dsp, &pending_workers, &planes_to_blend,
761                                 num_planes, &job_counter, min_value, max_chroma,
762                                 source_plane_y, source_stride_y, source_plane_u,
763                                 source_plane_v, source_stride_uv, dest_plane_u,
764                                 dest_plane_v, dest_stride_uv]() {
765           BlendNoiseChromaWorker(dsp, planes_to_blend, num_planes, &job_counter,
766                                  min_value, max_chroma, source_plane_y,
767                                  source_stride_y, source_plane_u,
768                                  source_plane_v, source_stride_uv, dest_plane_u,
769                                  dest_plane_v, dest_stride_uv);
770           pending_workers.Decrement();
771         });
772       }
773       BlendNoiseChromaWorker(
774           dsp, planes_to_blend, num_planes, &job_counter, min_value, max_chroma,
775           source_plane_y, source_stride_y, source_plane_u, source_plane_v,
776           source_stride_uv, dest_plane_u, dest_plane_v, dest_stride_uv);
777 
778       pending_workers.Wait();
779     } else {
780       // Single threaded.
781       if (params_.num_u_points > 0 || params_.chroma_scaling_from_luma) {
782         dsp.film_grain.blend_noise_chroma[params_.chroma_scaling_from_luma](
783             kPlaneU, params_, noise_image_, min_value, max_chroma, width_,
784             height_, /*start_height=*/0, subsampling_x_, subsampling_y_,
785             scaling_lut_u_, source_plane_y, source_stride_y, source_plane_u,
786             source_stride_uv, dest_plane_u, dest_stride_uv);
787       }
788       if (params_.num_v_points > 0 || params_.chroma_scaling_from_luma) {
789         dsp.film_grain.blend_noise_chroma[params_.chroma_scaling_from_luma](
790             kPlaneV, params_, noise_image_, min_value, max_chroma, width_,
791             height_, /*start_height=*/0, subsampling_x_, subsampling_y_,
792             scaling_lut_v_, source_plane_y, source_stride_y, source_plane_v,
793             source_stride_uv, dest_plane_v, dest_stride_uv);
794       }
795     }
796   }
797   if (use_luma) {
798     if (thread_pool_ != nullptr) {
799       const int num_workers = thread_pool_->num_threads();
800       BlockingCounter pending_workers(num_workers);
801       std::atomic<int> job_counter(0);
802       for (int i = 0; i < num_workers; ++i) {
803         thread_pool_->Schedule(
804             [this, dsp, &pending_workers, &job_counter, min_value, max_luma,
805              source_plane_y, source_stride_y, dest_plane_y, dest_stride_y]() {
806               BlendNoiseLumaWorker(dsp, &job_counter, min_value, max_luma,
807                                    source_plane_y, source_stride_y,
808                                    dest_plane_y, dest_stride_y);
809               pending_workers.Decrement();
810             });
811       }
812 
813       BlendNoiseLumaWorker(dsp, &job_counter, min_value, max_luma,
814                            source_plane_y, source_stride_y, dest_plane_y,
815                            dest_stride_y);
816       pending_workers.Wait();
817     } else {
818       dsp.film_grain.blend_noise_luma(
819           noise_image_, min_value, max_luma, params_.chroma_scaling, width_,
820           height_, /*start_height=*/0, scaling_lut_y_, source_plane_y,
821           source_stride_y, dest_plane_y, dest_stride_y);
822     }
823   } else {
824     CopyImagePlane<Pixel>(source_plane_y, source_stride_y, width_, height_,
825                           dest_plane_y, dest_stride_y);
826   }
827 
828   return true;
829 }
830 
// Explicit instantiations.
// The member functions of FilmGrain are defined in this file, so the class
// template is instantiated here for each supported bitdepth. The 8-bit version
// is always instantiated; the higher bitdepths depend on LIBGAV1_MAX_BITDEPTH.
template class FilmGrain<kBitdepth8>;
#if LIBGAV1_MAX_BITDEPTH >= 10
template class FilmGrain<kBitdepth10>;
#endif
// The 12-bit version is instantiated only when LIBGAV1_MAX_BITDEPTH is
// exactly 12.
#if LIBGAV1_MAX_BITDEPTH == 12
template class FilmGrain<kBitdepth12>;
#endif
839 
840 }  // namespace libgav1
841