• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "skcms.h"
9 #include "skcms_internal.h"
10 #include <assert.h>
11 #include <float.h>
12 #include <limits.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #if defined(__ARM_NEON)
17     #include <arm_neon.h>
18 #elif defined(__SSE__)
19     #include <immintrin.h>
20 
21     #if defined(__clang__)
22         // That #include <immintrin.h> is usually enough, but Clang's headers
23         // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24         // defined, because lots of programs on Windows would include that and
25         // it'd be a lot slower.  But we want all those headers included so we
26         // can use their features after runtime checks later.
27         #include <smmintrin.h>
28         #include <avxintrin.h>
29         #include <avx2intrin.h>
30         #include <avx512fintrin.h>
31         #include <avx512dqintrin.h>
32     #endif
33 #endif
34 
35 // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
36 // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
37 //
38 // Please do not use sizeof() directly, and size_t only when required.
39 // (We have no way of enforcing these requests...)
40 #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
41 
42 // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
43 #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
44 
45 static const union {
46     uint32_t bits;
47     float    f;
48 } inf_ = { 0x7f800000 };
49 #define INFINITY_ inf_.f
50 
fmaxf_(float x,float y)51 static float fmaxf_(float x, float y) { return x > y ? x : y; }
fminf_(float x,float y)52 static float fminf_(float x, float y) { return x < y ? x : y; }
53 
isfinitef_(float x)54 static bool isfinitef_(float x) { return 0 == x*0; }
55 
minus_1_ulp(float x)56 static float minus_1_ulp(float x) {
57     int32_t bits;
58     memcpy(&bits, &x, sizeof(bits));
59     bits = bits - 1;
60     memcpy(&x, &bits, sizeof(bits));
61     return x;
62 }
63 
eval_curve(const skcms_Curve * curve,float x)64 static float eval_curve(const skcms_Curve* curve, float x) {
65     if (curve->table_entries == 0) {
66         return skcms_TransferFunction_eval(&curve->parametric, x);
67     }
68 
69     float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
70     int   lo = (int)                   ix        ,
71           hi = (int)(float)minus_1_ulp(ix + 1.0f);
72     float t = ix - (float)lo;
73 
74     float l, h;
75     if (curve->table_8) {
76         l = curve->table_8[lo] * (1/255.0f);
77         h = curve->table_8[hi] * (1/255.0f);
78     } else {
79         uint16_t be_l, be_h;
80         memcpy(&be_l, curve->table_16 + 2*lo, 2);
81         memcpy(&be_h, curve->table_16 + 2*hi, 2);
82         uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
83         uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
84         l = le_l * (1/65535.0f);
85         h = le_h * (1/65535.0f);
86     }
87     return l + (h-l)*t;
88 }
89 
max_roundtrip_error(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)90 static float max_roundtrip_error(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
91     uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
92     const float dx = 1.0f / (N - 1);
93     float err = 0;
94     for (uint32_t i = 0; i < N; i++) {
95         float x = i * dx,
96               y = eval_curve(curve, x);
97         err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
98     }
99     return err;
100 }
101 
skcms_AreApproximateInverses(const skcms_Curve * curve,const skcms_TransferFunction * inv_tf)102 bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
103     return max_roundtrip_error(curve, inv_tf) < (1/512.0f);
104 }
105 
106 // Additional ICC signature values that are only used internally
107 enum {
108     // File signature
109     skcms_Signature_acsp = 0x61637370,
110 
111     // Tag signatures
112     skcms_Signature_rTRC = 0x72545243,
113     skcms_Signature_gTRC = 0x67545243,
114     skcms_Signature_bTRC = 0x62545243,
115     skcms_Signature_kTRC = 0x6B545243,
116 
117     skcms_Signature_rXYZ = 0x7258595A,
118     skcms_Signature_gXYZ = 0x6758595A,
119     skcms_Signature_bXYZ = 0x6258595A,
120 
121     skcms_Signature_A2B0 = 0x41324230,
122     skcms_Signature_A2B1 = 0x41324231,
123     skcms_Signature_mAB  = 0x6D414220,
124 
125     skcms_Signature_CHAD = 0x63686164,
126 
127     // Type signatures
128     skcms_Signature_curv = 0x63757276,
129     skcms_Signature_mft1 = 0x6D667431,
130     skcms_Signature_mft2 = 0x6D667432,
131     skcms_Signature_para = 0x70617261,
132     skcms_Signature_sf32 = 0x73663332,
133     // XYZ is also a PCS signature, so it's defined in skcms.h
134     // skcms_Signature_XYZ = 0x58595A20,
135 };
136 
read_big_u16(const uint8_t * ptr)137 static uint16_t read_big_u16(const uint8_t* ptr) {
138     uint16_t be;
139     memcpy(&be, ptr, sizeof(be));
140 #if defined(_MSC_VER)
141     return _byteswap_ushort(be);
142 #else
143     return __builtin_bswap16(be);
144 #endif
145 }
146 
read_big_u32(const uint8_t * ptr)147 static uint32_t read_big_u32(const uint8_t* ptr) {
148     uint32_t be;
149     memcpy(&be, ptr, sizeof(be));
150 #if defined(_MSC_VER)
151     return _byteswap_ulong(be);
152 #else
153     return __builtin_bswap32(be);
154 #endif
155 }
156 
read_big_i32(const uint8_t * ptr)157 static int32_t read_big_i32(const uint8_t* ptr) {
158     return (int32_t)read_big_u32(ptr);
159 }
160 
read_big_fixed(const uint8_t * ptr)161 static float read_big_fixed(const uint8_t* ptr) {
162     return read_big_i32(ptr) * (1.0f / 65536.0f);
163 }
164 
165 // Maps to an in-memory profile so that fields line up to the locations specified
166 // in ICC.1:2010, section 7.2
167 typedef struct {
168     uint8_t size                [ 4];
169     uint8_t cmm_type            [ 4];
170     uint8_t version             [ 4];
171     uint8_t profile_class       [ 4];
172     uint8_t data_color_space    [ 4];
173     uint8_t pcs                 [ 4];
174     uint8_t creation_date_time  [12];
175     uint8_t signature           [ 4];
176     uint8_t platform            [ 4];
177     uint8_t flags               [ 4];
178     uint8_t device_manufacturer [ 4];
179     uint8_t device_model        [ 4];
180     uint8_t device_attributes   [ 8];
181     uint8_t rendering_intent    [ 4];
182     uint8_t illuminant_X        [ 4];
183     uint8_t illuminant_Y        [ 4];
184     uint8_t illuminant_Z        [ 4];
185     uint8_t creator             [ 4];
186     uint8_t profile_id          [16];
187     uint8_t reserved            [28];
188     uint8_t tag_count           [ 4]; // Technically not part of header, but required
189 } header_Layout;
190 
191 typedef struct {
192     uint8_t signature [4];
193     uint8_t offset    [4];
194     uint8_t size      [4];
195 } tag_Layout;
196 
get_tag_table(const skcms_ICCProfile * profile)197 static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
198     return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
199 }
200 
201 // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
202 // use of the type is for the CHAD tag that stores exactly nine values.
203 typedef struct {
204     uint8_t type     [ 4];
205     uint8_t reserved [ 4];
206     uint8_t values   [36];
207 } sf32_Layout;
208 
skcms_GetCHAD(const skcms_ICCProfile * profile,skcms_Matrix3x3 * m)209 bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
210     skcms_ICCTag tag;
211     if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
212         return false;
213     }
214 
215     if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
216         return false;
217     }
218 
219     const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
220     const uint8_t* values = sf32Tag->values;
221     for (int r = 0; r < 3; ++r)
222     for (int c = 0; c < 3; ++c, values += 4) {
223         m->vals[r][c] = read_big_fixed(values);
224     }
225     return true;
226 }
227 
228 // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
229 // the type are for tags/data that store exactly one triple.
230 typedef struct {
231     uint8_t type     [4];
232     uint8_t reserved [4];
233     uint8_t X        [4];
234     uint8_t Y        [4];
235     uint8_t Z        [4];
236 } XYZ_Layout;
237 
read_tag_xyz(const skcms_ICCTag * tag,float * x,float * y,float * z)238 static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
239     if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
240         return false;
241     }
242 
243     const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
244 
245     *x = read_big_fixed(xyzTag->X);
246     *y = read_big_fixed(xyzTag->Y);
247     *z = read_big_fixed(xyzTag->Z);
248     return true;
249 }
250 
read_to_XYZD50(const skcms_ICCTag * rXYZ,const skcms_ICCTag * gXYZ,const skcms_ICCTag * bXYZ,skcms_Matrix3x3 * toXYZ)251 static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
252                            const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
253     return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
254            read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
255            read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
256 }
257 
tf_is_valid(const skcms_TransferFunction * tf)258 static bool tf_is_valid(const skcms_TransferFunction* tf) {
259     // Reject obviously malformed inputs
260     if (!isfinitef_(tf->a + tf->b + tf->c + tf->d + tf->e + tf->f + tf->g)) {
261         return false;
262     }
263 
264     // All of these parameters should be non-negative
265     if (tf->a < 0 || tf->c < 0 || tf->d < 0 || tf->g < 0) {
266         return false;
267     }
268 
269     return true;
270 }
271 
272 typedef struct {
273     uint8_t type          [4];
274     uint8_t reserved_a    [4];
275     uint8_t function_type [2];
276     uint8_t reserved_b    [2];
277     uint8_t variable      [1/*variable*/];  // 1, 3, 4, 5, or 7 s15.16, depending on function_type
278 } para_Layout;
279 
read_curve_para(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)280 static bool read_curve_para(const uint8_t* buf, uint32_t size,
281                             skcms_Curve* curve, uint32_t* curve_size) {
282     if (size < SAFE_FIXED_SIZE(para_Layout)) {
283         return false;
284     }
285 
286     const para_Layout* paraTag = (const para_Layout*)buf;
287 
288     enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
289     uint16_t function_type = read_big_u16(paraTag->function_type);
290     if (function_type > kGABCDEF) {
291         return false;
292     }
293 
294     static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
295     if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
296         return false;
297     }
298 
299     if (curve_size) {
300         *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
301     }
302 
303     curve->table_entries = 0;
304     curve->parametric.a  = 1.0f;
305     curve->parametric.b  = 0.0f;
306     curve->parametric.c  = 0.0f;
307     curve->parametric.d  = 0.0f;
308     curve->parametric.e  = 0.0f;
309     curve->parametric.f  = 0.0f;
310     curve->parametric.g  = read_big_fixed(paraTag->variable);
311 
312     switch (function_type) {
313         case kGAB:
314             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
315             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
316             if (curve->parametric.a == 0) {
317                 return false;
318             }
319             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
320             break;
321         case kGABC:
322             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
323             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
324             curve->parametric.e = read_big_fixed(paraTag->variable + 12);
325             if (curve->parametric.a == 0) {
326                 return false;
327             }
328             curve->parametric.d = -curve->parametric.b / curve->parametric.a;
329             curve->parametric.f = curve->parametric.e;
330             break;
331         case kGABCD:
332             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
333             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
334             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
335             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
336             break;
337         case kGABCDEF:
338             curve->parametric.a = read_big_fixed(paraTag->variable + 4);
339             curve->parametric.b = read_big_fixed(paraTag->variable + 8);
340             curve->parametric.c = read_big_fixed(paraTag->variable + 12);
341             curve->parametric.d = read_big_fixed(paraTag->variable + 16);
342             curve->parametric.e = read_big_fixed(paraTag->variable + 20);
343             curve->parametric.f = read_big_fixed(paraTag->variable + 24);
344             break;
345     }
346     return tf_is_valid(&curve->parametric);
347 }
348 
349 typedef struct {
350     uint8_t type          [4];
351     uint8_t reserved      [4];
352     uint8_t value_count   [4];
353     uint8_t variable      [1/*variable*/];  // value_count, 8.8 if 1, uint16 (n*65535) if > 1
354 } curv_Layout;
355 
read_curve_curv(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)356 static bool read_curve_curv(const uint8_t* buf, uint32_t size,
357                             skcms_Curve* curve, uint32_t* curve_size) {
358     if (size < SAFE_FIXED_SIZE(curv_Layout)) {
359         return false;
360     }
361 
362     const curv_Layout* curvTag = (const curv_Layout*)buf;
363 
364     uint32_t value_count = read_big_u32(curvTag->value_count);
365     if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
366         return false;
367     }
368 
369     if (curve_size) {
370         *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
371     }
372 
373     if (value_count < 2) {
374         curve->table_entries = 0;
375         curve->parametric.a  = 1.0f;
376         curve->parametric.b  = 0.0f;
377         curve->parametric.c  = 0.0f;
378         curve->parametric.d  = 0.0f;
379         curve->parametric.e  = 0.0f;
380         curve->parametric.f  = 0.0f;
381         if (value_count == 0) {
382             // Empty tables are a shorthand for an identity curve
383             curve->parametric.g = 1.0f;
384         } else {
385             // Single entry tables are a shorthand for simple gamma
386             curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
387         }
388     } else {
389         curve->table_8       = nullptr;
390         curve->table_16      = curvTag->variable;
391         curve->table_entries = value_count;
392     }
393 
394     return true;
395 }
396 
397 // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
398 // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
read_curve(const uint8_t * buf,uint32_t size,skcms_Curve * curve,uint32_t * curve_size)399 static bool read_curve(const uint8_t* buf, uint32_t size,
400                        skcms_Curve* curve, uint32_t* curve_size) {
401     if (!buf || size < 4 || !curve) {
402         return false;
403     }
404 
405     uint32_t type = read_big_u32(buf);
406     if (type == skcms_Signature_para) {
407         return read_curve_para(buf, size, curve, curve_size);
408     } else if (type == skcms_Signature_curv) {
409         return read_curve_curv(buf, size, curve, curve_size);
410     }
411 
412     return false;
413 }
414 
415 // mft1 and mft2 share a large chunk of data
416 typedef struct {
417     uint8_t type                 [ 4];
418     uint8_t reserved_a           [ 4];
419     uint8_t input_channels       [ 1];
420     uint8_t output_channels      [ 1];
421     uint8_t grid_points          [ 1];
422     uint8_t reserved_b           [ 1];
423     uint8_t matrix               [36];
424 } mft_CommonLayout;
425 
426 typedef struct {
427     mft_CommonLayout common      [1];
428 
429     uint8_t variable             [1/*variable*/];
430 } mft1_Layout;
431 
432 typedef struct {
433     mft_CommonLayout common      [1];
434 
435     uint8_t input_table_entries  [2];
436     uint8_t output_table_entries [2];
437     uint8_t variable             [1/*variable*/];
438 } mft2_Layout;
439 
read_mft_common(const mft_CommonLayout * mftTag,skcms_A2B * a2b)440 static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
441     // MFT matrices are applied before the first set of curves, but must be identity unless the
442     // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
443     // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
444     // field/flag.
445     a2b->matrix_channels = 0;
446 
447     a2b->input_channels  = mftTag->input_channels[0];
448     a2b->output_channels = mftTag->output_channels[0];
449 
450     // We require exactly three (ie XYZ/Lab/RGB) output channels
451     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
452         return false;
453     }
454     // We require at least one, and no more than four (ie CMYK) input channels
455     if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
456         return false;
457     }
458 
459     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
460         a2b->grid_points[i] = mftTag->grid_points[0];
461     }
462     // The grid only makes sense with at least two points along each axis
463     if (a2b->grid_points[0] < 2) {
464         return false;
465     }
466 
467     return true;
468 }
469 
init_a2b_tables(const uint8_t * table_base,uint64_t max_tables_len,uint32_t byte_width,uint32_t input_table_entries,uint32_t output_table_entries,skcms_A2B * a2b)470 static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
471                             uint32_t input_table_entries, uint32_t output_table_entries,
472                             skcms_A2B* a2b) {
473     // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
474     uint32_t byte_len_per_input_table  = input_table_entries * byte_width;
475     uint32_t byte_len_per_output_table = output_table_entries * byte_width;
476 
477     // [input|output]_channels are <= 4, so still no overflow
478     uint32_t byte_len_all_input_tables  = a2b->input_channels * byte_len_per_input_table;
479     uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
480 
481     uint64_t grid_size = a2b->output_channels * byte_width;
482     for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
483         grid_size *= a2b->grid_points[axis];
484     }
485 
486     if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
487         return false;
488     }
489 
490     for (uint32_t i = 0; i < a2b->input_channels; ++i) {
491         a2b->input_curves[i].table_entries = input_table_entries;
492         if (byte_width == 1) {
493             a2b->input_curves[i].table_8  = table_base + i * byte_len_per_input_table;
494             a2b->input_curves[i].table_16 = nullptr;
495         } else {
496             a2b->input_curves[i].table_8  = nullptr;
497             a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
498         }
499     }
500 
501     if (byte_width == 1) {
502         a2b->grid_8  = table_base + byte_len_all_input_tables;
503         a2b->grid_16 = nullptr;
504     } else {
505         a2b->grid_8  = nullptr;
506         a2b->grid_16 = table_base + byte_len_all_input_tables;
507     }
508 
509     const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
510     for (uint32_t i = 0; i < a2b->output_channels; ++i) {
511         a2b->output_curves[i].table_entries = output_table_entries;
512         if (byte_width == 1) {
513             a2b->output_curves[i].table_8  = output_table_base + i * byte_len_per_output_table;
514             a2b->output_curves[i].table_16 = nullptr;
515         } else {
516             a2b->output_curves[i].table_8  = nullptr;
517             a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
518         }
519     }
520 
521     return true;
522 }
523 
read_tag_mft1(const skcms_ICCTag * tag,skcms_A2B * a2b)524 static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
525     if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
526         return false;
527     }
528 
529     const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
530     if (!read_mft_common(mftTag->common, a2b)) {
531         return false;
532     }
533 
534     uint32_t input_table_entries  = 256;
535     uint32_t output_table_entries = 256;
536 
537     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
538                            input_table_entries, output_table_entries, a2b);
539 }
540 
read_tag_mft2(const skcms_ICCTag * tag,skcms_A2B * a2b)541 static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
542     if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
543         return false;
544     }
545 
546     const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
547     if (!read_mft_common(mftTag->common, a2b)) {
548         return false;
549     }
550 
551     uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
552     uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
553 
554     // ICC spec mandates that 2 <= table_entries <= 4096
555     if (input_table_entries < 2 || input_table_entries > 4096 ||
556         output_table_entries < 2 || output_table_entries > 4096) {
557         return false;
558     }
559 
560     return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
561                            input_table_entries, output_table_entries, a2b);
562 }
563 
read_curves(const uint8_t * buf,uint32_t size,uint32_t curve_offset,uint32_t num_curves,skcms_Curve * curves)564 static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
565                         uint32_t num_curves, skcms_Curve* curves) {
566     for (uint32_t i = 0; i < num_curves; ++i) {
567         if (curve_offset > size) {
568             return false;
569         }
570 
571         uint32_t curve_bytes;
572         if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
573             return false;
574         }
575 
576         if (curve_bytes > UINT32_MAX - 3) {
577             return false;
578         }
579         curve_bytes = (curve_bytes + 3) & ~3U;
580 
581         uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
582         curve_offset = (uint32_t)new_offset_64;
583         if (new_offset_64 != curve_offset) {
584             return false;
585         }
586     }
587 
588     return true;
589 }
590 
591 typedef struct {
592     uint8_t type                 [ 4];
593     uint8_t reserved_a           [ 4];
594     uint8_t input_channels       [ 1];
595     uint8_t output_channels      [ 1];
596     uint8_t reserved_b           [ 2];
597     uint8_t b_curve_offset       [ 4];
598     uint8_t matrix_offset        [ 4];
599     uint8_t m_curve_offset       [ 4];
600     uint8_t clut_offset          [ 4];
601     uint8_t a_curve_offset       [ 4];
602 } mAB_Layout;
603 
604 typedef struct {
605     uint8_t grid_points          [16];
606     uint8_t grid_byte_width      [ 1];
607     uint8_t reserved             [ 3];
608     uint8_t variable             [1/*variable*/];
609 } mABCLUT_Layout;
610 
read_tag_mab(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)611 static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
612     if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
613         return false;
614     }
615 
616     const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
617 
618     a2b->input_channels  = mABTag->input_channels[0];
619     a2b->output_channels = mABTag->output_channels[0];
620 
621     // We require exactly three (ie XYZ/Lab/RGB) output channels
622     if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
623         return false;
624     }
625     // We require no more than four (ie CMYK) input channels
626     if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
627         return false;
628     }
629 
630     uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
631     uint32_t matrix_offset  = read_big_u32(mABTag->matrix_offset);
632     uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
633     uint32_t clut_offset    = read_big_u32(mABTag->clut_offset);
634     uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
635 
636     // "B" curves must be present
637     if (0 == b_curve_offset) {
638         return false;
639     }
640 
641     if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
642                      a2b->output_curves)) {
643         return false;
644     }
645 
646     // "M" curves and Matrix must be used together
647     if (0 != m_curve_offset) {
648         if (0 == matrix_offset) {
649             return false;
650         }
651         a2b->matrix_channels = a2b->output_channels;
652         if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
653                          a2b->matrix_curves)) {
654             return false;
655         }
656 
657         // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
658         if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
659             return false;
660         }
661         float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
662         const uint8_t* mtx_buf = tag->buf + matrix_offset;
663         a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
664         a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
665         a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
666         a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
667         a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
668         a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
669         a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
670         a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
671         a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
672         a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
673         a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
674         a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
675     } else {
676         if (0 != matrix_offset) {
677             return false;
678         }
679         a2b->matrix_channels = 0;
680     }
681 
682     // "A" curves and CLUT must be used together
683     if (0 != a_curve_offset) {
684         if (0 == clut_offset) {
685             return false;
686         }
687         if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
688                          a2b->input_curves)) {
689             return false;
690         }
691 
692         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
693             return false;
694         }
695         const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
696 
697         if (clut->grid_byte_width[0] == 1) {
698             a2b->grid_8  = clut->variable;
699             a2b->grid_16 = nullptr;
700         } else if (clut->grid_byte_width[0] == 2) {
701             a2b->grid_8  = nullptr;
702             a2b->grid_16 = clut->variable;
703         } else {
704             return false;
705         }
706 
707         uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
708         for (uint32_t i = 0; i < a2b->input_channels; ++i) {
709             a2b->grid_points[i] = clut->grid_points[i];
710             // The grid only makes sense with at least two points along each axis
711             if (a2b->grid_points[i] < 2) {
712                 return false;
713             }
714             grid_size *= a2b->grid_points[i];
715         }
716         if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
717             return false;
718         }
719     } else {
720         if (0 != clut_offset) {
721             return false;
722         }
723 
724         // If there is no CLUT, the number of input and output channels must match
725         if (a2b->input_channels != a2b->output_channels) {
726             return false;
727         }
728 
729         // Zero out the number of input channels to signal that we're skipping this stage
730         a2b->input_channels = 0;
731     }
732 
733     return true;
734 }
735 
736 // If you pass f, we'll fit a possibly-non-zero value for *f.
737 // If you pass nullptr, we'll assume you want *f to be treated as zero.
fit_linear(const skcms_Curve * curve,int N,float tol,float * c,float * d,float * f=nullptr)738 static int fit_linear(const skcms_Curve* curve, int N, float tol,
739                       float* c, float* d, float* f = nullptr) {
740     assert(N > 1);
741     // We iteratively fit the first points to the TF's linear piece.
742     // We want the cx + f line to pass through the first and last points we fit exactly.
743     //
744     // As we walk along the points we find the minimum and maximum slope of the line before the
745     // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
746     // emtpy, when we definitely can't add any more points.
747     //
748     // Some points' error intervals may intersect the running interval but not lie fully
749     // within it.  So we keep track of the last point we saw that is a valid end point candidate,
750     // and once the search is done, back up to build the line through *that* point.
751     const float dx = 1.0f / (N - 1);
752 
753     int lin_points = 1;
754 
755     float f_zero = 0.0f;
756     if (f) {
757         *f = eval_curve(curve, 0);
758     } else {
759         f = &f_zero;
760     }
761 
762 
763     float slope_min = -INFINITY_;
764     float slope_max = +INFINITY_;
765     for (int i = 1; i < N; ++i) {
766         float x = i * dx;
767         float y = eval_curve(curve, x);
768 
769         float slope_max_i = (y + tol - *f) / x,
770               slope_min_i = (y - tol - *f) / x;
771         if (slope_max_i < slope_min || slope_max < slope_min_i) {
772             // Slope intervals would no longer overlap.
773             break;
774         }
775         slope_max = fminf_(slope_max, slope_max_i);
776         slope_min = fmaxf_(slope_min, slope_min_i);
777 
778         float cur_slope = (y - *f) / x;
779         if (slope_min <= cur_slope && cur_slope <= slope_max) {
780             lin_points = i + 1;
781             *c = cur_slope;
782         }
783     }
784 
785     // Set D to the last point that met our tolerance.
786     *d = (lin_points - 1) * dx;
787     return lin_points;
788 }
789 
read_a2b(const skcms_ICCTag * tag,skcms_A2B * a2b,bool pcs_is_xyz)790 static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
791     bool ok = false;
792     if (tag->type == skcms_Signature_mft1) {
793         ok = read_tag_mft1(tag, a2b);
794     } else if (tag->type == skcms_Signature_mft2) {
795         ok = read_tag_mft2(tag, a2b);
796     } else if (tag->type == skcms_Signature_mAB) {
797         ok = read_tag_mab(tag, a2b, pcs_is_xyz);
798     }
799     if (!ok) {
800         return false;
801     }
802 
803     // Detect and canonicalize identity tables.
804     skcms_Curve* curves[] = {
805         a2b->input_channels  > 0 ? a2b->input_curves  + 0 : nullptr,
806         a2b->input_channels  > 1 ? a2b->input_curves  + 1 : nullptr,
807         a2b->input_channels  > 2 ? a2b->input_curves  + 2 : nullptr,
808         a2b->input_channels  > 3 ? a2b->input_curves  + 3 : nullptr,
809         a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
810         a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
811         a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
812         a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
813         a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
814         a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
815     };
816 
817     for (int i = 0; i < ARRAY_COUNT(curves); i++) {
818         skcms_Curve* curve = curves[i];
819 
820         if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
821             int N = (int)curve->table_entries;
822 
823             float c = 0.0f, d = 0.0f, f = 0.0f;
824             if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
825                 && c == 1.0f
826                 && f == 0.0f) {
827                 curve->table_entries = 0;
828                 curve->table_8       = nullptr;
829                 curve->table_16      = nullptr;
830                 curve->parametric    = skcms_TransferFunction{1,1,0,0,0,0,0};
831             }
832         }
833     }
834 
835     return true;
836 }
837 
skcms_GetTagByIndex(const skcms_ICCProfile * profile,uint32_t idx,skcms_ICCTag * tag)838 void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
839     if (!profile || !profile->buffer || !tag) { return; }
840     if (idx > profile->tag_count) { return; }
841     const tag_Layout* tags = get_tag_table(profile);
842     tag->signature = read_big_u32(tags[idx].signature);
843     tag->size      = read_big_u32(tags[idx].size);
844     tag->buf       = read_big_u32(tags[idx].offset) + profile->buffer;
845     tag->type      = read_big_u32(tag->buf);
846 }
847 
skcms_GetTagBySignature(const skcms_ICCProfile * profile,uint32_t sig,skcms_ICCTag * tag)848 bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
849     if (!profile || !profile->buffer || !tag) { return false; }
850     const tag_Layout* tags = get_tag_table(profile);
851     for (uint32_t i = 0; i < profile->tag_count; ++i) {
852         if (read_big_u32(tags[i].signature) == sig) {
853             tag->signature = sig;
854             tag->size      = read_big_u32(tags[i].size);
855             tag->buf       = read_big_u32(tags[i].offset) + profile->buffer;
856             tag->type      = read_big_u32(tag->buf);
857             return true;
858         }
859     }
860     return false;
861 }
862 
usable_as_src(const skcms_ICCProfile * profile)863 static bool usable_as_src(const skcms_ICCProfile* profile) {
864     return profile->has_A2B
865        || (profile->has_trc && profile->has_toXYZD50);
866 }
867 
skcms_Parse(const void * buf,size_t len,skcms_ICCProfile * profile)868 bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
869     assert(SAFE_SIZEOF(header_Layout) == 132);
870 
871     if (!profile) {
872         return false;
873     }
874     memset(profile, 0, SAFE_SIZEOF(*profile));
875 
876     if (len < SAFE_SIZEOF(header_Layout)) {
877         return false;
878     }
879 
880     // Byte-swap all header fields
881     const header_Layout* header  = (const header_Layout*)buf;
882     profile->buffer              = (const uint8_t*)buf;
883     profile->size                = read_big_u32(header->size);
884     uint32_t version             = read_big_u32(header->version);
885     profile->data_color_space    = read_big_u32(header->data_color_space);
886     profile->pcs                 = read_big_u32(header->pcs);
887     uint32_t signature           = read_big_u32(header->signature);
888     float illuminant_X           = read_big_fixed(header->illuminant_X);
889     float illuminant_Y           = read_big_fixed(header->illuminant_Y);
890     float illuminant_Z           = read_big_fixed(header->illuminant_Z);
891     profile->tag_count           = read_big_u32(header->tag_count);
892 
893     // Validate signature, size (smaller than buffer, large enough to hold tag table),
894     // and major version
895     uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
896     if (signature != skcms_Signature_acsp ||
897         profile->size > len ||
898         profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
899         (version >> 24) > 4) {
900         return false;
901     }
902 
903     // Validate that illuminant is D50 white
904     if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
905         fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
906         fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
907         return false;
908     }
909 
910     // Validate that all tag entries have sane offset + size
911     const tag_Layout* tags = get_tag_table(profile);
912     for (uint32_t i = 0; i < profile->tag_count; ++i) {
913         uint32_t tag_offset = read_big_u32(tags[i].offset);
914         uint32_t tag_size   = read_big_u32(tags[i].size);
915         uint64_t tag_end    = (uint64_t)tag_offset + (uint64_t)tag_size;
916         if (tag_size < 4 || tag_end > profile->size) {
917             return false;
918         }
919     }
920 
921     if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
922         return false;
923     }
924 
925     bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
926 
927     // Pre-parse commonly used tags.
928     skcms_ICCTag kTRC;
929     if (profile->data_color_space == skcms_Signature_Gray &&
930         skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
931         if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
932             // Malformed tag
933             return false;
934         }
935         profile->trc[1] = profile->trc[0];
936         profile->trc[2] = profile->trc[0];
937         profile->has_trc = true;
938 
939         if (pcs_is_xyz) {
940             profile->toXYZD50.vals[0][0] = illuminant_X;
941             profile->toXYZD50.vals[1][1] = illuminant_Y;
942             profile->toXYZD50.vals[2][2] = illuminant_Z;
943             profile->has_toXYZD50 = true;
944         }
945     } else {
946         skcms_ICCTag rTRC, gTRC, bTRC;
947         if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
948             skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
949             skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
950             if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
951                 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
952                 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
953                 // Malformed TRC tags
954                 return false;
955             }
956             profile->has_trc = true;
957         }
958 
959         skcms_ICCTag rXYZ, gXYZ, bXYZ;
960         if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
961             skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
962             skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
963             if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
964                 // Malformed XYZ tags
965                 return false;
966             }
967             profile->has_toXYZD50 = true;
968         }
969     }
970 
971     skcms_ICCTag a2b_tag;
972 
973     // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
974     // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
975     // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
976     // and all our known users are thinking exclusively in terms of relative colormetric.
977     const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
978     for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
979         if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
980             if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
981                 // Malformed A2B tag
982                 return false;
983             }
984             profile->has_A2B = true;
985             break;
986         }
987     }
988 
989     return usable_as_src(profile);
990 }
991 
992 
skcms_sRGB_profile()993 const skcms_ICCProfile* skcms_sRGB_profile() {
994     static const skcms_ICCProfile sRGB_profile = {
995         nullptr,               // buffer, moot here
996 
997         0,                     // size, moot here
998         skcms_Signature_RGB,   // data_color_space
999         skcms_Signature_XYZ,   // pcs
1000         0,                     // tag count, moot here
1001 
1002         // We choose to represent sRGB with its canonical transfer function,
1003         // and with its canonical XYZD50 gamut matrix.
1004         true,  // has_trc, followed by the 3 trc curves
1005         {
1006             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1007             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1008             {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1009         },
1010 
1011         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1012         {{
1013             { 0.436065674f, 0.385147095f, 0.143066406f },
1014             { 0.222488403f, 0.716873169f, 0.060607910f },
1015             { 0.013916016f, 0.097076416f, 0.714096069f },
1016         }},
1017 
1018         false, // has_A2B, followed by a2b itself which we don't care about.
1019         {
1020             0,
1021             {
1022                 {{0, {0,0, 0,0,0,0,0}}},
1023                 {{0, {0,0, 0,0,0,0,0}}},
1024                 {{0, {0,0, 0,0,0,0,0}}},
1025                 {{0, {0,0, 0,0,0,0,0}}},
1026             },
1027             {0,0,0,0},
1028             nullptr,
1029             nullptr,
1030 
1031             0,
1032             {
1033                 {{0, {0,0, 0,0,0,0,0}}},
1034                 {{0, {0,0, 0,0,0,0,0}}},
1035                 {{0, {0,0, 0,0,0,0,0}}},
1036             },
1037             {{
1038                 { 0,0,0,0 },
1039                 { 0,0,0,0 },
1040                 { 0,0,0,0 },
1041             }},
1042 
1043             0,
1044             {
1045                 {{0, {0,0, 0,0,0,0,0}}},
1046                 {{0, {0,0, 0,0,0,0,0}}},
1047                 {{0, {0,0, 0,0,0,0,0}}},
1048             },
1049         },
1050     };
1051     return &sRGB_profile;
1052 }
1053 
skcms_XYZD50_profile()1054 const skcms_ICCProfile* skcms_XYZD50_profile() {
1055     // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1056     static const skcms_ICCProfile XYZD50_profile = {
1057         nullptr,               // buffer, moot here
1058 
1059         0,                     // size, moot here
1060         skcms_Signature_RGB,   // data_color_space
1061         skcms_Signature_XYZ,   // pcs
1062         0,                     // tag count, moot here
1063 
1064         true,  // has_trc, followed by the 3 trc curves
1065         {
1066             {{0, {1,1, 0,0,0,0,0}}},
1067             {{0, {1,1, 0,0,0,0,0}}},
1068             {{0, {1,1, 0,0,0,0,0}}},
1069         },
1070 
1071         true,  // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1072         {{
1073             { 1,0,0 },
1074             { 0,1,0 },
1075             { 0,0,1 },
1076         }},
1077 
1078         false, // has_A2B, followed by a2b itself which we don't care about.
1079         {
1080             0,
1081             {
1082                 {{0, {0,0, 0,0,0,0,0}}},
1083                 {{0, {0,0, 0,0,0,0,0}}},
1084                 {{0, {0,0, 0,0,0,0,0}}},
1085                 {{0, {0,0, 0,0,0,0,0}}},
1086             },
1087             {0,0,0,0},
1088             nullptr,
1089             nullptr,
1090 
1091             0,
1092             {
1093                 {{0, {0,0, 0,0,0,0,0}}},
1094                 {{0, {0,0, 0,0,0,0,0}}},
1095                 {{0, {0,0, 0,0,0,0,0}}},
1096             },
1097             {{
1098                 { 0,0,0,0 },
1099                 { 0,0,0,0 },
1100                 { 0,0,0,0 },
1101             }},
1102 
1103             0,
1104             {
1105                 {{0, {0,0, 0,0,0,0,0}}},
1106                 {{0, {0,0, 0,0,0,0,0}}},
1107                 {{0, {0,0, 0,0,0,0,0}}},
1108             },
1109         },
1110     };
1111 
1112     return &XYZD50_profile;
1113 }
1114 
skcms_sRGB_TransferFunction()1115 const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1116     return &skcms_sRGB_profile()->trc[0].parametric;
1117 }
1118 
skcms_sRGB_Inverse_TransferFunction()1119 const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1120     static const skcms_TransferFunction sRGB_inv =
1121         {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1122     return &sRGB_inv;
1123 }
1124 
skcms_Identity_TransferFunction()1125 const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1126     static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1127     return &identity;
1128 }
1129 
1130 const uint8_t skcms_252_random_bytes[] = {
1131     8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1132     119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1133     154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1134     194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1135     108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1136     70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1137     137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1138     9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1139     129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1140     140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1141     219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1142     123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1143     189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1144     174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1145     2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1146     112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1147 };
1148 
skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile * A,const skcms_ICCProfile * B)1149 bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1150     // Test for exactly equal profiles first.
1151     if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1152         return true;
1153     }
1154 
1155     // For now this is the essentially the same strategy we use in test_only.c
1156     // for our skcms_Transform() smoke tests:
1157     //    1) transform A to XYZD50
1158     //    2) transform B to XYZD50
1159     //    3) return true if they're similar enough
1160     // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1161 
1162     // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1163     // 252 is evenly divisible by 3 and 4.  Only 192, 10, 241, and 43 are missing.
1164 
1165     if (A->data_color_space != B->data_color_space) {
1166         return false;
1167     }
1168 
1169     // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1170     // TODO: working with RGBA_8888 either way is probably fastest.
1171     skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1172     size_t npixels = 84;
1173     if (A->data_color_space == skcms_Signature_CMYK) {
1174         fmt = skcms_PixelFormat_RGBA_8888;
1175         npixels = 63;
1176     }
1177 
1178     // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1179     // use pre-canned results and skip that skcms_Transform() call?
1180     uint8_t dstA[252],
1181             dstB[252];
1182     if (!skcms_Transform(
1183                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, A,
1184                 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1185                 npixels)) {
1186         return false;
1187     }
1188     if (!skcms_Transform(
1189                 skcms_252_random_bytes,     fmt, skcms_AlphaFormat_Unpremul, B,
1190                 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1191                 npixels)) {
1192         return false;
1193     }
1194 
1195     // TODO: make sure this final check has reasonable codegen.
1196     for (size_t i = 0; i < 252; i++) {
1197         if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1198             return false;
1199         }
1200     }
1201     return true;
1202 }
1203 
skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile * profile,const skcms_TransferFunction * inv_tf)1204 bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1205                                       const skcms_TransferFunction* inv_tf) {
1206     if (!profile || !profile->has_trc) {
1207         return false;
1208     }
1209 
1210     return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1211            skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1212            skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1213 }
1214 
is_zero_to_one(float x)1215 static bool is_zero_to_one(float x) {
1216     return 0 <= x && x <= 1;
1217 }
1218 
1219 typedef struct { float vals[3]; } skcms_Vector3;
1220 
mv_mul(const skcms_Matrix3x3 * m,const skcms_Vector3 * v)1221 static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1222     skcms_Vector3 dst = {{0,0,0}};
1223     for (int row = 0; row < 3; ++row) {
1224         dst.vals[row] = m->vals[row][0] * v->vals[0]
1225                       + m->vals[row][1] * v->vals[1]
1226                       + m->vals[row][2] * v->vals[2];
1227     }
1228     return dst;
1229 }
1230 
skcms_PrimariesToXYZD50(float rx,float ry,float gx,float gy,float bx,float by,float wx,float wy,skcms_Matrix3x3 * toXYZD50)1231 bool skcms_PrimariesToXYZD50(float rx, float ry,
1232                              float gx, float gy,
1233                              float bx, float by,
1234                              float wx, float wy,
1235                              skcms_Matrix3x3* toXYZD50) {
1236     if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1237         !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1238         !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1239         !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1240         !toXYZD50) {
1241         return false;
1242     }
1243 
1244     // First, we need to convert xy values (primaries) to XYZ.
1245     skcms_Matrix3x3 primaries = {{
1246         { rx, gx, bx },
1247         { ry, gy, by },
1248         { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1249     }};
1250     skcms_Matrix3x3 primaries_inv;
1251     if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1252         return false;
1253     }
1254 
1255     // Assumes that Y is 1.0f.
1256     skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1257     skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1258 
1259     skcms_Matrix3x3 toXYZ = {{
1260         { XYZ.vals[0],           0,           0 },
1261         {           0, XYZ.vals[1],           0 },
1262         {           0,           0, XYZ.vals[2] },
1263     }};
1264     toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1265 
1266     // Now convert toXYZ matrix to toXYZD50.
1267     skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1268 
1269     // Calculate the chromatic adaptation matrix.  We will use the Bradford method, thus
1270     // the matrices below.  The Bradford method is used by Adobe and is widely considered
1271     // to be the best.
1272     skcms_Matrix3x3 xyz_to_lms = {{
1273         {  0.8951f,  0.2664f, -0.1614f },
1274         { -0.7502f,  1.7135f,  0.0367f },
1275         {  0.0389f, -0.0685f,  1.0296f },
1276     }};
1277     skcms_Matrix3x3 lms_to_xyz = {{
1278         {  0.9869929f, -0.1470543f, 0.1599627f },
1279         {  0.4323053f,  0.5183603f, 0.0492912f },
1280         { -0.0085287f,  0.0400428f, 0.9684867f },
1281     }};
1282 
1283     skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1284     skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1285 
1286     skcms_Matrix3x3 DXtoD50 = {{
1287         { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1288         { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1289         { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1290     }};
1291     DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms);
1292     DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50);
1293 
1294     *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1295     return true;
1296 }
1297 
1298 
skcms_Matrix3x3_invert(const skcms_Matrix3x3 * src,skcms_Matrix3x3 * dst)1299 bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1300     double a00 = src->vals[0][0],
1301            a01 = src->vals[1][0],
1302            a02 = src->vals[2][0],
1303            a10 = src->vals[0][1],
1304            a11 = src->vals[1][1],
1305            a12 = src->vals[2][1],
1306            a20 = src->vals[0][2],
1307            a21 = src->vals[1][2],
1308            a22 = src->vals[2][2];
1309 
1310     double b0 = a00*a11 - a01*a10,
1311            b1 = a00*a12 - a02*a10,
1312            b2 = a01*a12 - a02*a11,
1313            b3 = a20,
1314            b4 = a21,
1315            b5 = a22;
1316 
1317     double determinant = b0*b5
1318                        - b1*b4
1319                        + b2*b3;
1320 
1321     if (determinant == 0) {
1322         return false;
1323     }
1324 
1325     double invdet = 1.0 / determinant;
1326     if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1327         return false;
1328     }
1329 
1330     b0 *= invdet;
1331     b1 *= invdet;
1332     b2 *= invdet;
1333     b3 *= invdet;
1334     b4 *= invdet;
1335     b5 *= invdet;
1336 
1337     dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1338     dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1339     dst->vals[2][0] = (float)(        +     b2 );
1340     dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1341     dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1342     dst->vals[2][1] = (float)(        -     b1 );
1343     dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1344     dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1345     dst->vals[2][2] = (float)(        +     b0 );
1346 
1347     for (int r = 0; r < 3; ++r)
1348     for (int c = 0; c < 3; ++c) {
1349         if (!isfinitef_(dst->vals[r][c])) {
1350             return false;
1351         }
1352     }
1353     return true;
1354 }
1355 
skcms_Matrix3x3_concat(const skcms_Matrix3x3 * A,const skcms_Matrix3x3 * B)1356 skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1357     skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1358     for (int r = 0; r < 3; r++)
1359         for (int c = 0; c < 3; c++) {
1360             m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1361                          + A->vals[r][1] * B->vals[1][c]
1362                          + A->vals[r][2] * B->vals[2][c];
1363         }
1364     return m;
1365 }
1366 
1367 #if defined(__clang__) || defined(__GNUC__)
1368     #define small_memcpy __builtin_memcpy
1369 #else
1370     #define small_memcpy memcpy
1371 #endif
1372 
log2f_(float x)1373 static float log2f_(float x) {
1374     // The first approximation of log2(x) is its exponent 'e', minus 127.
1375     int32_t bits;
1376     small_memcpy(&bits, &x, sizeof(bits));
1377 
1378     float e = (float)bits * (1.0f / (1<<23));
1379 
1380     // If we use the mantissa too we can refine the error signficantly.
1381     int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
1382     float m;
1383     small_memcpy(&m, &m_bits, sizeof(m));
1384 
1385     return (e - 124.225514990f
1386               -   1.498030302f*m
1387               -   1.725879990f/(0.3520887068f + m));
1388 }
1389 
exp2f_(float x)1390 static float exp2f_(float x) {
1391     float fract = x - floorf_(x);
1392 
1393     float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
1394                                         -   1.490129070f*fract
1395                                         +  27.728023300f/(4.84252568f - fract));
1396     if (fbits > INT_MAX) {
1397         return INFINITY_;
1398     } else if (fbits < INT_MIN) {
1399         return -INFINITY_;
1400     }
1401     int32_t bits = (int32_t)fbits;
1402     small_memcpy(&x, &bits, sizeof(x));
1403     return x;
1404 }
1405 
powf_(float x,float y)1406 float powf_(float x, float y) {
1407     return (x == 0) || (x == 1) ? x
1408                                 : exp2f_(log2f_(x) * y);
1409 }
1410 
skcms_TransferFunction_eval(const skcms_TransferFunction * tf,float x)1411 float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
1412     float sign = x < 0 ? -1.0f : 1.0f;
1413     x *= sign;
1414 
1415     return sign * (x < tf->d ? tf->c * x + tf->f
1416                              : powf_(tf->a * x + tf->b, tf->g) + tf->e);
1417 }
1418 
1419 #if defined(__clang__)
1420     [[clang::no_sanitize("float-divide-by-zero")]]  // Checked for by tf_is_valid() on the way out.
1421 #endif
skcms_TransferFunction_invert(const skcms_TransferFunction * src,skcms_TransferFunction * dst)1422 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1423     if (!tf_is_valid(src)) {
1424         return false;
1425     }
1426 
1427     // We're inverting this function, solving for x in terms of y.
1428     //   y = (cx + f)         x < d
1429     //       (ax + b)^g + e   x ≥ d
1430     // The inverse of this function can be expressed in the same piecewise form.
1431     skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1432 
1433     // We'll start by finding the new threshold inv.d.
1434     // In principle we should be able to find that by solving for y at x=d from either side.
1435     // (If those two d values aren't the same, it's a discontinuous transfer function.)
1436     float d_l =       src->c * src->d + src->f,
1437           d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1438     if (fabsf_(d_l - d_r) > 1/512.0f) {
1439         return false;
1440     }
1441     inv.d = d_l;  // TODO(mtklein): better in practice to choose d_r?
1442 
1443     // When d=0, the linear section collapses to a point.  We leave c,d,f all zero in that case.
1444     if (inv.d > 0) {
1445         // Inverting the linear section is pretty straightfoward:
1446         //        y       = cx + f
1447         //        y - f   = cx
1448         //   (1/c)y - f/c = x
1449         inv.c =    1.0f/src->c;
1450         inv.f = -src->f/src->c;
1451     }
1452 
1453     // The interesting part is inverting the nonlinear section:
1454     //         y                = (ax + b)^g + e.
1455     //         y - e            = (ax + b)^g
1456     //        (y - e)^1/g       =  ax + b
1457     //        (y - e)^1/g - b   =  ax
1458     //   (1/a)(y - e)^1/g - b/a =   x
1459     //
1460     // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1461     //   let k = (1/a)^g
1462     //   (1/a)( y -  e)^1/g - b/a = x
1463     //        (ky - ke)^1/g - b/a = x
1464 
1465     float k = powf_(src->a, -src->g);  // (1/a)^g == a^-g
1466     inv.g = 1.0f / src->g;
1467     inv.a = k;
1468     inv.b = -k * src->e;
1469     inv.e = -src->b / src->a;
1470 
1471     // Now in principle we're done.
1472     // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f,
1473     // we'll tweak e.  These two values should be close to each other,
1474     // just down to numerical precision issues, especially from powf_.
1475     float s = powf_(src->a + src->b, src->g) + src->e;
1476     inv.e = 1.0f - powf_(inv.a * s + inv.b, inv.g);
1477 
1478     *dst = inv;
1479     return tf_is_valid(dst);
1480 }
1481 
1482 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1483 
1484 // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1485 //
1486 //   tf(x) =  cx + f          x < d
1487 //   tf(x) = (ax + b)^g + e   x ≥ d
1488 //
1489 // When fitting, we add the additional constraint that both pieces meet at d:
1490 //
1491 //   cd + f = (ad + b)^g + e
1492 //
1493 // Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1494 //
1495 //   tf(x) =                           cx + f   x < d
1496 //   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
1497 //
1498 // Our overall strategy is then:
1499 //    For a couple tolerances,
1500 //       - fit_linear():    fit c,d,f iteratively to as many points as our tolerance allows
1501 //       - invert c,d,f
1502 //       - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1503 //                          (and by constraint, inverted e) to the inverse of the table.
1504 //    Return the parameters with least maximum error.
1505 //
1506 // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1507 // of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1508 //
1509 //    let y = Table(x)
1510 //    r(x) = x - f_inv(y)
1511 //
1512 //    ∂r/∂g = ln(ay + b)*(ay + b)^g
1513 //          - ln(ad + b)*(ad + b)^g
1514 //    ∂r/∂a = yg(ay + b)^(g-1)
1515 //          - dg(ad + b)^(g-1)
1516 //    ∂r/∂b =  g(ay + b)^(g-1)
1517 //          -  g(ad + b)^(g-1)
1518 
1519 // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1520 // and fill out the gradient of the residual into dfdP.
rg_nonlinear(float x,const skcms_Curve * curve,const skcms_TransferFunction * tf,const float P[3],float dfdP[3])1521 static float rg_nonlinear(float x,
1522                           const skcms_Curve* curve,
1523                           const skcms_TransferFunction* tf,
1524                           const float P[3],
1525                           float dfdP[3]) {
1526     const float y = eval_curve(curve, x);
1527 
1528     const float g = P[0],  a = P[1],  b = P[2],
1529                 c = tf->c, d = tf->d, f = tf->f;
1530 
1531     const float Y = fmaxf_(a*y + b, 0.0f),
1532                 D =        a*d + b;
1533     assert (D >= 0);
1534 
1535     // The gradient.
1536     dfdP[0] = 0.69314718f*log2f_(Y)*powf_(Y, g)
1537             - 0.69314718f*log2f_(D)*powf_(D, g);
1538     dfdP[1] = y*g*powf_(Y, g-1)
1539             - d*g*powf_(D, g-1);
1540     dfdP[2] =   g*powf_(Y, g-1)
1541             -   g*powf_(D, g-1);
1542 
1543     // The residual.
1544     const float f_inv = powf_(Y, g)
1545                       - powf_(D, g)
1546                       + c*d + f;
1547     return x - f_inv;
1548 }
1549 
gauss_newton_step(const skcms_Curve * curve,const skcms_TransferFunction * tf,float P[3],float x0,float dx,int N)1550 static bool gauss_newton_step(const skcms_Curve* curve,
1551                               const skcms_TransferFunction* tf,
1552                               float P[3],
1553                               float x0, float dx, int N) {
1554     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1555     //
1556     // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1557     //   where r(P) is the residual vector
1558     //   and Jf is the Jacobian matrix of f(), ∂r/∂P.
1559     //
1560     // Let's review the shape of each of these expressions:
1561     //   r(P)   is [N x 1], a column vector with one entry per value of x tested
1562     //   Jf     is [N x 3], a matrix with an entry for each (x,P) pair
1563     //   Jf^T   is [3 x N], the transpose of Jf
1564     //
1565     //   Jf^T Jf   is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1566     //                                              and so is its inverse (Jf^T Jf)^-1
1567     //   Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1568     //
1569     // Our implementation strategy to get to the final ∆P is
1570     //   1) evaluate Jf^T Jf,   call that lhs
1571     //   2) evaluate Jf^T r(P), call that rhs
1572     //   3) invert lhs
1573     //   4) multiply inverse lhs by rhs
1574     //
1575     // This is a friendly implementation strategy because we don't have to have any
1576     // buffers that scale with N, and equally nice don't have to perform any matrix
1577     // operations that are variable size.
1578     //
1579     // Other implementation strategies could trade this off, e.g. evaluating the
1580     // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1581     // the residuals.  That would probably require implementing singular value
1582     // decomposition, and would create a [3 x N] matrix to be multiplied by the
1583     // [N x 1] residual vector, but on the upside I think that'd eliminate the
1584     // possibility of this gauss_newton_step() function ever failing.
1585 
1586     // 0) start off with lhs and rhs safely zeroed.
1587     skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1588     skcms_Vector3   rhs = {  {0,0,0} };
1589 
1590     // 1,2) evaluate lhs and evaluate rhs
1591     //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1592     //   so we'll have to update lhs and rhs at the same time.
1593     for (int i = 0; i < N; i++) {
1594         float x = x0 + i*dx;
1595 
1596         float dfdP[3] = {0,0,0};
1597         float resid = rg_nonlinear(x,curve,tf,P, dfdP);
1598 
1599         for (int r = 0; r < 3; r++) {
1600             for (int c = 0; c < 3; c++) {
1601                 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1602             }
1603             rhs.vals[r] += dfdP[r] * resid;
1604         }
1605     }
1606 
1607     // If any of the 3 P parameters are unused, this matrix will be singular.
1608     // Detect those cases and fix them up to indentity instead, so we can invert.
1609     for (int k = 0; k < 3; k++) {
1610         if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1611             lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1612             lhs.vals[k][k] = 1;
1613         }
1614     }
1615 
1616     // 3) invert lhs
1617     skcms_Matrix3x3 lhs_inv;
1618     if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1619         return false;
1620     }
1621 
1622     // 4) multiply inverse lhs by rhs
1623     skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1624     P[0] += dP.vals[0];
1625     P[1] += dP.vals[1];
1626     P[2] += dP.vals[2];
1627     return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]);
1628 }
1629 
1630 
1631 // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
fit_nonlinear(const skcms_Curve * curve,int L,int N,skcms_TransferFunction * tf)1632 static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1633     float P[3] = { tf->g, tf->a, tf->b };
1634 
1635     // No matter where we start, dx should always represent N even steps from 0 to 1.
1636     const float dx = 1.0f / (N-1);
1637 
1638     for (int j = 0; j < 3/*TODO: tune*/; j++) {
1639         // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
1640         // We don't really know how to fix up a if it goes negative.
1641         if (P[1] < 0) {
1642             return false;
1643         }
1644         // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
1645         if (P[1] * tf->d + P[2] < 0) {
1646             P[2] = -P[1] * tf->d;
1647         }
1648         assert (P[1] >= 0 &&
1649                 P[1] * tf->d + P[2] >= 0);
1650 
1651         if (!gauss_newton_step(curve, tf,
1652                                P,
1653                                L*dx, dx, N-L)) {
1654             return false;
1655         }
1656     }
1657 
1658     // We need to apply our fixups one last time
1659     if (P[1] < 0) {
1660         return false;
1661     }
1662     if (P[1] * tf->d + P[2] < 0) {
1663         P[2] = -P[1] * tf->d;
1664     }
1665 
1666     tf->g = P[0];
1667     tf->a = P[1];
1668     tf->b = P[2];
1669     tf->e =   tf->c*tf->d + tf->f
1670       - powf_(tf->a*tf->d + tf->b, tf->g);
1671     return true;
1672 }
1673 
skcms_ApproximateCurve(const skcms_Curve * curve,skcms_TransferFunction * approx,float * max_error)1674 bool skcms_ApproximateCurve(const skcms_Curve* curve,
1675                             skcms_TransferFunction* approx,
1676                             float* max_error) {
1677     if (!curve || !approx || !max_error) {
1678         return false;
1679     }
1680 
1681     if (curve->table_entries == 0) {
1682         // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1683         return false;
1684     }
1685 
1686     if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1687         // We need at least two points, and must put some reasonable cap on the maximum number.
1688         return false;
1689     }
1690 
1691     int N = (int)curve->table_entries;
1692     const float dx = 1.0f / (N - 1);
1693 
1694     *max_error = INFINITY_;
1695     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1696     for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1697         skcms_TransferFunction tf,
1698                                tf_inv;
1699 
1700         // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1701         tf.f = 0.0f;
1702         int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1703 
1704         if (L == N) {
1705             // If the entire data set was linear, move the coefficients to the nonlinear portion
1706             // with G == 1.  This lets use a canonical representation with d == 0.
1707             tf.g = 1;
1708             tf.a = tf.c;
1709             tf.b = tf.f;
1710             tf.c = tf.d = tf.e = tf.f = 0;
1711         } else if (L == N - 1) {
1712             // Degenerate case with only two points in the nonlinear segment. Solve directly.
1713             tf.g = 1;
1714             tf.a = (eval_curve(curve, (N-1)*dx) -
1715                     eval_curve(curve, (N-2)*dx))
1716                  / dx;
1717             tf.b = eval_curve(curve, (N-2)*dx)
1718                  - tf.a * (N-2)*dx;
1719             tf.e = 0;
1720         } else {
1721             // Start by guessing a gamma-only curve through the midpoint.
1722             int mid = (L + N) / 2;
1723             float mid_x = mid / (N - 1.0f);
1724             float mid_y = eval_curve(curve, mid_x);
1725             tf.g = log2f_(mid_y) / log2f_(mid_x);
1726             tf.a = 1;
1727             tf.b = 0;
1728             tf.e =    tf.c*tf.d + tf.f
1729               - powf_(tf.a*tf.d + tf.b, tf.g);
1730 
1731 
1732             if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1733                 !fit_nonlinear(curve, L,N, &tf_inv)) {
1734                 continue;
1735             }
1736 
1737             // We fit tf_inv, so calculate tf to keep in sync.
1738             if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1739                 continue;
1740             }
1741         }
1742 
1743         // We find our error by roundtripping the table through tf_inv.
1744         //
1745         // (The most likely use case for this approximation is to be inverted and
1746         // used as the transfer function for a destination color space.)
1747         //
1748         // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1749         // invertible, so re-verify that here (and use the new inverse for testing).
1750         if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1751             continue;
1752         }
1753 
1754         float err = max_roundtrip_error(curve, &tf_inv);
1755         if (*max_error > err) {
1756             *max_error = err;
1757             *approx    = tf;
1758         }
1759     }
1760     return isfinitef_(*max_error);
1761 }
1762 
1763 // ~~~~ Impl. of skcms_Transform() ~~~~
1764 
1765 typedef enum {
1766     Op_load_a8,
1767     Op_load_g8,
1768     Op_load_8888_palette8,
1769     Op_load_4444,
1770     Op_load_565,
1771     Op_load_888,
1772     Op_load_8888,
1773     Op_load_1010102,
1774     Op_load_161616LE,
1775     Op_load_16161616LE,
1776     Op_load_161616BE,
1777     Op_load_16161616BE,
1778     Op_load_hhh,
1779     Op_load_hhhh,
1780     Op_load_fff,
1781     Op_load_ffff,
1782 
1783     Op_swap_rb,
1784     Op_clamp,
1785     Op_invert,
1786     Op_force_opaque,
1787     Op_premul,
1788     Op_unpremul,
1789     Op_matrix_3x3,
1790     Op_matrix_3x4,
1791     Op_lab_to_xyz,
1792 
1793     Op_tf_r,
1794     Op_tf_g,
1795     Op_tf_b,
1796     Op_tf_a,
1797 
1798     Op_table_r,
1799     Op_table_g,
1800     Op_table_b,
1801     Op_table_a,
1802 
1803     Op_clut,
1804 
1805     Op_store_a8,
1806     Op_store_g8,
1807     Op_store_4444,
1808     Op_store_565,
1809     Op_store_888,
1810     Op_store_8888,
1811     Op_store_1010102,
1812     Op_store_161616LE,
1813     Op_store_16161616LE,
1814     Op_store_161616BE,
1815     Op_store_16161616BE,
1816     Op_store_hhh,
1817     Op_store_hhhh,
1818     Op_store_fff,
1819     Op_store_ffff,
1820 } Op;
1821 
1822 #if defined(__clang__)
1823     template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
1824 #elif defined(__GNUC__)
1825     // For some reason GCC accepts this nonsense, but not the more straightforward version,
1826     //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
1827     template <int N, typename T>
1828     struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
1829 
1830     template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
1831 #endif
1832 
1833 // First, instantiate our default exec_ops() implementation using the default compiliation target.
1834 
1835 namespace baseline {
1836 #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
1837                             || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
1838     #define N 1
1839     template <typename T> using V = T;
1840     using Color = float;
1841 #elif defined(__AVX512F__)
1842     #define N 16
1843     template <typename T> using V = Vec<N,T>;
1844     using Color = float;
1845 #elif defined(__AVX__)
1846     #define N 8
1847     template <typename T> using V = Vec<N,T>;
1848     using Color = float;
1849 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
1850     #define N 8
1851     template <typename T> using V = Vec<N,T>;
1852     using Color = _Float16;
1853 #else
1854     #define N 4
1855     template <typename T> using V = Vec<N,T>;
1856     using Color = float;
1857 #endif
1858 
1859     #include "src/Transform_inl.h"
1860     #undef N
1861 }
1862 
1863 // Now, instantiate any other versions of run_program() we may want for runtime detection.
1864 #if !defined(SKCMS_PORTABLE) &&                           \
1865     !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) &&           \
1866         (( defined(__clang__) && __clang_major__ >= 5) || \
1867          (!defined(__clang__) && defined(__GNUC__)))      \
1868      && defined(__x86_64__)
1869 
1870     #if !defined(__AVX2__)
1871         #if defined(__clang__)
1872             #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
1873         #elif defined(__GNUC__)
1874             #pragma GCC push_options
1875             #pragma GCC target("avx2,f16c")
1876         #endif
1877 
1878         namespace hsw {
1879             #define USING_AVX
1880             #define USING_AVX_F16C
1881             #define USING_AVX2
1882             #define N 8
1883             template <typename T> using V = Vec<N,T>;
1884             using Color = float;
1885 
1886             #include "src/Transform_inl.h"
1887 
1888             // src/Transform_inl.h will undefine USING_* for us.
1889             #undef N
1890         }
1891 
1892         #if defined(__clang__)
1893             #pragma clang attribute pop
1894         #elif defined(__GNUC__)
1895             #pragma GCC pop_options
1896         #endif
1897 
1898         #define TEST_FOR_HSW
1899     #endif
1900 
1901     #if !defined(__AVX512F__)
1902         #if defined(__clang__)
1903             #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
1904         #elif defined(__GNUC__)
1905             #pragma GCC push_options
1906             #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
1907         #endif
1908 
1909         namespace skx {
1910             #define USING_AVX512F
1911             #define N 16
1912             template <typename T> using V = Vec<N,T>;
1913             using Color = float;
1914 
1915             #include "src/Transform_inl.h"
1916 
1917             // src/Transform_inl.h will undefine USING_* for us.
1918             #undef N
1919         }
1920 
1921         #if defined(__clang__)
1922             #pragma clang attribute pop
1923         #elif defined(__GNUC__)
1924             #pragma GCC pop_options
1925         #endif
1926 
1927         #define TEST_FOR_SKX
1928     #endif
1929 
1930     #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
1931         enum class CpuType { None, HSW, SKX };
cpu_type()1932         static CpuType cpu_type() {
1933             static const CpuType type = []{
1934                 // See http://www.sandpile.org/x86/cpuid.htm
1935 
1936                 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
1937                 uint32_t eax, ebx, ecx, edx;
1938                 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1939                                              : "0"(1), "2"(0));
1940                 if ((edx & (1u<<25)) &&  // SSE
1941                     (edx & (1u<<26)) &&  // SSE2
1942                     (ecx & (1u<< 0)) &&  // SSE3
1943                     (ecx & (1u<< 9)) &&  // SSSE3
1944                     (ecx & (1u<<12)) &&  // FMA (N.B. not used, avoided even)
1945                     (ecx & (1u<<19)) &&  // SSE4.1
1946                     (ecx & (1u<<20)) &&  // SSE4.2
1947                     (ecx & (1u<<26)) &&  // XSAVE
1948                     (ecx & (1u<<27)) &&  // OSXSAVE
1949                     (ecx & (1u<<28)) &&  // AVX
1950                     (ecx & (1u<<29))) {  // F16C
1951 
1952                     // Call cpuid(7) to check for AVX2 and AVX-512 bits.
1953                     __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1954                                                  : "0"(7), "2"(0));
1955                     // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
1956                     uint32_t xcr0, dont_need_edx;
1957                     __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
1958 
1959                     if ((xcr0 & (1u<<1)) &&  // XMM register state saved?
1960                         (xcr0 & (1u<<2)) &&  // YMM register state saved?
1961                         (ebx  & (1u<<5))) {  // AVX2
1962                         // At this point we're at least HSW.  Continue checking for SKX.
1963                         if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
1964                             (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
1965                             (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
1966                             (ebx  & (1u<<16)) && // AVX512F
1967                             (ebx  & (1u<<17)) && // AVX512DQ
1968                             (ebx  & (1u<<28)) && // AVX512CD
1969                             (ebx  & (1u<<30)) && // AVX512BW
1970                             (ebx  & (1u<<31))) { // AVX512VL
1971                             return CpuType::SKX;
1972                         }
1973                         return CpuType::HSW;
1974                     }
1975                 }
1976                 return CpuType::None;
1977             }();
1978             return type;
1979         }
1980     #endif
1981 
1982 #endif
1983 
is_identity_tf(const skcms_TransferFunction * tf)1984 static bool is_identity_tf(const skcms_TransferFunction* tf) {
1985     return tf->g == 1 && tf->a == 1
1986         && tf->b == 0 && tf->c == 0 && tf->d == 0 && tf->e == 0 && tf->f == 0;
1987 }
1988 
1989 typedef struct {
1990     Op          op;
1991     const void* arg;
1992 } OpAndArg;
1993 
select_curve_op(const skcms_Curve * curve,int channel)1994 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
1995     static const struct { Op parametric, table; } ops[] = {
1996         { Op_tf_r, Op_table_r },
1997         { Op_tf_g, Op_table_g },
1998         { Op_tf_b, Op_table_b },
1999         { Op_tf_a, Op_table_a },
2000     };
2001 
2002     const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2003 
2004     if (curve->table_entries == 0) {
2005         return is_identity_tf(&curve->parametric)
2006             ? noop
2007             : OpAndArg{ ops[channel].parametric, &curve->parametric };
2008     }
2009 
2010     return OpAndArg{ ops[channel].table, curve };
2011 }
2012 
bytes_per_pixel(skcms_PixelFormat fmt)2013 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2014     switch (fmt >> 1) {   // ignore rgb/bgr
2015         case skcms_PixelFormat_A_8                >> 1: return  1;
2016         case skcms_PixelFormat_G_8                >> 1: return  1;
2017         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return  1;
2018         case skcms_PixelFormat_ABGR_4444          >> 1: return  2;
2019         case skcms_PixelFormat_RGB_565            >> 1: return  2;
2020         case skcms_PixelFormat_RGB_888            >> 1: return  3;
2021         case skcms_PixelFormat_RGBA_8888          >> 1: return  4;
2022         case skcms_PixelFormat_RGBA_1010102       >> 1: return  4;
2023         case skcms_PixelFormat_RGB_161616LE       >> 1: return  6;
2024         case skcms_PixelFormat_RGBA_16161616LE    >> 1: return  8;
2025         case skcms_PixelFormat_RGB_161616BE       >> 1: return  6;
2026         case skcms_PixelFormat_RGBA_16161616BE    >> 1: return  8;
2027         case skcms_PixelFormat_RGB_hhh_Norm       >> 1: return  6;
2028         case skcms_PixelFormat_RGBA_hhhh_Norm     >> 1: return  8;
2029         case skcms_PixelFormat_RGB_hhh            >> 1: return  6;
2030         case skcms_PixelFormat_RGBA_hhhh          >> 1: return  8;
2031         case skcms_PixelFormat_RGB_fff            >> 1: return 12;
2032         case skcms_PixelFormat_RGBA_ffff          >> 1: return 16;
2033     }
2034     assert(false);
2035     return 0;
2036 }
2037 
prep_for_destination(const skcms_ICCProfile * profile,skcms_Matrix3x3 * fromXYZD50,skcms_TransferFunction * invR,skcms_TransferFunction * invG,skcms_TransferFunction * invB)2038 static bool prep_for_destination(const skcms_ICCProfile* profile,
2039                                  skcms_Matrix3x3* fromXYZD50,
2040                                  skcms_TransferFunction* invR,
2041                                  skcms_TransferFunction* invG,
2042                                  skcms_TransferFunction* invB) {
2043     // We only support destinations with parametric transfer functions
2044     // and with gamuts that can be transformed from XYZD50.
2045     return profile->has_trc
2046         && profile->has_toXYZD50
2047         && profile->trc[0].table_entries == 0
2048         && profile->trc[1].table_entries == 0
2049         && profile->trc[2].table_entries == 0
2050         && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2051         && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2052         && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2053         && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2054 }
2055 
skcms_Transform(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t npixels)2056 bool skcms_Transform(const void*             src,
2057                      skcms_PixelFormat       srcFmt,
2058                      skcms_AlphaFormat       srcAlpha,
2059                      const skcms_ICCProfile* srcProfile,
2060                      void*                   dst,
2061                      skcms_PixelFormat       dstFmt,
2062                      skcms_AlphaFormat       dstAlpha,
2063                      const skcms_ICCProfile* dstProfile,
2064                      size_t                  npixels) {
2065     return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2066                                       dst, dstFmt, dstAlpha, dstProfile,
2067                                       npixels, nullptr);
2068 }
2069 
skcms_TransformWithPalette(const void * src,skcms_PixelFormat srcFmt,skcms_AlphaFormat srcAlpha,const skcms_ICCProfile * srcProfile,void * dst,skcms_PixelFormat dstFmt,skcms_AlphaFormat dstAlpha,const skcms_ICCProfile * dstProfile,size_t nz,const void * palette)2070 bool skcms_TransformWithPalette(const void*             src,
2071                                 skcms_PixelFormat       srcFmt,
2072                                 skcms_AlphaFormat       srcAlpha,
2073                                 const skcms_ICCProfile* srcProfile,
2074                                 void*                   dst,
2075                                 skcms_PixelFormat       dstFmt,
2076                                 skcms_AlphaFormat       dstAlpha,
2077                                 const skcms_ICCProfile* dstProfile,
2078                                 size_t                  nz,
2079                                 const void*             palette) {
2080     const size_t dst_bpp = bytes_per_pixel(dstFmt),
2081                  src_bpp = bytes_per_pixel(srcFmt);
2082     // Let's just refuse if the request is absurdly big.
2083     if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2084         return false;
2085     }
2086     int n = (int)nz;
2087 
2088     // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2089     if (!srcProfile) {
2090         srcProfile = skcms_sRGB_profile();
2091     }
2092     if (!dstProfile) {
2093         dstProfile = skcms_sRGB_profile();
2094     }
2095 
2096     // We can't transform in place unless the PixelFormats are the same size.
2097     if (dst == src && dst_bpp != src_bpp) {
2098         return false;
2099     }
2100     // TODO: more careful alias rejection (like, dst == src + 1)?
2101 
2102     if (needs_palette(srcFmt) && !palette) {
2103         return false;
2104     }
2105 
2106     Op          program  [32];
2107     const void* arguments[32];
2108 
2109     Op*          ops  = program;
2110     const void** args = arguments;
2111 
2112     skcms_TransferFunction inv_dst_tf_r, inv_dst_tf_g, inv_dst_tf_b;
2113     skcms_Matrix3x3        from_xyz;
2114 
2115     switch (srcFmt >> 1) {
2116         default: return false;
2117         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_load_a8;         break;
2118         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_load_g8;         break;
2119         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_load_4444;       break;
2120         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_load_565;        break;
2121         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_load_888;        break;
2122         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_load_8888;       break;
2123         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_load_1010102;    break;
2124         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_load_161616LE;   break;
2125         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2126         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_load_161616BE;   break;
2127         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2128         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_load_hhh;        break;
2129         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_load_hhhh;       break;
2130         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_load_hhh;        break;
2131         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_load_hhhh;       break;
2132         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_load_fff;        break;
2133         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_load_ffff;       break;
2134 
2135         case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++  = Op_load_8888_palette8;
2136                                                         *args++ = palette;
2137                                                         break;
2138     }
2139     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2140         srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2141         *ops++ = Op_clamp;
2142     }
2143     if (srcFmt & 1) {
2144         *ops++ = Op_swap_rb;
2145     }
2146     skcms_ICCProfile gray_dst_profile;
2147     if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2148         // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2149         // luminance (Y) by the destination transfer function.
2150         gray_dst_profile = *dstProfile;
2151         skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2152         dstProfile = &gray_dst_profile;
2153     }
2154 
2155     if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2156         // Photoshop creates CMYK images as inverse CMYK.
2157         // These happen to be the only ones we've _ever_ seen.
2158         *ops++ = Op_invert;
2159         // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2160         srcAlpha = skcms_AlphaFormat_Unpremul;
2161     }
2162 
2163     if (srcAlpha == skcms_AlphaFormat_Opaque) {
2164         *ops++ = Op_force_opaque;
2165     } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2166         *ops++ = Op_unpremul;
2167     }
2168 
2169     if (dstProfile != srcProfile) {
2170 
2171         if (!prep_for_destination(dstProfile,
2172                                   &from_xyz, &inv_dst_tf_r, &inv_dst_tf_b, &inv_dst_tf_g)) {
2173             return false;
2174         }
2175 
2176         if (srcProfile->has_A2B) {
2177             if (srcProfile->A2B.input_channels) {
2178                 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2179                     OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2180                     if (oa.arg) {
2181                         *ops++  = oa.op;
2182                         *args++ = oa.arg;
2183                     }
2184                 }
2185                 *ops++ = Op_clamp;
2186                 *ops++ = Op_clut;
2187                 *args++ = &srcProfile->A2B;
2188             }
2189 
2190             if (srcProfile->A2B.matrix_channels == 3) {
2191                 for (int i = 0; i < 3; i++) {
2192                     OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2193                     if (oa.arg) {
2194                         *ops++  = oa.op;
2195                         *args++ = oa.arg;
2196                     }
2197                 }
2198 
2199                 static const skcms_Matrix3x4 I = {{
2200                     {1,0,0,0},
2201                     {0,1,0,0},
2202                     {0,0,1,0},
2203                 }};
2204                 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2205                     *ops++  = Op_matrix_3x4;
2206                     *args++ = &srcProfile->A2B.matrix;
2207                 }
2208             }
2209 
2210             if (srcProfile->A2B.output_channels == 3) {
2211                 for (int i = 0; i < 3; i++) {
2212                     OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2213                     if (oa.arg) {
2214                         *ops++  = oa.op;
2215                         *args++ = oa.arg;
2216                     }
2217                 }
2218             }
2219 
2220             if (srcProfile->pcs == skcms_Signature_Lab) {
2221                 *ops++ = Op_lab_to_xyz;
2222             }
2223 
2224         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2225             for (int i = 0; i < 3; i++) {
2226                 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2227                 if (oa.arg) {
2228                     *ops++  = oa.op;
2229                     *args++ = oa.arg;
2230                 }
2231             }
2232         } else {
2233             return false;
2234         }
2235 
2236         // A2B sources should already be in XYZD50 at this point.
2237         // Others still need to be transformed using their toXYZD50 matrix.
2238         // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2239         // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2240         assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2241         static const skcms_Matrix3x3 I = {{
2242             { 1.0f, 0.0f, 0.0f },
2243             { 0.0f, 1.0f, 0.0f },
2244             { 0.0f, 0.0f, 1.0f },
2245         }};
2246         const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2247 
2248         // There's a chance the source and destination gamuts are identical,
2249         // in which case we can skip the gamut transform.
2250         if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2251             // Concat the entire gamut transform into from_xyz,
2252             // now slightly misnamed but it's a handy spot to stash the result.
2253             from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2254             *ops++  = Op_matrix_3x3;
2255             *args++ = &from_xyz;
2256         }
2257 
2258         // Encode back to dst RGB using its parametric transfer functions.
2259         if (!is_identity_tf(&inv_dst_tf_r)) { *ops++ = Op_tf_r; *args++ = &inv_dst_tf_r; }
2260         if (!is_identity_tf(&inv_dst_tf_g)) { *ops++ = Op_tf_g; *args++ = &inv_dst_tf_g; }
2261         if (!is_identity_tf(&inv_dst_tf_b)) { *ops++ = Op_tf_b; *args++ = &inv_dst_tf_b; }
2262     }
2263 
2264     // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2265     // not just to values that fit in [0,1].
2266     //
2267     // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2268     // but would be carrying r > 1, which is really unexpected for downstream consumers.
2269     if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2270         *ops++ = Op_clamp;
2271     }
2272     if (dstAlpha == skcms_AlphaFormat_Opaque) {
2273         *ops++ = Op_force_opaque;
2274     } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2275         *ops++ = Op_premul;
2276     }
2277     if (dstFmt & 1) {
2278         *ops++ = Op_swap_rb;
2279     }
2280     switch (dstFmt >> 1) {
2281         default: return false;
2282         case skcms_PixelFormat_A_8             >> 1: *ops++ = Op_store_a8;         break;
2283         case skcms_PixelFormat_G_8             >> 1: *ops++ = Op_store_g8;         break;
2284         case skcms_PixelFormat_ABGR_4444       >> 1: *ops++ = Op_store_4444;       break;
2285         case skcms_PixelFormat_RGB_565         >> 1: *ops++ = Op_store_565;        break;
2286         case skcms_PixelFormat_RGB_888         >> 1: *ops++ = Op_store_888;        break;
2287         case skcms_PixelFormat_RGBA_8888       >> 1: *ops++ = Op_store_8888;       break;
2288         case skcms_PixelFormat_RGBA_1010102    >> 1: *ops++ = Op_store_1010102;    break;
2289         case skcms_PixelFormat_RGB_161616LE    >> 1: *ops++ = Op_store_161616LE;   break;
2290         case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2291         case skcms_PixelFormat_RGB_161616BE    >> 1: *ops++ = Op_store_161616BE;   break;
2292         case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2293         case skcms_PixelFormat_RGB_hhh_Norm    >> 1: *ops++ = Op_store_hhh;        break;
2294         case skcms_PixelFormat_RGBA_hhhh_Norm  >> 1: *ops++ = Op_store_hhhh;       break;
2295         case skcms_PixelFormat_RGB_hhh         >> 1: *ops++ = Op_store_hhh;        break;
2296         case skcms_PixelFormat_RGBA_hhhh       >> 1: *ops++ = Op_store_hhhh;       break;
2297         case skcms_PixelFormat_RGB_fff         >> 1: *ops++ = Op_store_fff;        break;
2298         case skcms_PixelFormat_RGBA_ffff       >> 1: *ops++ = Op_store_ffff;       break;
2299     }
2300 
2301     auto run = baseline::run_program;
2302 #if defined(TEST_FOR_HSW)
2303     switch (cpu_type()) {
2304         case CpuType::None:                        break;
2305         case CpuType::HSW: run = hsw::run_program; break;
2306         case CpuType::SKX: run = hsw::run_program; break;
2307     }
2308 #endif
2309 #if defined(TEST_FOR_SKX)
2310     switch (cpu_type()) {
2311         case CpuType::None:                        break;
2312         case CpuType::HSW:                         break;
2313         case CpuType::SKX: run = skx::run_program; break;
2314     }
2315 #endif
2316     run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2317     return true;
2318 }
2319 
assert_usable_as_destination(const skcms_ICCProfile * profile)2320 static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2321 #if defined(NDEBUG)
2322     (void)profile;
2323 #else
2324     skcms_Matrix3x3 fromXYZD50;
2325     skcms_TransferFunction invR, invG, invB;
2326     assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2327 #endif
2328 }
2329 
skcms_MakeUsableAsDestination(skcms_ICCProfile * profile)2330 bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2331     skcms_Matrix3x3 fromXYZD50;
2332     if (!profile->has_trc || !profile->has_toXYZD50
2333         || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2334         return false;
2335     }
2336 
2337     skcms_TransferFunction tf[3];
2338     for (int i = 0; i < 3; i++) {
2339         skcms_TransferFunction inv;
2340         if (profile->trc[i].table_entries == 0
2341             && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2342             tf[i] = profile->trc[i].parametric;
2343             continue;
2344         }
2345 
2346         float max_error;
2347         // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2348         if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2349             return false;
2350         }
2351     }
2352 
2353     for (int i = 0; i < 3; ++i) {
2354         profile->trc[i].table_entries = 0;
2355         profile->trc[i].parametric = tf[i];
2356     }
2357 
2358     assert_usable_as_destination(profile);
2359     return true;
2360 }
2361 
skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile * profile)2362 bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2363     // Operate on a copy of profile, so we can choose the best TF for the original curves
2364     skcms_ICCProfile result = *profile;
2365     if (!skcms_MakeUsableAsDestination(&result)) {
2366         return false;
2367     }
2368 
2369     int best_tf = 0;
2370     float min_max_error = INFINITY_;
2371     for (int i = 0; i < 3; i++) {
2372         skcms_TransferFunction inv;
2373         if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2374             return false;
2375         }
2376 
2377         float err = 0;
2378         for (int j = 0; j < 3; ++j) {
2379             err = fmaxf_(err, max_roundtrip_error(&profile->trc[j], &inv));
2380         }
2381         if (min_max_error > err) {
2382             min_max_error = err;
2383             best_tf = i;
2384         }
2385     }
2386 
2387     for (int i = 0; i < 3; i++) {
2388         result.trc[i].parametric = result.trc[best_tf].parametric;
2389     }
2390 
2391     *profile = result;
2392     assert_usable_as_destination(profile);
2393     return true;
2394 }
2395